Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id A727F200C62 for ; Wed, 26 Apr 2017 19:48:17 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id A5F20160BBD; Wed, 26 Apr 2017 17:48:17 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id CD5A8160B8F for ; Wed, 26 Apr 2017 19:48:16 +0200 (CEST) Received: (qmail 69940 invoked by uid 500); 26 Apr 2017 17:48:16 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 69930 invoked by uid 99); 26 Apr 2017 17:48:15 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 26 Apr 2017 17:48:15 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id DB940E1103; Wed, 26 Apr 2017 17:48:15 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: cpoerschke@apache.org To: commits@lucene.apache.org Date: Wed, 26 Apr 2017 17:48:15 -0000 Message-Id: <8d633b9c619649178bb2ee69b118675a@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [01/14] lucene-solr:jira/solr-8668: SOLR-7452: JSON Facet API - refining for numBuckets archived-at: Wed, 26 Apr 2017 17:48:17 -0000 Repository: lucene-solr Updated Branches: refs/heads/jira/solr-8668 4d1c775e2 -> e9c20eacc SOLR-7452: JSON Facet API - refining for numBuckets Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4f89f98f Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4f89f98f Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4f89f98f Branch: refs/heads/jira/solr-8668 Commit: 4f89f98f665ed659669ee1454bd32ff99ed097a7 Parents: 680f4d7 Author: yonik Authored: Tue Apr 25 11:00:30 2017 -0400 Committer: yonik Committed: Tue Apr 25 11:00:30 2017 -0400 ---------------------------------------------------------------------- .../solr/search/facet/FacetFieldProcessor.java | 30 +++++++++++------ .../facet/FacetFieldProcessorByArray.java | 2 +- .../search/facet/TestJsonFacetRefinement.java | 35 ++++++++++++++------ 3 files changed, 45 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f89f98f/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java ---------------------------------------------------------------------- diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java index 65b88d8..e9fb079 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessor.java @@ -290,17 +290,7 @@ abstract class FacetFieldProcessor extends FacetProcessor { if (!fcontext.isShard()) { res.add("numBuckets", numBuckets); } else { - DocSet domain = fcontext.base; - if (freq.prefix != null) { - Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix); - domain = fcontext.searcher.getDocSet(prefixFilter, domain); - } - - HLLAgg agg = new HLLAgg(freq.field); - SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1); - acc.collect(domain, 0); - acc.key = "numBuckets"; - acc.setValues(res, 0); + calculateNumBuckets(res); } } @@ -351,6 +341,20 @@ abstract class FacetFieldProcessor extends FacetProcessor { return res; } + private void calculateNumBuckets(SimpleOrderedMap target) throws IOException { + DocSet domain = fcontext.base; + if (freq.prefix != null) { + Query prefixFilter = sf.getType().getPrefixQuery(null, sf, freq.prefix); + domain = fcontext.searcher.getDocSet(prefixFilter, domain); + } + + HLLAgg agg = new HLLAgg(freq.field); + SlotAcc acc = agg.createSlotAcc(fcontext, domain.size(), 1); + acc.collect(domain, 0); + acc.key = "numBuckets"; + acc.setValues(target, 0); + } + private static class Slot { int slot; } @@ -582,6 +586,10 @@ abstract class FacetFieldProcessor extends FacetProcessor { } } + if (freq.numBuckets && !skipThisFacet) { + calculateNumBuckets(res); + } + // If there are just a couple of leaves, and if the domain is large, then // going by term is likely the most efficient? // If the domain is small, or if the number of leaves is large, then doing http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f89f98f/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java ---------------------------------------------------------------------- diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java index c19d55d..228678b 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArray.java @@ -66,7 +66,7 @@ abstract class FacetFieldProcessorByArray extends FacetFieldProcessor { refineResult = refineFacets(); // if we've seen this facet bucket, then refining can be done. If we haven't, we still // only need to continue if we need allBuckets or numBuckets info. - if (skipThisFacet || (!freq.allBuckets && !freq.numBuckets)) return refineResult; + if (skipThisFacet || !freq.allBuckets) return refineResult; } String prefix = freq.prefix; http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4f89f98f/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java ---------------------------------------------------------------------- diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java index 1561b3e..6353576 100644 --- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java +++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacetRefinement.java @@ -244,22 +244,23 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS { client.deleteByQuery("*:*", null); - ModifiableSolrParams p = params("cat_s", "cat_s", "xy_s", "xy_s", "num_d", "num_d", "qw_s", "qw_s"); + ModifiableSolrParams p = params("cat_s", "cat_s", "xy_s", "xy_s", "num_d", "num_d", "qw_s", "qw_s", "er_s","er_s"); String cat_s = p.get("cat_s"); String xy_s = p.get("xy_s"); String qw_s = p.get("qw_s"); + String er_s = p.get("er_s"); // this field is designed to test numBuckets refinement... the first phase will only have a single bucket returned for the top count bucket of cat_s String num_d = p.get("num_d"); - clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", xy_s, "X" ,num_d, -1, qw_s, "Q") ); // A wins count tie - clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", xy_s, "Y", num_d, 3 ) ); + clients.get(0).add( sdoc("id", "01", "all_s","all", cat_s, "A", xy_s, "X" ,num_d, -1, qw_s, "Q", er_s,"E") ); // A wins count tie + clients.get(0).add( sdoc("id", "02", "all_s","all", cat_s, "B", xy_s, "Y", num_d, 3 ) ); - clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", xy_s, "X", num_d, -5 ) ); // B highest count - clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", xy_s, "Y", num_d, -11, qw_s, "W") ); - clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", xy_s, "X", num_d, 7 ) ); + clients.get(1).add( sdoc("id", "11", "all_s","all", cat_s, "B", xy_s, "X", num_d, -5 , er_s,"E") ); // B highest count + clients.get(1).add( sdoc("id", "12", "all_s","all", cat_s, "B", xy_s, "Y", num_d, -11, qw_s, "W" ) ); + clients.get(1).add( sdoc("id", "13", "all_s","all", cat_s, "A", xy_s, "X", num_d, 7 , er_s,"R") ); // "R" will only be picked up via refinement when parent facet is cat_s - clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", xy_s, "X", num_d, 17, qw_s, "W") ); // A highest count - clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", xy_s, "Y", num_d, -19 ) ); - clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", xy_s, "X", num_d, 11 ) ); + clients.get(2).add( sdoc("id", "21", "all_s","all", cat_s, "A", xy_s, "X", num_d, 17, qw_s, "W", er_s,"E") ); // A highest count + clients.get(2).add( sdoc("id", "22", "all_s","all", cat_s, "A", xy_s, "Y", num_d, -19 ) ); + clients.get(2).add( sdoc("id", "23", "all_s","all", cat_s, "B", xy_s, "X", num_d, 11 ) ); client.commit(); @@ -388,7 +389,6 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS { ); // test filling in missing "allBuckets" - // test filling in "missing" bucket for partially refined facets client.testJQ(params(p, "q", "*:*", "json.facet", "{" + " cat :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, allBuckets:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, allBuckets:true, refine:false} } }" + @@ -402,6 +402,21 @@ public class TestJsonFacetRefinement extends SolrTestCaseHS { ",cat3:{ allBuckets:{count:8}, buckets:[ {val:A, count:4, xy:{buckets:[{count:3, val:X, f:23.0}], allBuckets:{count:4, f:4.0}}}] }" + "}" ); + + // test filling in missing numBuckets + client.testJQ(params(p, "q", "*:*", + "json.facet", "{" + + " cat :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, numBuckets:true, facet:{ er:{type:terms, field:${er_s}, limit:1, overrequest:0, numBuckets:true, refine:false} } }" + + ", cat2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true , numBuckets:true, facet:{ er:{type:terms, field:${er_s}, limit:1, overrequest:0, numBuckets:true, refine:true } } }" + + "}" + ) + , "facets=={ count:8" + + ", cat:{ numBuckets:2, buckets:[ {val:A, count:3, er:{numBuckets:1,buckets:[{count:2, val:E}] }}] }" + // the "R" bucket will not be seen w/o refinement + ",cat2:{ numBuckets:2, buckets:[ {val:A, count:4, er:{numBuckets:2,buckets:[{count:2, val:E}] }}] }" + + "}" + ); + + }