lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject [05/34] lucene-solr:jira/solr-9856: SOLR-9931: return 0 for hll on field with no values in bucket
Date Mon, 09 Jan 2017 16:09:55 GMT
SOLR-9931: return 0 for hll on field with no values in bucket


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a810fb32
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a810fb32
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a810fb32

Branch: refs/heads/jira/solr-9856
Commit: a810fb3234ec461e23c76533fbfcc523d4c46faa
Parents: 2cffa2e
Author: yonik <yonik@apache.org>
Authored: Thu Jan 5 17:02:24 2017 -0500
Committer: yonik <yonik@apache.org>
Committed: Thu Jan 5 17:02:24 2017 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 +++
 .../org/apache/solr/search/facet/HLLAgg.java    | 13 +++++++---
 .../apache/solr/search/facet/UniqueSlotAcc.java |  2 +-
 .../solr/search/facet/TestJsonFacets.java       | 27 ++++++++++++++++++++
 4 files changed, 41 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a810fb32/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 9de2d76..b747876 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -332,6 +332,9 @@ Bug Fixes
 * SOLR-9917: JSON Facet API percentile function caused a NullPointerException in distributed
mode when
   there were no values in a bucket from a shard. (yonik)
 
+* SOLR-9931: JSON Facet API hll (hyper-log-log) function returned 0 for non-empty buckets
with no field values
+  in local mode, but nothing for distributed mode.  Both modes now return 0.  (yonik)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a810fb32/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
index 005e9fc..72cdd27 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
@@ -27,6 +27,8 @@ import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
 
 public class HLLAgg extends StrAggValueSource {
+  public static Integer NO_VALUES = 0;
+
   protected HLLFactory factory;
 
   public HLLAgg(String field) {
@@ -71,10 +73,15 @@ public class HLLAgg extends StrAggValueSource {
 
   private static class Merger extends FacetSortableMerger {
     HLL aggregate = null;
-    long answer = -1;
+    long answer = -1; // -1 means unset
 
     @Override
     public void merge(Object facetResult, Context mcontext) {
+      if (facetResult instanceof Number) {
+        assert NO_VALUES.equals(facetResult);
+        return;
+      }
+
       SimpleOrderedMap map = (SimpleOrderedMap)facetResult;
       byte[] serialized = ((byte[])map.get("hll"));
       HLL subHLL = HLL.fromBytes(serialized);
@@ -87,7 +94,7 @@ public class HLLAgg extends StrAggValueSource {
 
     private long getLong() {
       if (answer < 0) {
-        answer = aggregate.cardinality();
+        answer = aggregate == null ? 0 : aggregate.cardinality();
       }
       return answer;
     }
@@ -170,7 +177,7 @@ public class HLLAgg extends StrAggValueSource {
 
     public Object getShardValue(int slot) throws IOException {
       HLL hll = sets[slot];
-      if (hll == null) return null;
+      if (hll == null) return NO_VALUES;
       SimpleOrderedMap map = new SimpleOrderedMap();
       map.add("hll", hll.toBytes());
       // optionally use explicit values

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a810fb32/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
index 9f9e9b1..ae542ac 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSlotAcc.java
@@ -73,7 +73,7 @@ abstract class UniqueSlotAcc extends SlotAcc {
 
   private Object getShardHLL(int slot) throws IOException {
     FixedBitSet ords = arr[slot];
-    if (ords == null) return null;  // TODO: when we get to refinements, may be useful to
return something???
+    if (ords == null) return HLLAgg.NO_VALUES;
 
     HLL hll = factory.getHLL();
     long maxOrd = ords.length();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a810fb32/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index d7e1cc0..d8f3ae5 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -870,6 +870,33 @@ public class TestJsonFacets extends SolrTestCaseHS {
             " }"
     );
 
+
+    // stats at top level, matching documents, but no values in the field
+    // NOTE: this represents the current state of what is returned, not the ultimate desired
state.
+    client.testJQ(params(p, "q", "id:3"
+        , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})',
min1:'min(${num_d})', max1:'max(${num_d})'" +
+            ", numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})',
unique_date:'unique(${date})'" +
+            ", where_hll:'hll(${where_s})', hll_num_i:'hll(${num_i})', hll_num_d:'hll(${num_d})',
hll_date:'hll(${date})'" +
+            ", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)' }"
+        )
+        , "facets=={count:1 " +
+            ",sum1:0.0," +
+            " sumsq1:0.0," +
+            " avg1:0.0," +   // TODO: undesirable. omit?
+            " min1:'NaN'," + // TODO: undesirable. omit?
+            " max1:'NaN'," +
+            " numwhere:0," +
+            " unique_num_i:0," +
+            " unique_num_d:0," +
+            " unique_date:0," +
+            " where_hll:0," +
+            " hll_num_i:0," +
+            " hll_num_d:0," +
+            " hll_date:0" +
+            " }"
+    );
+
+
     //
     // tests on a multi-valued field with actual multiple values, just to ensure that we
are
     // using a multi-valued method for the rest of the tests when appropriate.


Mime
View raw message