lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ctarg...@apache.org
Subject [lucene-solr] 10/38: SOLR-14514: add extra checks for picking 'stream' method in JSON facet
Date Fri, 15 Jan 2021 21:45:21 GMT
This is an automated email from the ASF dual-hosted git repository.

ctargett pushed a commit to branch jira/solr-13105-toMerge
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 808f44e603447d9415f027e726d2435c8e5b1564
Author: Munendra S N <munendrasn@apache.org>
AuthorDate: Thu Nov 26 12:01:51 2020 +0530

    SOLR-14514: add extra checks for picking 'stream' method in JSON facet
    
    missing, allBuckets, and numBuckets is not supported with stream method.
    So, avoiding picking stream method when any one of them is enabled even if
    facet sort is 'index asc'
---
 solr/CHANGES.txt                                   |  3 +++
 .../org/apache/solr/search/facet/FacetField.java   |  5 +++-
 .../solr/search/facet/TestCloudJSONFacetSKG.java   | 29 ++++++++--------------
 .../search/facet/TestCloudJSONFacetSKGEquiv.java   | 23 ++++++-----------
 .../apache/solr/search/facet/TestJsonFacets.java   | 28 +++++++++++----------
 solr/solr-ref-guide/src/json-facet-api.adoc        |  2 +-
 6 files changed, 42 insertions(+), 48 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index b6e74bb..a8555a2 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -310,6 +310,9 @@ Bug Fixes
 * SOLR-12539: Handle parsing of values for 'other', 'include', and 'excludeTags' in JSON
facets when specified as
   comma-separated values with extra spaces (hossman, Munendra S N)
 
+* SOLR-14514: Avoid picking 'stream' method in JSON facet when any of 'allBuckets', 'numBuckets',
and 'missing' parameters are enabled
+  (hossman, Munendra S N)
+
 Other Changes
 ---------------------
 
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index 728cd6e..fa1c085 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -105,7 +105,10 @@ public class FacetField extends FacetRequestSorted {
       method = FacetMethod.STREAM;
     }
     if (method == FacetMethod.STREAM && sf.indexed() && !ft.isPointField()
&&
-        // wether we can use stream processing depends on wether this is a shard request,
wether
+        // streaming doesn't support allBuckets, numBuckets or missing
+        // so, don't use stream processor if anyone of them is enabled
+        !(allBuckets || numBuckets || missing) &&
+        // whether we can use stream processing depends on whether this is a shard request,
whether
         // re-sorting has been requested, and if the effective sort during collection is
"index asc"
         ( fcontext.isShard()
           // for a shard request, the effective per-shard sort must be index asc
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
index 0992af8..f2b67d5 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKG.java
@@ -45,18 +45,17 @@ import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
-import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
-
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
 import org.noggit.JSONWriter.Writable;
-
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.search.facet.RelatednessAgg.computeRelatedness;
+import static org.apache.solr.search.facet.RelatednessAgg.roundTo5Digits;
+
 /** 
  * <p>
  * A randomized test of nested facets using the <code>relatedness()</code> function,
that asserts the 
@@ -65,13 +64,13 @@ import org.slf4j.LoggerFactory;
  * <p>
  * Note that unlike normal facet "count" verification, using a high limit + overrequest isn't
a substitute 
  * for refinement in order to ensure accurate "skg" computation across shards.  For that
reason, this 
- * tests forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain})
and specifices a 
- * <code>domain: { 'query':'*:*' }</code> for every facet, in order to garuntee
that all shards 
+ * tests forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain})
and specifies a
+ * <code>domain: { 'query':'*:*' }</code> for every facet, in order to guarantee
that all shards
  * participate in all facets, so that the popularity &amp; relatedness values returned
can be proven 
  * with validation requests.
  * </p>
  * <p>
- * (Refinement alone is not enough. Using the '*:*' query as the facet domain is neccessary
to 
+ * (Refinement alone is not enough. Using the '*:*' query as the facet domain is necessary
to
  * prevent situations where a single shardX may return candidate bucket with no child-buckets
due to 
  * the normal facet intersections, but when refined on other shardY(s), can produce "high
scoring" 
  * SKG child-buckets, which would then be missing the foreground/background "size" contributions
from 
@@ -156,7 +155,7 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
       SolrInputDocument doc = sdoc("id", ""+id);
       for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
         // NOTE: we ensure every doc has at least one value in each field
-        // that way, if a term is returned for a parent there there is garunteed to be at
least one
+        // that way, if a term is returned for a parent there there is guaranteed to be at
least one
         // one term in the child facet as well.
         //
         // otherwise, we'd face the risk of a single shardX returning parentTermX as a top
term for
@@ -226,7 +225,7 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
 
   /**
    * Given a (random) field number, returns a random (integer based) value for that field.
-   * NOTE: The number of unique values in each field is constant acording to {@link #UNIQUE_FIELD_VALS}
+   * NOTE: The number of unique values in each field is constant according to {@link #UNIQUE_FIELD_VALS}
    * but the precise <em>range</em> of values will vary for each unique field
number, such that cross field joins 
    * will match fewer documents based on how far apart the field numbers are.
    *
@@ -279,7 +278,7 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
       
       // NOTE that these two queries & facets *should* effectively identical given that
the
       // very large limit value is big enough no shard will ever return that may terms,
-      // but the "limit=-1" case it actaully triggers slightly different code paths
+      // but the "limit=-1" case it actually triggers slightly different code paths
       // because it causes FacetField.returnsPartial() to be "true"
       for (int limit : new int[] { 999999999, -1 }) {
         Map<String,TermFacet> facets = new LinkedHashMap<>();
@@ -769,14 +768,8 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
      *
      *
      * @return a Boolean, may be null
-     * @see <a href="https://issues.apache.org/jira/browse/SOLR-14514">SOLR-14514:
allBuckets ignored by method:stream</a>
      */
     public static Boolean randomAllBucketsParam(final Random r, final String sort) {
-
-      if ("index asc".equals(sort)) {
-        return null;
-      }
-      
       switch(r.nextInt(4)) {
         case 0: return true;
         case 1: return false;
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
index eb68662..8e09593 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestCloudJSONFacetSKGEquiv.java
@@ -20,8 +20,8 @@ import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.nio.file.Path;
 import java.nio.file.Paths;
-import java.util.Arrays;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.LinkedHashMap;
@@ -47,22 +47,21 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
-import static org.apache.solr.search.facet.FacetField.FacetMethod;
-import static org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc.SWEEP_COLLECTION_DEBUG_KEY;
-
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.noggit.JSONUtil;
 import org.noggit.JSONWriter;
 import org.noggit.JSONWriter.Writable;
-  
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.solr.search.facet.FacetField.FacetMethod;
+import static org.apache.solr.search.facet.SlotAcc.SweepingCountSlotAcc.SWEEP_COLLECTION_DEBUG_KEY;
+
 /** 
  * <p>
  * A randomized test of nested facets using the <code>relatedness()</code> function,
that asserts the 
- * results are consistent and equivilent regardless of what <code>method</code>
(ie: FacetFieldProcessor) 
+ * results are consistent and equivalent regardless of what <code>method</code>
(ie: FacetFieldProcessor)
  * and/or <code>{@value RelatednessAgg#SWEEP_COLLECTION}</code> option is requested.
  * </p>
  * <p>
@@ -71,7 +70,7 @@ import org.slf4j.LoggerFactory;
  * because this test does not attempt to prove the results with validation requests.
  * </p>
  * <p>
- * This test only concerns itself with the equivilency of results
+ * This test only concerns itself with the equivalency of results
  * </p>
  * 
  * @see TestCloudJSONFacetSKG
@@ -986,14 +985,8 @@ public class TestCloudJSONFacetSKGEquiv extends SolrCloudTestCase {
      * </p>
      *
      * @return a Boolean, may be null
-     * @see <a href="https://issues.apache.org/jira/browse/SOLR-14514">SOLR-14514:
allBuckets ignored by method:stream</a>
      */
     public static Boolean randomAllBucketsParam(final Random r, final String sort) {
-
-      if ("index asc".equals(sort)) {
-        return null;
-      }
-      
       switch(r.nextInt(4)) {
         case 0: return true;
         case 1: return false;
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index c5a7a1d..166968b 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -488,21 +488,17 @@ public class TestJsonFacets extends SolrTestCaseHS {
     }
 
     // relatedness shouldn't be computed for allBuckets, but it also shouldn't cause any
problems
-    //
-    // NOTE: we can't test this with 'index asc' because STREAM processor
-    // (which test may randomize as default) doesn't support allBuckets
-    // see: https://issues.apache.org/jira/browse/SOLR-14514
-    //
     for (String sort : Arrays.asList("sort:'y desc'",
                                      "sort:'z desc'",
                                      "sort:'skg desc'",
+                                     "sort:'index asc'",
                                      "prelim_sort:'count desc', sort:'skg desc'")) {
-      // the relatedness score of each of our cat_s values is (conviniently) also alphabetical
order,
+      // the relatedness score of each of our cat_s values is (conveniently) also alphabetical
order,
       // (and the same order as 'sum(num_i) desc' & 'min(num_i) desc')
       //
       // So all of these re/sort options should produce identical output (since the num buckets
is < limit)
       // - Testing "index" sort allows the randomized use of "stream" processor as default
to be tested.
-      // - Testing (re)sorts on other stats sanity checks code paths where relatedness()
is a "defered" Agg
+      // - Testing (re)sorts on other stats sanity checks code paths where relatedness()
is a "deferred" Agg
       for (String limit : Arrays.asList(", ", ", limit:5, ", ", limit:-1, ")) {
         // results shouldn't change regardless of our limit param"
         assertJQ(req("q", "cat_s:[* TO *]", "rows", "0",
@@ -524,7 +520,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
                  + "             background_popularity: 0.33333, },"
                  + "   }, "
                  + "   { val:'B', count:3, y:-3.0, z:-5, "
-                 + "     skg : { relatedness: 0.0, " // perfectly average and uncorrolated
+                 + "     skg : { relatedness: 0.0, " // perfectly average and uncorrelated
                  //+ "             foreground_count: 1, "
                  //+ "             foreground_size: 2, "
                  //+ "             background_count: 3, "
@@ -1003,11 +999,14 @@ public class TestJsonFacets extends SolrTestCaseHS {
     // test streaming
     assertJQ(req("q", "*:*", "rows", "0"
             , "json.facet", "{   cat:{terms:{field:'cat_s', method:stream }}" + // won't
stream; need sort:index asc
-                              ", cat2:{terms:{field:'cat_s', method:stream, sort:'index asc'
}}" +
-                              ", cat3:{terms:{field:'cat_s', method:stream, sort:'index asc',
mincount:3 }}" + // mincount
-                              ", cat4:{terms:{field:'cat_s', method:stream, sort:'index asc',
prefix:B }}" + // prefix
-                              ", cat5:{terms:{field:'cat_s', method:stream, sort:'index asc',
offset:1 }}" + // offset
-                " }"
+            ", cat2:{terms:{field:'cat_s', method:stream, sort:'index asc' }}" +
+            ", cat3:{terms:{field:'cat_s', method:stream, sort:'index asc', mincount:3 }}"
+ // mincount
+            ", cat4:{terms:{field:'cat_s', method:stream, sort:'index asc', prefix:B }}"
+ // prefix
+            ", cat5:{terms:{field:'cat_s', method:stream, sort:'index asc', offset:1 }}"
+ // offset
+            ", cat6:{terms:{field:'cat_s', method:stream, sort:'index asc', missing:true
}}" + // missing
+            ", cat7:{terms:{field:'cat_s', method:stream, sort:'index asc', numBuckets:true
}}" + // numBuckets
+            ", cat8:{terms:{field:'cat_s', method:stream, sort:'index asc', allBuckets:true
}}" + // allBuckets
+            " }"
         )
         , "facets=={count:6 " +
             ", cat :{buckets:[{val:B, count:3},{val:A, count:2}]}" +
@@ -1015,6 +1014,9 @@ public class TestJsonFacets extends SolrTestCaseHS {
             ", cat3:{buckets:[{val:B, count:3}]}" +
             ", cat4:{buckets:[{val:B, count:3}]}" +
             ", cat5:{buckets:[{val:B, count:3}]}" +
+            ", cat6:{missing:{count:1}, buckets:[{val:A, count:2},{val:B, count:3}]}" +
+            ", cat7:{numBuckets:2, buckets:[{val:A, count:2},{val:B, count:3}]}" +
+            ", cat8:{allBuckets:{count:5}, buckets:[{val:A, count:2},{val:B, count:3}]}"
+
             " }"
     );
 
diff --git a/solr/solr-ref-guide/src/json-facet-api.adoc b/solr/solr-ref-guide/src/json-facet-api.adoc
index bc336c3..5e83807 100644
--- a/solr/solr-ref-guide/src/json-facet-api.adoc
+++ b/solr/solr-ref-guide/src/json-facet-api.adoc
@@ -220,7 +220,7 @@ This parameter indicates the facet algorithm to use:
 * "uif" UnInvertedField, collect into ordinal array
 * "dvhash" DocValues, collect into hash - improves efficiency over high cardinality fields
 * "enum" TermsEnum then intersect DocSet (stream-able)
-* "stream" Presently equivalent to "enum"
+* "stream" Presently equivalent to "enum" - used for indexed, non-point fields with sort
'index asc' and allBuckets, numBuckets, missing disabled.
 * "smart" Pick the best method for the field type (this is the default)
 
 |prelim_sort |An optional parameter for specifying an approximation of the final `sort` to
use during initial collection of top buckets when the <<json-facet-api.adoc#sorting-facets-by-nested-functions,`sort`
parameter is very costly>>.


Mime
View raw message