lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject svn commit: r1720637 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/handler/component/ solr/core/src/test/org/apache/solr/handler/component/ solr/solrj/ solr/solrj/src/java/org/apache/solr/common/params/
Date Thu, 17 Dec 2015 19:43:07 GMT
Author: jdyer
Date: Thu Dec 17 19:43:07 2015
New Revision: 1720637

URL: http://svn.apache.org/viewvc?rev=1720637&view=rev
Log:
SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter query results

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
    lucene/dev/branches/branch_5x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1720637&r1=1720636&r2=1720637&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Thu Dec 17 19:43:07 2015
@@ -22,6 +22,9 @@ New Features
 
 * SOLR-8434: Add wildcard support to role, to match any role in RuleBasedAuthorizationPlugin (noble)
 
+* SOLR-4280: Allow specifying "spellcheck.maxResultsForSuggest" as a percentage of filter
+  query results (Markus Jelsma via James Dyer)
+
 
 Bug Fixes
 ----------------------

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=1720637&r1=1720636&r2=1720637&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
(original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
Thu Dec 17 19:43:07 2015
@@ -42,6 +42,7 @@ import org.apache.lucene.analysis.tokena
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.search.spell.SuggestMode;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.solr.client.solrj.response.SpellCheckResponse;
@@ -53,11 +54,17 @@ import org.apache.solr.common.params.Sol
 import org.apache.solr.common.params.SpellingParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrEventListener;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.QParser;
+import org.apache.solr.search.QParserPlugin;
+import org.apache.solr.search.SyntaxError;
+import org.apache.solr.search.SolrCache;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.spelling.AbstractLuceneSpellChecker;
 import org.apache.solr.spelling.ConjunctionSolrSpellChecker;
@@ -160,7 +167,9 @@ public class SpellCheckComponent extends
         boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
         float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
         int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT,
0);
-        Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
+        //If specified, this can be a discrete # of results, or a percentage of fq results.
+        Integer maxResultsForSuggest = maxResultsForSuggest(rb);
+        
         ModifiableSolrParams customParams = new ModifiableSolrParams();
         for (String checkerName : getDictionaryNames(params)) {
           customParams.add(getCustomParams(checkerName, params));
@@ -173,6 +182,7 @@ public class SpellCheckComponent extends
         } else {
           hits = hitsInteger.longValue();
         }
+        
         SpellingResult spellingResult = null;
         if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
           SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
@@ -214,7 +224,60 @@ public class SpellCheckComponent extends
       }
     }
   }
-
+  
+  private Integer maxResultsForSuggest(ResponseBuilder rb) {
+    SolrParams params = rb.req.getParams();
+    float maxResultsForSuggestParamValue = params.getFloat(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST,
0.0f);
+    Integer maxResultsForSuggest = null;
+    
+    if (maxResultsForSuggestParamValue > 0.0f) {
+      if (maxResultsForSuggestParamValue == (int) maxResultsForSuggestParamValue) {
+        // If a whole number was passed in, this is a discrete number of documents
+        maxResultsForSuggest = (int) maxResultsForSuggestParamValue;
+      } else {
+        // If a fractional value was passed in, this is the % of documents returned by the
specified filter
+        // If no specified filter, we use the most restrictive filter of the fq parameters
+        String maxResultsFilterQueryString = params.get(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ);
+        
+        int maxResultsByFilters = Integer.MAX_VALUE;
+        SolrIndexSearcher searcher = rb.req.getSearcher();
+        
+        try {
+          if (maxResultsFilterQueryString != null) {
+            // Get the default Lucene query parser
+            QParser parser = QParser.getParser(maxResultsFilterQueryString, QParserPlugin.DEFAULT_QTYPE,
rb.req);              
+            DocSet s = searcher.getDocSet(parser.getQuery());
+            maxResultsByFilters = s.size();
+          } else {
+            List<Query> filters = rb.getFilters();
+
+            // Get the maximum possible hits within these filters (size of most restrictive
filter). 
+            if (filters != null) {
+              for (Query query : filters) {
+                DocSet s = searcher.getDocSet(query);
+                if (s != null) {
+                  maxResultsByFilters = Math.min(s.size(), maxResultsByFilters);
+                }
+              }
+            }
+          }
+        } catch (IOException e){
+          LOG.error(e.toString());
+          return null;
+        } catch (SyntaxError e) {
+          LOG.error(e.toString());
+          return null;
+        }
+        
+        // Recalculate maxResultsForSuggest if filters were specified
+        if (maxResultsByFilters != Integer.MAX_VALUE) {
+          maxResultsForSuggest = Math.round(maxResultsByFilters * maxResultsForSuggestParamValue);
+        }
+      }
+    }
+    return maxResultsForSuggest;
+  }
+  
   @SuppressWarnings("unchecked")
   protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult,
ResponseBuilder rb, String q,
       NamedList response, boolean suggestionsMayOverlap) {
@@ -319,7 +382,7 @@ public class SpellCheckComponent extends
     boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS,
false);
     int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
     int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
-    Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
+    Integer maxResultsForSuggest = maxResultsForSuggest(rb);
     int count = rb.req.getParams().getInt(SPELLCHECK_COUNT, 1);
     int numSug = Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT);
 
@@ -330,7 +393,7 @@ public class SpellCheckComponent extends
         origQuery = params.get(CommonParams.Q);
       }
     }
-
+    
     long hits = rb.grouping() ? rb.totalHitCount : rb.getNumberDocumentsFound();
     boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java?rev=1720637&r1=1720636&r2=1720637&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
(original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
Thu Dec 17 19:43:07 2015
@@ -173,6 +173,10 @@ public class DistributedSpellCheckCompon
         false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",

         collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
         altTermCount, "5", maxResults, "10"));
+    query(buildRequest("lowerfilt:(\"rod fix\")", 
+        false, requestHandlerName, random().nextBoolean(), extended, "true", count, "10",

+        collate, "true", maxCollationTries, "10", maxCollations, "1", collateExtended, "false",
+        altTermCount, "5", maxResults, ".10", "fq", "id:[13 TO 22]"));
     
     //Test word-break spellchecker
     query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)", 

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java?rev=1720637&r1=1720636&r2=1720637&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
(original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/SpellCheckComponentTest.java
Thu Dec 17 19:43:07 2015
@@ -61,11 +61,11 @@ public class SpellCheckComponentTest ext
     assertU((adoc("id", "2", "lowerfilt", "This is a document")));
     assertU((adoc("id", "3", "lowerfilt", "another document")));
     //bunch of docs that are variants on blue
-    assertU((adoc("id", "4", "lowerfilt", "blue")));
-    assertU((adoc("id", "5", "lowerfilt", "blud")));
-    assertU((adoc("id", "6", "lowerfilt", "boue")));
-    assertU((adoc("id", "7", "lowerfilt", "glue")));
-    assertU((adoc("id", "8", "lowerfilt", "blee")));
+    assertU((adoc("id", "4", "lowerfilt", "this blue")));
+    assertU((adoc("id", "5", "lowerfilt", "this blud")));
+    assertU((adoc("id", "6", "lowerfilt", "this boue")));
+    assertU((adoc("id", "7", "lowerfilt", "this glue")));
+    assertU((adoc("id", "8", "lowerfilt", "this blee")));
     assertU((adoc("id", "9", "lowerfilt", "pixmaa 12345")));
     assertU((commit()));
   }
@@ -80,6 +80,58 @@ public class SpellCheckComponentTest ext
   }
   
   @Test
+  public void testMaximumResultsForSuggest() throws Exception {
+   assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD,
"true", "q","lowerfilt:(this OR brwn)",
+        SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false",
SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "7")
+        ,"/spellcheck/suggestions/[0]=='brwn'"
+        ,"/spellcheck/suggestions/[1]/numFound==1"
+     );
+    try {
+      assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD,
"true", "q","lowerfilt:(this OR brwn)",
+          SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false",
SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "6")
+          ,"/spellcheck/suggestions/[1]/numFound==1"
+       );
+      fail("there should have been no suggestions (6<7)");
+    } catch(Exception e) {
+      //correctly threw exception
+    }
+    assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD,
"true", "q","lowerfilt:(this OR brwn)",
+        "fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /* returns
8, most selective */
+        SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false",
SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".90")
+        ,"/spellcheck/suggestions/[0]=='brwn'"
+        ,"/spellcheck/suggestions/[1]/numFound==1"
+     );
+    try {
+      assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD,
"true", "q","lowerfilt:(this OR brwn)",
+          "fq", "id:[0 TO 9]", /*returns 10, less selective */ "fq", "lowerfilt:th*", /*
returns 8, most selective */
+          SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false",
SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".80")
+          ,"/spellcheck/suggestions/[1]/numFound==1"
+       );
+      fail("there should have been no suggestions ((.8 * 8)<7)");
+    } catch(Exception e) {
+      //correctly threw exception
+    }
+    
+    
+    assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD,
"true", "q","lowerfilt:(this OR brwn)",
+        "fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "id:[0
TO 9]", 
+        SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false",
SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".70")
+        ,"/spellcheck/suggestions/[0]=='brwn'"
+        ,"/spellcheck/suggestions/[1]/numFound==1"
+     );
+    try {
+      assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD,
"true", "q","lowerfilt:(this OR brwn)",
+          "fq", "id:[0 TO 9]", SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ, "lowerfilt:th*",

+          SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false",
SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, ".64")
+          ,"/spellcheck/suggestions/[1]/numFound==1"
+       );
+      fail("there should have been no suggestions ((.64 * 10)<7)");
+    } catch(Exception e) {
+      //correctly threw exception
+    }
+  } 
+  
+  @Test
   public void testExtendedResultsCount() throws Exception {
     assertJQ(req("qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", SpellingParams.SPELLCHECK_BUILD,
"true", "q","bluo", SpellingParams.SPELLCHECK_COUNT,"5", SpellingParams.SPELLCHECK_EXTENDED_RESULTS,"false")
        ,"/spellcheck/suggestions/[0]=='bluo'"

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java?rev=1720637&r1=1720636&r2=1720637&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
(original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
Thu Dec 17 19:43:07 2015
@@ -52,8 +52,10 @@ public interface SpellingParams {
   /**
    * <p>
    * The maximum number of hits the request can return in order to both 
-   * generate spelling suggestions and set the "correctlySpelled" element to "false".   
-   * Note that this parameter is typically of use only in conjunction with "spellcheck.alternativeTermCount".
+   * generate spelling suggestions and set the "correctlySpelled" element to "false". This can be specified
+   * either as a whole number of documents, or it can be expressed as a fractional percentage
+   * of documents returned by a chosen filter query.  By default, the chosen filter is the most restrictive
+   * fq clause.  This can be overridden with {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ}.
    * </p>
    * <p>
   * If left unspecified, the default behavior will prevail.  That is, "correctlySpelled" will be false and suggestions
@@ -67,6 +69,14 @@ public interface SpellingParams {
   public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST = SPELLCHECK_PREFIX + "maxResultsForSuggest";
   
   /**
+   *<p>
+   * To be used when {@link SpellingParams#SPELLCHECK_MAX_RESULTS_FOR_SUGGEST} is expressed as a fractional percentage.
+   * Specify a filter query whose result count is used to determine the maximum number of documents.
+   *</p>   
+   */
+  public static final String SPELLCHECK_MAX_RESULTS_FOR_SUGGEST_FQ = SPELLCHECK_MAX_RESULTS_FOR_SUGGEST
+ ".fq";
+  
+  /**
    * When this parameter is set to true and the misspelled word exists in the
    * user field, only words that occur more frequently in the Solr field than
    * the one given will be returned. The default value is false.



Mime
View raw message