lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject svn commit: r1622476 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/handler/component/ core/src/java/org/apache/solr/spelling/ core/src/test/org/apache/solr/handler/component/ core/src/test/org/apache/solr/spelling/
Date Thu, 04 Sep 2014 13:57:05 GMT
Author: jdyer
Date: Thu Sep  4 13:57:05 2014
New Revision: 1622476

URL: http://svn.apache.org/r1622476
Log:
SOLR-6297: Fix for Distributed WordBreakSolrSpellChecker

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckMergeData.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Sep  4 13:57:05 2014
@@ -164,6 +164,9 @@ Bug Fixes
 * SOLR-6024: Fix StatsComponent when using docValues="true" multiValued="true"
   (Vitaliy Zhovtyuk & Tomas Fernandez-Lobbe via hossman)
 
+* SOLR-6297: Fix WordBreakSolrSpellChecker to not lose suggestions in shard/cloud 
+  environments (James Dyer)
+
 Other Changes
 ---------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
(original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
Thu Sep  4 13:57:05 2014
@@ -191,18 +191,19 @@ public class SpellCheckComponent extends
         }
         boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);
         
-        NamedList response = new SimpleOrderedMap();
-        
+        NamedList response = new SimpleOrderedMap();        
         NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults);
         response.add("suggestions", suggestions);
         
         if (extendedResults) {
           response.add("correctlySpelled", isCorrectlySpelled);
-        }
-        
+        }        
         if (collate) {
           addCollationsToResponse(params, spellingResult, rb, q, response, spellChecker.isSuggestionsMayOverlap());
         }
+        if (shardRequest) {
+          addOriginalTermsToResponse(response, tokens);
+        }
         
         rb.rsp.add("spellcheck", response);
 
@@ -260,6 +261,14 @@ public class SpellCheckComponent extends
     }
     response.add("collations", collationList);
   }
+  
+  private void addOriginalTermsToResponse(NamedList response, Collection<Token> originalTerms)
{
+    List<String> originalTermStr = new ArrayList<String>();
+    for(Token t : originalTerms) {
+      originalTermStr.add(t.toString());
+    }
+    response.add("originalTerms", originalTermStr);
+  }
 
   /**
    * For every param that is of the form "spellcheck.[dictionary name].XXXX=YYYY, add
@@ -392,8 +401,14 @@ public class SpellCheckComponent extends
     
   @SuppressWarnings("unchecked")
   private void collectShardSuggestions(NamedList nl, SpellCheckMergeData mergeData) {
-    System.out.println(nl);
-    SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);
+    SpellCheckResponse spellCheckResp = new SpellCheckResponse(nl);    
+    Iterable<Object> originalTermStrings = (Iterable<Object>) nl.get("originalTerms");
+    if(originalTermStrings!=null) {
+      mergeData.originalTerms = new HashSet<>();
+      for (Object originalTermObj : originalTermStrings) {
+        mergeData.originalTerms.add(originalTermObj.toString());
+      }
+    }
     for (SpellCheckResponse.Suggestion suggestion : spellCheckResp.getSuggestions()) {
       mergeData.origVsSuggestion.put(suggestion.getToken(), suggestion);
       HashSet<String> suggested = mergeData.origVsSuggested.get(suggestion.getToken());
@@ -615,8 +630,8 @@ public class SpellCheckComponent extends
         }
         
         if (hasFreqInfo) {
-          int tokenFrequency = spellingResult.getTokenFrequency(inputToken);
-          if (tokenFrequency == 0) {
+          Integer tokenFrequency = spellingResult.getTokenFrequency(inputToken);
+          if (tokenFrequency==null || tokenFrequency == 0) {
             hasZeroFrequencyToken = true;
           }
         }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckMergeData.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckMergeData.java?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckMergeData.java
(original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/SpellCheckMergeData.java
Thu Sep  4 13:57:05 2014
@@ -17,10 +17,12 @@ package org.apache.solr.handler.componen
  * limitations under the License.
  */
 
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.solr.client.solrj.response.SpellCheckResponse;
@@ -39,5 +41,14 @@ public class SpellCheckMergeData {
   // alternative string -> corresponding SuggestWord object
   public Map<String, SuggestWord> suggestedVsWord = new HashMap<>();
   public Map<String, SpellCheckCollation> collations = new HashMap<>();
+  //The original terms from the user's query.
+  public Set<String> originalTerms = null;
   public int totalNumberShardResponses = 0;
+  
+  public boolean isOriginalToQuery(String term) {
+    if(originalTerms==null) {
+      return true;
+    }
+    return originalTerms.contains(term);
+  }
 }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
(original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
Thu Sep  4 13:57:05 2014
@@ -19,6 +19,7 @@ package org.apache.solr.spelling;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
@@ -167,17 +168,20 @@ public class ConjunctionSolrSpellChecker
             Map.Entry<String,Integer> corr = iter.next();
             combinedResult.add(original, corr.getKey(), corr.getValue());
             Integer tokenFrequency = combinedTokenFrequency.get(original);
-            if(tokenFrequency!=null) {
-              combinedResult.addFrequency(original, tokenFrequency);
-            }
+            combinedResult.addFrequency(original, tokenFrequency==null ? 0 : tokenFrequency);
             if(++numberAdded==numSug) {
               break;
             }
           }
         }        
         if(!anyData) {
+          if(numberAdded==0) {
+            combinedResult.add(original, Collections.<String>emptyList());
+            Integer tokenFrequency = combinedTokenFrequency.get(original);
+            combinedResult.addFrequency(original, tokenFrequency==null ? 0 : tokenFrequency);
+          }
           break;
-        }
+        }        
       }      
     }    
     return combinedResult;

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java Thu
Sep  4 13:57:05 2014
@@ -100,9 +100,11 @@ public abstract class SolrSpellChecker {
     for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet())
{
       String original = entry.getKey();
       
-      //Only use this suggestion if all shards reported it as misspelled.
+      //Only use this suggestion if all shards reported it as misspelled, 
+      //unless it was not a term original to the user's query
+      //(WordBreakSolrSpellChecker can add new terms to the response, and we want to keep
these)
       Integer numShards = mergeData.origVsShards.get(original);
-      if(numShards<mergeData.totalNumberShardResponses) {
+      if(numShards<mergeData.totalNumberShardResponses && mergeData.isOriginalToQuery(original))
{
         continue;
       }
       

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
(original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
Thu Sep  4 13:57:05 2014
@@ -202,6 +202,7 @@ public class WordBreakSolrSpellChecker e
     List<Term> termArr = new ArrayList<>(options.tokens.size() + 2);
     
     List<ResultEntry> breakSuggestionList = new ArrayList<>();
+    List<ResultEntry> noBreakSuggestionList = new ArrayList<>();
     boolean lastOneProhibited = false;
     boolean lastOneRequired = false;
     boolean lastOneprocedesNewBooleanOp = false;
@@ -228,6 +229,9 @@ public class WordBreakSolrSpellChecker e
       if (breakWords) {
         SuggestWord[][] breakSuggestions = wbsp.suggestWordBreaks(thisTerm,
             numSuggestions, ir, options.suggestMode, sortMethod);
+        if(breakSuggestions.length==0) {
+          noBreakSuggestionList.add(new ResultEntry(tokenArr[i], null, 0));
+        }
         for (SuggestWord[] breakSuggestion : breakSuggestions) {
           sb.delete(0, sb.length());
           boolean firstOne = true;
@@ -248,7 +252,9 @@ public class WordBreakSolrSpellChecker e
               freq));
         }
       }
-    }    
+    }
+    breakSuggestionList.addAll(noBreakSuggestionList);
+    
     List<ResultEntry> combineSuggestionList = Collections.emptyList();
     CombineSuggestion[] combineSuggestions = wbsp.suggestWordCombinations(
         termArr.toArray(new Term[termArr.size()]), numSuggestions, ir, options.suggestMode);
@@ -282,33 +288,24 @@ public class WordBreakSolrSpellChecker e
     int combineCount = 0;
     while (lastBreak != null || lastCombine != null) {
       if (lastBreak == null) {
-        result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
-        result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
+        addToResult(result, lastCombine.token, getCombineFrequency(ir, lastCombine.token),
lastCombine.suggestion, lastCombine.freq);
         lastCombine = null;
       } else if (lastCombine == null) {
-        result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
-        result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
+        addToResult(result, lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())),
lastBreak.suggestion, lastBreak.freq);
         lastBreak = null;
       } else if (lastBreak.freq < lastCombine.freq) {
-        result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
-        result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
+        addToResult(result, lastCombine.token, getCombineFrequency(ir, lastCombine.token),
lastCombine.suggestion, lastCombine.freq);        
         lastCombine = null;
       } else if (lastCombine.freq < lastBreak.freq) {
-        result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
-        result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
+        addToResult(result, lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())),
lastBreak.suggestion, lastBreak.freq);
         lastBreak = null;
-      } else if (breakCount >= combineCount) {
-        result.add(lastCombine.token, lastCombine.suggestion, lastCombine.freq);
-        result.addFrequency(lastCombine.token, getCombineFrequency(ir, lastCombine.token));
+      } else if (breakCount >= combineCount) { //TODO: Should reverse >= to < ??S
+        addToResult(result, lastCombine.token, getCombineFrequency(ir, lastCombine.token),
lastCombine.suggestion, lastCombine.freq);        
         lastCombine = null;
       } else {
-        result.add(lastBreak.token, lastBreak.suggestion, lastBreak.freq);
-        result.addFrequency(lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())));
+        addToResult(result, lastBreak.token, ir.docFreq(new Term(field, lastBreak.token.toString())),
lastBreak.suggestion, lastBreak.freq);        
         lastBreak = null;
       }
-      if (result.getSuggestions().size() > numSuggestions) {
-        break;
-      }
       if (lastBreak == null && breakIter.hasNext()) {
         lastBreak = breakIter.next();
         breakCount++;
@@ -320,6 +317,15 @@ public class WordBreakSolrSpellChecker e
     }
     return result;
   }
+  private void addToResult(SpellingResult result, Token token, int tokenFrequency, String
suggestion, int suggestionFrequency) {
+    if(suggestion==null) {
+      result.add(token, Collections.<String>emptyList());
+      result.addFrequency(token, tokenFrequency);
+    } else {
+      result.add(token, suggestion, suggestionFrequency);
+      result.addFrequency(token, tokenFrequency);
+    }
+  }
   
   private int getCombineFrequency(IndexReader ir, Token token) throws IOException {
     String[] words = spacePattern.split(token.toString());

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
(original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
Thu Sep  4 13:57:05 2014
@@ -187,6 +187,12 @@ public class DistributedSpellCheckCompon
     query(buildRequest("lowerfilt:(+quock +redfox +jum +ped)", 
         false, reqHandlerWithWordbreak, random().nextBoolean(), extended, "true", count,
"10", 
         collate, "true", maxCollationTries, "0", maxCollations, "1", collateExtended, "true"));
+    query(buildRequest("lowerfilt:(+rodfix)", 
+        false, reqHandlerWithWordbreak, random().nextBoolean(), extended, "true", count,
"10", 
+        collate, "true", maxCollationTries, "0", maxCollations, "1", collateExtended, "true"));
+    query(buildRequest("lowerfilt:(+son +ata)", 
+        false, reqHandlerWithWordbreak, random().nextBoolean(), extended, "true", count,
"10", 
+        collate, "true", maxCollationTries, "0", maxCollations, "1", collateExtended, "true"));
   }
   private Object[] buildRequest(String q, boolean useSpellcheckQ, String handlerName, boolean
useGrouping, String... addlParams) {
     List<Object> params = new ArrayList<>();

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java?rev=1622476&r1=1622475&r2=1622476&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
(original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
Thu Sep  4 13:57:05 2014
@@ -76,7 +76,7 @@ public class WordBreakSolrSpellCheckerTe
     searcher.decref();
     
     assertTrue(result != null && result.getSuggestions() != null);
-    assertTrue(result.getSuggestions().size()==6);
+    assertTrue(result.getSuggestions().size()==9);
     
     for(Map.Entry<Token, LinkedHashMap<String, Integer>> s : result.getSuggestions().entrySet())
{
       Token orig = s.getKey();
@@ -119,7 +119,28 @@ public class WordBreakSolrSpellCheckerTe
         assertTrue(orig.length()==4);
         assertTrue(corr.length==1);
         assertTrue(corr[0].equals("pi ne"));
-      } else {
+      } else if(orig.toString().equals("pine")) {
+        assertTrue(orig.startOffset()==10);
+        assertTrue(orig.endOffset()==14);
+        assertTrue(orig.length()==4);
+        assertTrue(corr.length==1);
+        assertTrue(corr[0].equals("pi ne"));
+      } else if(orig.toString().equals("apple")) {
+        assertTrue(orig.startOffset()==15);
+        assertTrue(orig.endOffset()==20);
+        assertTrue(orig.length()==5);
+        assertTrue(corr.length==0);
+      } else if(orig.toString().equals("good")) {
+        assertTrue(orig.startOffset()==21);
+        assertTrue(orig.endOffset()==25);
+        assertTrue(orig.length()==4);
+        assertTrue(corr.length==0);
+      } else if(orig.toString().equals("ness")) {
+        assertTrue(orig.startOffset()==26);
+        assertTrue(orig.endOffset()==30);
+        assertTrue(orig.length()==4);
+        assertTrue(corr.length==0);
+      }else {
         fail("Unexpected original result: " + orig);
       }        
     }  



Mime
View raw message