lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From m...@apache.org
Subject lucene-solr:master: LUCENE-7827: AnalyzingInfixSuggester omits textgrams when minPrefixChars=0
Date Wed, 23 Aug 2017 20:33:58 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/master a3bcf7770 -> 7760b3564


LUCENE-7827: AnalyzingInfixSuggester omits textgrams when
minPrefixChars=0

Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7760b356
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7760b356
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7760b356

Branch: refs/heads/master
Commit: 7760b356458e6e71d94b055d23a1095b22b8460b
Parents: a3bcf77
Author: Mikhail Khludnev <mkhl@apache.org>
Authored: Fri Jun 30 11:07:04 2017 +0300
Committer: Mikhail Khludnev <mkhl@apache.org>
Committed: Wed Aug 23 19:55:32 2017 +0300

----------------------------------------------------------------------
 lucene/CHANGES.txt                                    |  3 +++
 .../suggest/analyzing/AnalyzingInfixSuggester.java    | 14 +++++++++++---
 2 files changed, 14 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7760b356/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c43220c..c31adcd 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -39,6 +39,9 @@ Optimizations
 * LUCENE-7939: MinShouldMatchSumScorer now leverages two-phase iteration in
   order to be faster when used in conjunctions. (Adrien Grand)
 
+* LUCENE-7827: AnalyzingInfixSuggester doesn't create "textgrams"
+  when minPrefixChars=0 (Mikhail Khludnev)
+
 Bug Fixes
 
 * LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7760b356/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
index 0ca81c7..a787e6b 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
@@ -107,6 +107,10 @@ import org.apache.lucene.util.RamUsageEstimator;
 
 public class AnalyzingInfixSuggester extends Lookup implements Closeable {
 
+  /** Field name for the edge n-grams that let prefixes shorter than
+   * {@linkplain #minPrefixChars} be matched with a TermQuery instead of a PrefixQuery. */
+  protected final static String TEXTGRAMS_FIELD_NAME = "textgrams";
+
   /** Field name used for the indexed text. */
   protected final static String TEXT_FIELD_NAME = "text";
 
@@ -353,7 +357,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
 
       @Override
       protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-        if (fieldName.equals("textgrams") && minPrefixChars > 0) {
+        assert !(fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars == 0) 
+                : "no need \"textgrams\" when minPrefixChars="+minPrefixChars;
+        if (fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars > 0) {
           // TODO: should use an EdgeNGramTokenFilterFactory here
           TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars);
           return new TokenStreamComponents(components.getTokenizer(), filter);
@@ -410,7 +416,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
     Document doc = new Document();
     FieldType ft = getTextFieldType();
     doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
-    doc.add(new Field("textgrams", textString, ft));
+    if (minPrefixChars>0) {
+      doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
+    }
     doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
     doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
     doc.add(new NumericDocValuesField("weight", weight));
@@ -474,7 +482,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
   protected Query getLastTokenQuery(String token) throws IOException {
     if (token.length() < minPrefixChars) {
       // The leading ngram was directly indexed:
-      return new TermQuery(new Term("textgrams", token));
+      return new TermQuery(new Term(TEXTGRAMS_FIELD_NAME, token));
     }
 
     return new PrefixQuery(new Term(TEXT_FIELD_NAME, token));


Mime
View raw message