lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mikemcc...@apache.org
Subject svn commit: r1556954 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/
Date Thu, 09 Jan 2014 21:16:05 GMT
Author: mikemccand
Date: Thu Jan  9 21:16:05 2014
New Revision: 1556954

URL: http://svn.apache.org/r1556954
Log:
LUCENE-5345: add new BlendedInfixSuggester

Added:
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
      - copied, changed from r1556952, lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
      - copied unchanged from r1556952, lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1556954&r1=1556953&r2=1556954&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Jan  9 21:16:05 2014
@@ -30,6 +30,10 @@ New Features
 
 * LUCENE-5369: Added an UpperCaseFilter to make UPPERCASE tokens. (ryan)
 
+* LUCENE-5345: Add a new BlendedInfixSuggester, which is like
+  AnalyzingInfixSuggester but boosts suggestions that matched tokens
+  with lower positions.  (Remi Melisson via Mike McCandless)
+
 
 Build
 

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1556954&r1=1556953&r2=1556954&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Thu Jan  9 21:16:05 2014
@@ -111,10 +111,14 @@ public class AnalyzingInfixSuggester ext
   /** {@link IndexSearcher} used for lookups. */
   protected IndexSearcher searcher;
 
-  /** null if payloads were not indexed: */
-  private BinaryDocValues payloadsDV;
-  private BinaryDocValues textDV;
-  private NumericDocValues weightsDV;
+  /** DocValuesField holding the payloads; null if payloads were not indexed. */
+  protected BinaryDocValues payloadsDV;
+
+  /** DocValuesField holding each suggestion's text. */
+  protected BinaryDocValues textDV;
+
+  /** DocValuesField holding each suggestion's weight. */
+  protected NumericDocValues weightsDV;
 
   /** Default minimum number of leading characters before
    *  PrefixQuery is used (4). */
@@ -214,9 +218,7 @@ public class AnalyzingInfixSuggester ext
                           getIndexWriterConfig(matchVersion, gramAnalyzer));
       BytesRef text;
       Document doc = new Document();
-      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
-      ft.setIndexOptions(IndexOptions.DOCS_ONLY);
-      ft.setOmitNorms(true);
+      FieldType ft = getTextFieldType();
       Field textField = new Field(TEXT_FIELD_NAME, "", ft);
       doc.add(textField);
 
@@ -314,6 +316,18 @@ public class AnalyzingInfixSuggester ext
     }
   }
 
+  /**
+   * Subclass can override this method to change the field type of the text field
+   * e.g. to change the index options
+   */
+  protected FieldType getTextFieldType(){
+    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+    ft.setIndexOptions(IndexOptions.DOCS_ONLY);
+    ft.setOmitNorms(true);
+
+    return ft;
+  }
+
   @Override
   public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
     return lookup(key, num, true, true);
@@ -415,35 +429,13 @@ public class AnalyzingInfixSuggester ext
       // Slower way if postings are not pre-sorted by weight:
       // hits = searcher.search(query, null, num, new Sort(new SortField("weight", SortField.Type.LONG, true)));
 
-      List<LookupResult> results = new ArrayList<LookupResult>();
-      BytesRef scratch = new BytesRef();
-      for (int i=0;i<hits.scoreDocs.length;i++) {
-        ScoreDoc sd = hits.scoreDocs[i];
-        textDV.get(sd.doc, scratch);
-        String text = scratch.utf8ToString();
-        long score = weightsDV.get(sd.doc);
-
-        BytesRef payload;
-        if (payloadsDV != null) {
-          payload = new BytesRef();
-          payloadsDV.get(sd.doc, payload);
-        } else {
-          payload = null;
-        }
-
-        LookupResult result;
+      List<LookupResult> results = createResults(hits, num, key, doHighlight, matchedTokens, prefixToken);
 
-        if (doHighlight) {
-          Object highlightKey = highlight(text, matchedTokens, prefixToken);
-          result = new LookupResult(highlightKey.toString(), highlightKey, score, payload);
-        } else {
-          result = new LookupResult(text, score, payload);
-        }
-        results.add(result);
-      }
       //System.out.println((System.currentTimeMillis() - t0) + " msec for infix suggest");
       //System.out.println(results);
+
       return results;
+
     } catch (IOException ioe) {
       throw new RuntimeException(ioe);
     } finally {
@@ -451,6 +443,46 @@ public class AnalyzingInfixSuggester ext
     }
   }
 
+  /**
+   * Create the results based on the search hits.
+   * Can be overridden by subclass to add particular behavior (e.g. weight transformation)
+   * @throws IOException If there are problems reading fields from the underlying Lucene index.
+   */
+  protected List<LookupResult> createResults(TopDocs hits, int num, CharSequence charSequence,
+                                             boolean doHighlight, Set<String> matchedTokens, String prefixToken)
+      throws IOException {
+
+    List<LookupResult> results = new ArrayList<LookupResult>();
+    BytesRef scratch = new BytesRef();
+    for (int i=0;i<hits.scoreDocs.length;i++) {
+      ScoreDoc sd = hits.scoreDocs[i];
+      textDV.get(sd.doc, scratch);
+      String text = scratch.utf8ToString();
+      long score = weightsDV.get(sd.doc);
+
+      BytesRef payload;
+      if (payloadsDV != null) {
+        payload = new BytesRef();
+        payloadsDV.get(sd.doc, payload);
+      } else {
+        payload = null;
+      }
+
+      LookupResult result;
+
+      if (doHighlight) {
+        Object highlightKey = highlight(text, matchedTokens, prefixToken);
+        result = new LookupResult(highlightKey.toString(), highlightKey, score, payload);
+      } else {
+        result = new LookupResult(text, score, payload);
+      }
+
+      results.add(result);
+    }
+
+    return results;
+  }
+
   /** Subclass can override this to tweak the Query before
    *  searching. */
   protected Query finishQuery(BooleanQuery in, boolean allTermsRequired) {

Copied: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (from r1556952, lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java?p2=lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java&p1=lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java&r1=1556952&r2=1556954&rev=1556954&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java Thu Jan  9 21:16:05 2014
@@ -138,7 +138,7 @@ public class BlendedInfixSuggester exten
                                                     boolean doHighlight, Set<String> matchedTokens, String prefixToken)
       throws IOException {
 
-    TreeSet<Lookup.LookupResult> results = new TreeSet<>(LOOKUP_COMP);
+    TreeSet<Lookup.LookupResult> results = new TreeSet<Lookup.LookupResult>(LOOKUP_COMP);
 
     // we reduce the num to the one initially requested
     int actualNum = num / numFactor;
@@ -180,7 +180,7 @@ public class BlendedInfixSuggester exten
       boundedTreeAdd(results, result, actualNum);
     }
 
-    return new ArrayList<>(results.descendingSet());
+    return new ArrayList<Lookup.LookupResult>(results.descendingSet());
   }
 
   /**



Mime
View raw message