lucene-java-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dor...@apache.org
Subject svn commit: r607591 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/queryParser/ src/test/org/apache/lucene/analysis/ src/test/org/apache/lucene/queryParser/ src/test/org/apache/lucene/search/
Date Sun, 30 Dec 2007 21:19:18 GMT
Author: doronc
Date: Sun Dec 30 13:19:17 2007
New Revision: 607591

URL: http://svn.apache.org/viewvc?rev=607591&view=rev
Log:
LUCENE-1095: option added to StopFilter and QueryParser to consider position increments.

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
    lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
    lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java
    lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Dec 30 13:19:17 2007
@@ -266,6 +266,14 @@
     
 11. LUCENE-1019: CustomScoreQuery enhanced to support multiple 
     ValueSource queries. (Kyle Maxwell via Doron Cohen)
+    
+12. LUCENE-1095: Added an option to StopFilter to increase 
+    positionIncrement of the token succeeding a stopped token.
+    Disabled by default. Similar option added to QueryParser 
+    to consider token positions when creating PhraseQuery 
+    and MultiPhraseQuery. Disabled by default (so by default
+    the query parser ignores position increments).
+    (Doron Cohen)  
  
 
 Optimizations

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/StopFilter.java Sun Dec 30 13:19:17 2007
@@ -27,7 +27,10 @@
 
 public final class StopFilter extends TokenFilter {
 
+  private static boolean ENABLE_POSITION_INCREMENTS_DEFAULT = false;
+
   private final CharArraySet stopWords;
+  private boolean enablePositionIncrements = ENABLE_POSITION_INCREMENTS_DEFAULT;
 
   /**
    * Construct a token stream filtering the given input.
@@ -111,11 +114,58 @@
    */
   public final Token next(Token result) throws IOException {
     // return the first non-stop word found
+    int skippedPositions = 0;
     while((result = input.next(result)) != null) {
-      if (!stopWords.contains(result.termBuffer(), 0, result.termLength))
+      if (!stopWords.contains(result.termBuffer(), 0, result.termLength)) {
+        if (enablePositionIncrements) {
+          result.setPositionIncrement(result.getPositionIncrement() + skippedPositions);
+        }
         return result;
+      }
+      skippedPositions += result.getPositionIncrement();
     }
     // reached EOS -- return null
     return null;
+  }
+
+  /**
+   * @see #setEnablePositionIncrementsDefault(boolean). 
+   */
+  public static boolean getEnablePositionIncrementsDefault() {
+    return ENABLE_POSITION_INCREMENTS_DEFAULT;
+  }
+
+  /**
+   * Set the default position increments behavior of every StopFilter created from now on.
+   * <p>
+   * Note: behavior of a single StopFilter instance can be modified 
+   * with {@link #setEnablePositionIncrements(boolean)}.
+   * This static method allows control over behavior of classes using StopFilters internally,
+   * for example {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer}.
+   * <p>
+   * Default : false.
+   * @see #setEnablePositionIncrements(boolean).
+   */
+  public static void setEnablePositionIncrementsDefault(boolean defaultValue) {
+    ENABLE_POSITION_INCREMENTS_DEFAULT = defaultValue;
+  }
+
+  /**
+   * @see #setEnablePositionIncrements(boolean). 
+   */
+  public boolean getEnablePositionIncrements() {
+    return enablePositionIncrements;
+  }
+
+  /**
+   * Set to <code>true</code> to make <b>this</b> StopFilter enable position increments to result tokens.
+   * <p>
+   * When set, when a token is stopped (omitted), the position increment of 
+   * the following token is incremented.  
+   * <p>
+   * Default: see {@link #setEnablePositionIncrementsDefault(boolean)}.
+   */
+  public void setEnablePositionIncrements(boolean enable) {
+    this.enablePositionIncrements = enable;
   }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.java Sun Dec 30 13:19:17 2007
@@ -100,6 +100,7 @@
   boolean lowercaseExpandedTerms = true;
   boolean useOldRangeQuery= false;
   boolean allowLeadingWildcard = false;
+  boolean enablePositionIncrements = false;
 
   Analyzer analyzer;
   String field;
@@ -234,13 +235,34 @@
   }
 
   /**
-   * @see #setAllowLeadingWildcard
+   * @see #setAllowLeadingWildcard(boolean)
    */
   public boolean getAllowLeadingWildcard() {
     return allowLeadingWildcard;
   }
 
   /**
+   * Set to <code>true</code> to enable position increments in result query.
+   * <p>
+   * When set, result phrase and multi-phrase queries will
+   * be aware of position increments.
+   * Useful when e.g. a StopFilter increases the position increment of
+   * the token that follows an omitted token.
+   * <p>
+   * Default: false.
+   */
+  public void setEnablePositionIncrements(boolean enable) {
+    this.enablePositionIncrements = enable;
+  }
+
+  /**
+   * @see #setEnablePositionIncrements(boolean)
+   */
+  public boolean getEnablePositionIncrements() {
+    return enablePositionIncrements;
+  }
+
+  /**
    * Sets the boolean operator of the QueryParser.
    * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
   * are considered optional: for example <code>capital of Hungary</code> is equal to
@@ -478,27 +500,42 @@
           MultiPhraseQuery mpq = new MultiPhraseQuery();
           mpq.setSlop(phraseSlop);
           List multiTerms = new ArrayList();
+          int position = -1;
           for (int i = 0; i < v.size(); i++) {
             t = (org.apache.lucene.analysis.Token) v.elementAt(i);
-            if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
-              mpq.add((Term[])multiTerms.toArray(new Term[0]));
+            if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
+              if (enablePositionIncrements) {
+                mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+              } else {
+                mpq.add((Term[])multiTerms.toArray(new Term[0]));
+              }
               multiTerms.clear();
             }
+            position += t.getPositionIncrement();
             multiTerms.add(new Term(field, t.termText()));
           }
-          mpq.add((Term[])multiTerms.toArray(new Term[0]));
+          if (enablePositionIncrements) {
+            mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+          } else {
+            mpq.add((Term[])multiTerms.toArray(new Term[0]));
+          }
           return mpq;
         }
       }
       else {
-        PhraseQuery q = new PhraseQuery();
-        q.setSlop(phraseSlop);
+        PhraseQuery pq = new PhraseQuery();
+        pq.setSlop(phraseSlop);
+        int position = -1;
         for (int i = 0; i < v.size(); i++) {
-          q.add(new Term(field, ((org.apache.lucene.analysis.Token)
-              v.elementAt(i)).termText()));
-
+          t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+          if (enablePositionIncrements) {
+            position += t.getPositionIncrement();
+            pq.add(new Term(field, t.termText()),position);
+          } else {
+            pq.add(new Term(field, t.termText()));
+          }
         }
-        return q;
+        return pq;
       }
     }
   }
@@ -1262,12 +1299,6 @@
     finally { jj_save(0, xla); }
   }
 
-  final private boolean jj_3R_3() {
-    if (jj_scan_token(STAR)) return true;
-    if (jj_scan_token(COLON)) return true;
-    return false;
-  }
-
   final private boolean jj_3R_2() {
     if (jj_scan_token(TERM)) return true;
     if (jj_scan_token(COLON)) return true;
@@ -1281,6 +1312,12 @@
     jj_scanpos = xsp;
     if (jj_3R_3()) return true;
     }
+    return false;
+  }
+
+  final private boolean jj_3R_3() {
+    if (jj_scan_token(STAR)) return true;
+    if (jj_scan_token(COLON)) return true;
     return false;
   }
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/QueryParser.jj Sun Dec 30 13:19:17 2007
@@ -124,6 +124,7 @@
   boolean lowercaseExpandedTerms = true;
   boolean useOldRangeQuery= false;  
   boolean allowLeadingWildcard = false;
+  boolean enablePositionIncrements = false;
 
   Analyzer analyzer;
   String field;
@@ -258,13 +259,34 @@
   }
 
   /**
-   * @see #setAllowLeadingWildcard
+   * @see #setAllowLeadingWildcard(boolean)
    */
   public boolean getAllowLeadingWildcard() {
     return allowLeadingWildcard;
   }
 
   /**
+   * Set to <code>true</code> to enable position increments in result query.
+   * <p>
+   * When set, result phrase and multi-phrase queries will
+   * be aware of position increments.
+   * Useful when e.g. a StopFilter increases the position increment of
+   * the token that follows an omitted token.
+   * <p>
+   * Default: false.
+   */
+  public void setEnablePositionIncrements(boolean enable) {
+    this.enablePositionIncrements = enable;
+  }
+
+  /**
+   * @see #setEnablePositionIncrements(boolean)
+   */
+  public boolean getEnablePositionIncrements() {
+    return enablePositionIncrements;
+  }
+
+  /**
    * Sets the boolean operator of the QueryParser.
    * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
   * are considered optional: for example <code>capital of Hungary</code> is equal to
@@ -502,27 +524,42 @@
           MultiPhraseQuery mpq = new MultiPhraseQuery();
           mpq.setSlop(phraseSlop);          
           List multiTerms = new ArrayList();
+          int position = -1;
           for (int i = 0; i < v.size(); i++) {
             t = (org.apache.lucene.analysis.Token) v.elementAt(i);
-            if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
-              mpq.add((Term[])multiTerms.toArray(new Term[0]));
+            if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
+              if (enablePositionIncrements) {
+                mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+              } else {
+                mpq.add((Term[])multiTerms.toArray(new Term[0]));
+              }
               multiTerms.clear();
             }
+            position += t.getPositionIncrement();
             multiTerms.add(new Term(field, t.termText()));
           }
-          mpq.add((Term[])multiTerms.toArray(new Term[0]));
+          if (enablePositionIncrements) {
+            mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+          } else {
+            mpq.add((Term[])multiTerms.toArray(new Term[0]));
+          }
           return mpq;
         }
       }
       else {
-        PhraseQuery q = new PhraseQuery();
-        q.setSlop(phraseSlop);
+        PhraseQuery pq = new PhraseQuery();
+        pq.setSlop(phraseSlop);
+        int position = -1;
         for (int i = 0; i < v.size(); i++) {
-          q.add(new Term(field, ((org.apache.lucene.analysis.Token) 
-              v.elementAt(i)).termText()));
-
+          t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+          if (enablePositionIncrements) {
+            position += t.getPositionIncrement();
+            pq.add(new Term(field, t.termText()),position);
+          } else {
+            pq.add(new Term(field, t.termText()));
+          }
         }
-        return q;
+        return pq;
       }
     }
   }

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java Sun Dec 30 13:19:17 2007
@@ -64,7 +64,33 @@
     while ((token = stream.next()) != null) {
       String text = token.termText();
       assertFalse(stopWordsSet.contains(text));
+      assertEquals(1,token.getPositionIncrement()); // by default stop tokenizer does not apply increments.
     }
   }
-  
+
+  public void testStopListPositions() throws IOException {
+    boolean defaultEnable = StopFilter.getEnablePositionIncrementsDefault();
+    StopFilter.setEnablePositionIncrementsDefault(true);
+    try {
+      Set stopWordsSet = new HashSet();
+      stopWordsSet.add("good");
+      stopWordsSet.add("test");
+      stopWordsSet.add("analyzer");
+      StopAnalyzer newStop = new StopAnalyzer((String[])stopWordsSet.toArray(new String[3]));
+      StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
+      int expectedIncr[] =                  { 1,   1, 1,          3, 1,  1,      1,            2,   1};
+      TokenStream stream = newStop.tokenStream("test", reader);
+      assertNotNull(stream);
+      Token token = null;
+      int i = 0;
+      while ((token = stream.next()) != null) {
+        String text = token.termText();
+        assertFalse(stopWordsSet.contains(text));
+        assertEquals(expectedIncr[i++],token.getPositionIncrement());
+      }
+    } finally {
+      StopFilter.setEnablePositionIncrementsDefault(defaultEnable);
+    }
+  }
+
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/analysis/TestStopFilter.java Sun Dec 30 13:19:17 2007
@@ -16,10 +16,12 @@
  * limitations under the License.
  */
 
+import org.apache.lucene.util.English;
 import org.apache.lucene.util.LuceneTestCase;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.ArrayList;
 import java.util.Set;
 
 /**
@@ -27,6 +29,8 @@
  */
 public class TestStopFilter extends LuceneTestCase {
 
+  private final static boolean VERBOSE = false;
+  
   // other StopFilter functionality is already tested by TestStopAnalyzer
 
   public void testExactCase() throws IOException {
@@ -56,4 +60,69 @@
     assertEquals(null, stream.next());
   }
 
+  /**
+   * Test Position increments applied by StopFilter with and without enabling this option.
+   */
+  public void testStopPositons() throws IOException {
+    StringBuffer sb = new StringBuffer();
+    ArrayList a = new ArrayList();
+    for (int i=0; i<20; i++) {
+      String w = English.intToEnglish(i).trim();
+      sb.append(w).append(" ");
+      if (i%3 != 0) a.add(w);
+    }
+    log(sb.toString());
+    String stopWords[] = (String[]) a.toArray(new String[0]);
+    for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
+    Set stopSet = StopFilter.makeStopSet(stopWords);
+    // with increments
+    StringReader reader = new StringReader(sb.toString());
+    StopFilter stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+    doTestStopPositons(stpf,true);
+    // without increments
+    reader = new StringReader(sb.toString());
+    stpf = new StopFilter(new WhitespaceTokenizer(reader), stopSet);
+    doTestStopPositons(stpf,false);
+    // with increments, concatenating two stop filters
+    ArrayList a0 = new ArrayList();
+    ArrayList a1 = new ArrayList();
+    for (int i=0; i<a.size(); i++) {
+      if (i%2==0) { 
+        a0.add(a.get(i));
+      } else {
+        a1.add(a.get(i));
+      }
+    }
+    String stopWords0[] = (String[]) a0.toArray(new String[0]);
+    for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
+    String stopWords1[] = (String[]) a1.toArray(new String[0]);
+    for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
+    Set stopSet0 = StopFilter.makeStopSet(stopWords0);
+    Set stopSet1 = StopFilter.makeStopSet(stopWords1);
+    reader = new StringReader(sb.toString());
+    StopFilter stpf0 = new StopFilter(new WhitespaceTokenizer(reader), stopSet0); // first part of the set
+    stpf0.setEnablePositionIncrements(true);
+    StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
+    doTestStopPositons(stpf01,true);
+  }
+  
+  private void doTestStopPositons(StopFilter stpf, boolean enableIcrements) throws IOException {
+    log("---> test with enable-increments-"+(enableIcrements?"enabled":"disabled"));
+    stpf.setEnablePositionIncrements(enableIcrements);
+    for (int i=0; i<20; i+=3) {
+      Token t = stpf.next();
+      log("Token "+i+": "+t);
+      String w = English.intToEnglish(i).trim();
+      assertEquals("expecting token "+i+" to be "+w,w,t.termText());
+      assertEquals("all but first token must have position increment of 3",enableIcrements?(i==0?1:3):1,t.getPositionIncrement());
+    }
+    assertNull(stpf.next());
+  }
+  
+  // print debug info depending on VERBOSE
+  private static void log(String s) {
+    if (VERBOSE) {
+      System.out.println(s);
+    }
+  }
 }

Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Sun Dec 30 13:19:17 2007
@@ -838,17 +838,40 @@
   public void testStopwords() throws Exception {
     QueryParser qp = new QueryParser("a", new StopAnalyzer(new String[]{"the", "foo"}));
     Query result = qp.parse("a:the OR a:foo");
-    assertTrue("result is null and it shouldn't be", result != null);
+    assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
     assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0);
     result = qp.parse("a:woo OR a:the");
-    assertTrue("result is null and it shouldn't be", result != null);
+    assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a TermQuery", result instanceof TermQuery);
     result = qp.parse("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)");
-    assertTrue("result is null and it shouldn't be", result != null);
+    assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
     System.out.println("Result: " + result);
     assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
+  }
+
+  public void testPositionIncrement() throws Exception {
+    boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
+    StopFilter.setEnablePositionIncrementsDefault(true);
+    try {
+      QueryParser qp = new QueryParser("a", new StopAnalyzer(new String[]{"the", "in", "are", "this"}));
+      qp.setEnablePositionIncrements(true);
+      String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
+      //               0         2                      5           7  8
+      int expectedPositions[] = {1,3,4,6,9};
+      PhraseQuery pq = (PhraseQuery) qp.parse(qtxt);
+      //System.out.println("Query text: "+qtxt);
+      //System.out.println("Result: "+pq);
+      Term t[] = pq.getTerms();
+      int pos[] = pq.getPositions();
+      for (int i = 0; i < t.length; i++) {
+        //System.out.println(i+". "+t[i]+"  pos: "+pos[i]);
+        assertEquals("term "+i+" = "+t[i]+" has wrong term-position!",expectedPositions[i],pos[i]);
+      }
+    } finally {
+      StopFilter.setEnablePositionIncrementsDefault(dflt);
+    }
   }
 
   public void testMatchAllDocs() throws Exception {

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java?rev=607591&r1=607590&r2=607591&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestPositionIncrement.java Sun Dec 30 13:19:17 2007
@@ -19,11 +19,14 @@
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Hits;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
@@ -80,6 +83,20 @@
     hits = searcher.search(q);
     assertEquals(0, hits.length());
 
+    // same as previous, just specify positions explicitly.
+    q = new PhraseQuery(); 
+    q.add(new Term("field", "1"),0);
+    q.add(new Term("field", "2"),1);
+    hits = searcher.search(q);
+    assertEquals(0, hits.length());
+
+    // specifying correct positions should find the phrase.
+    q = new PhraseQuery();
+    q.add(new Term("field", "1"),0);
+    q.add(new Term("field", "2"),2);
+    hits = searcher.search(q);
+    assertEquals(1, hits.length());
+
     q = new PhraseQuery();
     q.add(new Term("field", "2"));
     q.add(new Term("field", "3"));
@@ -92,6 +109,28 @@
     hits = searcher.search(q);
     assertEquals(0, hits.length());
 
+    // phrase query would find it when correct positions are specified. 
+    q = new PhraseQuery();
+    q.add(new Term("field", "3"),0);
+    q.add(new Term("field", "4"),0);
+    hits = searcher.search(q);
+    assertEquals(1, hits.length());
+
+    // phrase query should fail for a non-existing searched term
+    // even if another searched term exists in the same searched position.
+    q = new PhraseQuery();
+    q.add(new Term("field", "3"),0);
+    q.add(new Term("field", "9"),0);
+    hits = searcher.search(q);
+    assertEquals(0, hits.length());
+
+    // multi-phrase query should succeed for a non-existing searched term
+    // because another searched term exists in the same searched position.
+    MultiPhraseQuery mq = new MultiPhraseQuery();
+    mq.add(new Term[]{new Term("field", "3"),new Term("field", "9")},0);
+    hits = searcher.search(mq);
+    assertEquals(1, hits.length());
+
     q = new PhraseQuery();
     q.add(new Term("field", "2"));
     q.add(new Term("field", "4"));
@@ -115,6 +154,50 @@
     q.add(new Term("field", "5"));
     hits = searcher.search(q);
     assertEquals(0, hits.length());
+
+    // analyzer to introduce stopwords and increment gaps 
+    Analyzer stpa = new Analyzer() {
+      final WhitespaceAnalyzer a = new WhitespaceAnalyzer();
+      public TokenStream tokenStream(String fieldName, Reader reader) {
+        TokenStream ts = a.tokenStream(fieldName,reader);
+        return new StopFilter(ts,new String[]{"stop"});
+      }
+    };
+
+    // should not find "1 2" because there is a gap of 1 in the index
+    QueryParser qp = new QueryParser("field",stpa);
+    q = (PhraseQuery) qp.parse("\"1 2\"");
+    hits = searcher.search(q);
+    assertEquals(0, hits.length());
+
+    // omitted stop word cannot help because stop filter swallows the increments. 
+    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+    hits = searcher.search(q);
+    assertEquals(0, hits.length());
+
+    // query parser alone won't help, because stop filter swallows the increments. 
+    qp.setEnablePositionIncrements(true);
+    q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+    hits = searcher.search(q);
+    assertEquals(0, hits.length());
+
+    boolean dflt = StopFilter.getEnablePositionIncrementsDefault();
+    try {
+      // stop filter alone won't help, because query parser swallows the increments. 
+      qp.setEnablePositionIncrements(false);
+      StopFilter.setEnablePositionIncrementsDefault(true);
+      q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+      hits = searcher.search(q);
+      assertEquals(0, hits.length());
+      
+      // when both qp and stopFilter propagate increments, we should find the doc.
+      qp.setEnablePositionIncrements(true);
+      q = (PhraseQuery) qp.parse("\"1 stop 2\"");
+      hits = searcher.search(q);
+      assertEquals(1, hits.length());
+    } finally {
+      StopFilter.setEnablePositionIncrementsDefault(dflt);
+    }
   }
 
   /**



Mime
View raw message