From Erik Hatcher <>
Subject StopFilter - positionIncrement change
Date Wed, 26 Nov 2003 11:19:37 GMT
While I want to see Lucene 1.3 FINAL released soon, I also want to see 
if a fix for StopFilter is warranted.  I've fixed it locally but have 
been slow to get around to committing it.  The test case below 
demonstrates the problem with phrase queries and stop words.  The 
fix involves setting the position increments appropriately when stop 
words are removed.

Should this be committed before 1.3 FINAL or after?  I'm +1 on before.


package org.apache.lucene.analysis;

import java.io.StringReader;
import java.util.ArrayList;

import junit.framework.TestCase;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.RAMDirectory;

public class TestStopAnalyzer extends TestCase {
   private StopAnalyzer stopAnalyzer = new StopAnalyzer();

   public Token[] tokensFromAnalyzer(Analyzer analyzer, String text)
                                                   throws Exception {
     TokenStream stream =
       analyzer.tokenStream("contents", new StringReader(text));
     ArrayList tokenList = new ArrayList();
     while (true) {
       Token token =;
       if (token == null) break;


     return (Token[]) tokenList.toArray(new Token[0]);

   public void testNoHoles() throws Exception {
     Token[] tokens = tokensFromAnalyzer(stopAnalyzer,
                                         "non-stop words");

     assertEquals(3, tokens.length);

     // ensure all words are in successive positions
     assertEquals("non", 1, tokens[0].getPositionIncrement());
     assertEquals("stop", 1, tokens[1].getPositionIncrement());
     assertEquals("words", 1, tokens[2].getPositionIncrement());

   public void testHoles() throws Exception {
     Token[] tokens = tokensFromAnalyzer(stopAnalyzer,
                                         "the stop words are here");

     assertEquals(3, tokens.length);

     // check for the holes noted by position gaps
     assertEquals("stop", 2, tokens[0].getPositionIncrement());
     assertEquals("words", 1, tokens[1].getPositionIncrement());
     assertEquals("here", 2, tokens[2].getPositionIncrement());

   public void testPhraseQuery() throws Exception {
     RAMDirectory directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true);
     Document doc = new Document();
     doc.add(Field.Text("field", "the stop words are here"));

     IndexSearcher searcher = new IndexSearcher(directory);

     // valid exact phrase query
     PhraseQuery query = new PhraseQuery();
     query.add(new Term("field","stop"));
     query.add(new Term("field","words"));
     Hits hits =;
     assertEquals(1, hits.length());

     // incorrect attempt at exact phrase query over stop word hole
     query = new PhraseQuery();
     query.add(new Term("field", "words"));
     query.add(new Term("field", "here"));
     hits =;
     assertEquals(0, hits.length());

     // add some slop, and match over the hole
     hits =;
     assertEquals(1, hits.length());


