lucene-java-user mailing list archives

From: Raf <r.ventag...@gmail.com>
Subject: Error using multireader searcher in Lucene 2.9
Date: Fri, 02 Oct 2009 11:09:47 GMT
Hello,
I have tried to switch my application from Lucene 2.4.1 to Lucene 2.9, but I have run into a problem.
My searcher uses a MultiReader and, when I run a search with a custom filter based on a bitset, it no longer behaves as it did in Lucene 2.4.
It looks like the new searcher does not apply the subreader "offset" (the docBase) when it reads the docIds coming from the filter...
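
If I have understood the 2.9 changes correctly, the searcher now works segment by segment: it asks the filter for a DocIdSet once per subreader (actually once per segment, but with my optimized test indexes that is the same thing), treats the returned docIds as relative to that subreader and adds the subreader's offset (docBase) itself. The snippet below is only a rough sketch of my mental model, not the actual IndexSearcher code; it uses the same Lucene classes as the test further down, and the println stands in for the real collector:

    // My guess at what the 2.9 searcher does with a filter (simplified sketch):
    private void sketchPerSegmentFiltering(MultiReader multiReader, Filter filter) throws IOException {
        int docBase = 0;
        for (IndexReader sub : multiReader.getSequentialSubReaders()) {
            DocIdSetIterator it = filter.getDocIdSet(sub).iterator();
            int doc;
            while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (doc >= sub.maxDoc()) {
                    break; // ids beyond the segment never get collected in my test
                }
                System.out.println("Collected: " + (docBase + doc)); // placeholder for the collector
            }
            docBase += sub.maxDoc();
        }
    }

If that is what happens, bits 1 and 2 of my bitset match in *both* subreaders (giving top-level docs 1, 2, 6 and 7), which would explain the extra "z" hit I show at the end of this mail.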

I have written a self-contained test to show the problem:

import static org.junit.Assert.assertEquals;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.OpenBitSet;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

public class Lucene_2_9SearcherTest {

    private Directory dir1 = new RAMDirectory();
    private Directory dir2 = new RAMDirectory();
    private Analyzer analyzer = new WhitespaceAnalyzer();

    @Before
    public void setUp() throws Exception {
        this.createIndex1();
        this.createIndex2();
    }

    @After
    public void tearDown() throws Exception {
    }

    @Test
    public void testSearchWithMultiReader() throws CorruptIndexException, IOException {

        IndexReader reader = this.getMultiReader();

        // Top-level docIds: 1 and 2 are in the first sub-index ("b", "c"),
        // 6 is in the second sub-index ("y", i.e. doc 1 plus the offset of 5).
        OpenBitSet bitSet = new OpenBitSet(10);
        bitSet.fastSet(1);
        bitSet.fastSet(2);
        bitSet.fastSet(6);

        Filter filter = new DocIdSetFilter(bitSet);

        // Iterating the filter directly over the MultiReader gives the expected 3 docs.
        DocIdSetIterator docIdIt = filter.getDocIdSet(reader).iterator();
        int numDocs = 0;
        System.out.println("Filter extraction:");
        while (docIdIt.next()) {
            System.out.println("Extracted: " + docIdIt.doc() + " --> "
                    + reader.document(docIdIt.doc()).getField("text").stringValue());
            numDocs++;
        }

        assertEquals(3, numDocs);

        // Searching with the same filter returns 4 hits in 2.9 instead of 3.
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), filter, 10);
        int totSearchDocs = topDocs.totalHits;
        // assertEquals(3, totSearchDocs);

        ScoreDoc[] hits = topDocs.scoreDocs;
        System.out.println("\nSearcher extraction:");
        for (ScoreDoc sd : hits) {
            System.out.println("Extracted: " + sd.doc + " --> "
                    + reader.document(sd.doc).getField("text").stringValue());
        }

    }

    private void createIndex1() throws CorruptIndexException, LockObtainFailedException, IOException {

        IndexWriter writer = new IndexWriter(dir1, analyzer, true, MaxFieldLength.UNLIMITED);

        // Five documents ("a".."e"): top-level docIds 0..4 in the MultiReader.
        Document doc = new Document();
        doc.add(new Field("text", "a", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        doc = new Document();
        doc.add(new Field("text", "b", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        doc = new Document();
        doc.add(new Field("text", "c", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        doc = new Document();
        doc.add(new Field("text", "d", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        doc = new Document();
        doc.add(new Field("text", "e", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        writer.optimize();
        writer.close();
    }

    private void createIndex2() throws CorruptIndexException, LockObtainFailedException, IOException {

        IndexWriter writer = new IndexWriter(dir2, analyzer, true, MaxFieldLength.UNLIMITED);

        // Three documents ("x", "y", "z"): top-level docIds 5..7 in the MultiReader.
        Document doc = new Document();
        doc.add(new Field("text", "x", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        doc = new Document();
        doc.add(new Field("text", "y", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        doc = new Document();
        doc.add(new Field("text", "z", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);

        writer.optimize();
        writer.close();
    }

    private IndexReader getMultiReader() throws CorruptIndexException, IOException {
        IndexReader[] subReaders = new IndexReader[] {
                IndexReader.open(dir1, false), IndexReader.open(dir2, false) };
        return new MultiReader(subReaders);
    }

    private class DocIdSetFilter extends Filter {

        private static final long serialVersionUID = 1L;

        private DocIdSet myBitset;

        public DocIdSetFilter(DocIdSet bitset) {
            this.myBitset = bitset;
        }

        @Override
        public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
            // The reader argument is ignored: the same top-level bitset is
            // returned no matter which (sub)reader the searcher passes in.
            return this.myBitset;
        }

    }

}


In Lucene 2.4.1 the output is:
Filter extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y

Searcher extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y

while in Lucene 2.9 I get:
Filter extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y

Searcher extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y
Extracted: 7 --> z
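
If the per-segment search is the intended behaviour, I guess a filter now has to return docIds that are relative to each subreader. Just to check my understanding, here is a sketch of a segment-aware version of my filter; the class name and the way it pre-splits the top-level bitset are only my guess at a workaround, not something taken from the Lucene API:

    // Sketch of a possible workaround (only my guess, see above): pre-split the
    // top-level bitset into one segment-relative OpenBitSet per leaf reader.
    private static class PerSegmentDocIdSetFilter extends Filter {

        private static final long serialVersionUID = 1L;

        // One segment-relative bitset per leaf (sub)reader, keyed by reader identity.
        private final java.util.Map<IndexReader, OpenBitSet> perSegment =
                new java.util.IdentityHashMap<IndexReader, OpenBitSet>();

        public PerSegmentDocIdSetFilter(OpenBitSet topLevelBits, IndexReader topReader) {
            split(topReader, topLevelBits, 0);
        }

        // Walks down to the leaf readers, keeping track of each leaf's docBase,
        // and re-bases the matching top-level docIds onto that leaf.
        private void split(IndexReader reader, OpenBitSet topLevelBits, int docBase) {
            IndexReader[] subs = reader.getSequentialSubReaders();
            if (subs == null) { // leaf reader
                OpenBitSet segmentBits = new OpenBitSet(reader.maxDoc());
                for (int i = 0; i < reader.maxDoc(); i++) {
                    if (topLevelBits.get(docBase + i)) {
                        segmentBits.fastSet(i);
                    }
                }
                perSegment.put(reader, segmentBits);
            } else {
                for (IndexReader sub : subs) {
                    split(sub, topLevelBits, docBase);
                    docBase += sub.maxDoc();
                }
            }
        }

        @Override
        public DocIdSet getDocIdSet(IndexReader reader) {
            // In 2.9 this seems to be called once per leaf reader, so return the
            // matching segment-relative set (or an empty set for unknown readers).
            DocIdSet bits = perSegment.get(reader);
            return bits != null ? bits : DocIdSet.EMPTY_DOCIDSET;
        }
    }

But I would have expected the searcher (or the MultiReader) to take care of the offset for me, as it did in 2.4.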


Is it a bug in the new Lucene searcher or am I missing something?
Thanks,

Bye
Raf
