lucene-java-user mailing list archives

From: Mark Miller <markrmil...@gmail.com>
Subject: Re: Error using multireader searcher in Lucene 2.9
Date: Fri, 02 Oct 2009 12:30:58 GMT
Sorry Raf - technically you're not allowed to use internal Lucene ids
that way. It happened to work in the past if you didn't use
MultiSearcher, but it's not promised by the API, and it no longer
works as you'd expect in 2.9.

You have to find another approach that doesn't rely on the internal
ids. In 2.9 the searcher searches each sub-reader separately and hands
each sub-reader to Filter#getDocIdSet, so the ids your filter sees are
segment-local - you can no longer assume that id 0 is the first doc
overall, or that there is only one id 0, when building a filter. The
filter has to work relative to whatever IndexReader it's given, without
making assumptions about global ids. That's also why "z" shows up in
your output: bits 1 and 2 match the second and third docs of your
second sub-reader as well.
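
For illustration, here's a minimal sketch of what I mean - this isn't
your code, and the field name and terms are hypothetical stand-ins for
however you actually select your documents. It picks the docs out by
term, per reader, instead of caching global ids:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSet;

public class TermBasedFilter extends Filter {

    private static final long serialVersionUID = 1L;

    // e.g. new Term("text", "b"), new Term("text", "c"), new Term("text", "y")
    private final Term[] terms;

    public TermBasedFilter(Term... terms) {
        this.terms = terms;
    }

    @Override
    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        // In 2.9 this is called once per sub-reader, so size the bitset
        // for this reader only and set segment-local ids.
        OpenBitSet bits = new OpenBitSet(reader.maxDoc());
        TermDocs termDocs = reader.termDocs();
        try {
            for (Term term : terms) {
                termDocs.seek(term);
                while (termDocs.next()) {
                    bits.fastSet(termDocs.doc());
                }
            }
        } finally {
            termDocs.close();
        }
        return bits;
    }
}

Because it only sets ids local to whatever reader it's handed, it works
the same whether it gets the MultiReader or each sub-reader in turn.
And since CachingWrapperFilter caches per IndexReader, wrapping a
filter like this also caches correctly under the new per-segment
search.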

Raf wrote:
> Hello,
> I have tried to switch my application from Lucene 2.4.1 to Lucene 2.9, but I
> have found a problem.
> My searcher uses a MultiReader and, when I try to do a search using a custom
> filter based on a bitset, it does not behave as it did in Lucene 2.4.
> It looks like the new searcher does not apply the "offset" when it reads the
> sub-readers' docIds...
>
> I have written a self-contained test to show the problem:
>
> import static org.junit.Assert.assertEquals;
> import java.io.IOException;
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.WhitespaceAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.index.CorruptIndexException;
> import org.apache.lucene.index.IndexReader;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.MultiReader;
> import org.apache.lucene.index.IndexWriter.MaxFieldLength;
> import org.apache.lucene.search.DocIdSet;
> import org.apache.lucene.search.DocIdSetIterator;
> import org.apache.lucene.search.Filter;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.MatchAllDocsQuery;
> import org.apache.lucene.search.ScoreDoc;
> import org.apache.lucene.search.TopDocs;
> import org.apache.lucene.store.Directory;
> import org.apache.lucene.store.LockObtainFailedException;
> import org.apache.lucene.store.RAMDirectory;
> import org.apache.lucene.util.OpenBitSet;
> import org.junit.After;
> import org.junit.Before;
> import org.junit.Test;
>
> public class Lucene_2_9SearcherTest {
>
>     private Directory dir1 = new RAMDirectory();
>     private Directory dir2 = new RAMDirectory();
>     private Analyzer analyzer = new WhitespaceAnalyzer();
>
>     @Before
>     public void setUp() throws Exception {
>         this.createIndex1();
>         this.createIndex2();
>     }
>
>     @After
>     public void tearDown() throws Exception {
>     }
>
>     @Test
>     public void testSearchWithMultiReader()
>             throws CorruptIndexException, IOException {
>
>         IndexReader reader = this.getMultiReader();
>
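>         // Global ids across the MultiReader: 1 = "b", 2 = "c" (first
>         // index) and 6 = "y" (second index, offset by the first's 5 docs).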
>         OpenBitSet bitSet = new OpenBitSet(10);
>         bitSet.fastSet(1);
>         bitSet.fastSet(2);
>         bitSet.fastSet(6);
>
>         Filter filter = new DocIdSetFilter(bitSet);
>
>         DocIdSetIterator docIdIt = filter.getDocIdSet(reader).iterator();
>         int numDocs = 0;
>         System.out.println("Filter extraction:");
>         while (docIdIt.next()) {
>             System.out.println("Extracted: " + docIdIt.doc() + " --> "
>                     + reader.document(docIdIt.doc()).getField("text").stringValue());
>             numDocs++;
>         }
>
>         assertEquals(3, numDocs);
>
>         IndexSearcher searcher = new IndexSearcher(reader);
>         TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), filter, 10);
>         int totSearchDocs = topDocs.totalHits;
>         // assertEquals(3, totSearchDocs);
>
>         ScoreDoc[] hits = topDocs.scoreDocs;
>         System.out.println("\nSearcher extraction:");
>         for (ScoreDoc sd : hits) {
>             System.out.println("Extracted: " + sd.doc + " --> "
>                     + reader.document(sd.doc).getField("text").stringValue());
>         }
>
>     }
>
>     private void createIndex1()
>             throws CorruptIndexException, LockObtainFailedException, IOException {
>
>         IndexWriter writer = new IndexWriter(dir1, analyzer, true,
>                 MaxFieldLength.UNLIMITED);
>
>         Document doc = new Document();
>         doc.add(new Field("text", "a", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         doc = new Document();
>         doc.add(new Field("text", "b", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         doc = new Document();
>         doc.add(new Field("text", "c", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         doc = new Document();
>         doc.add(new Field("text", "d", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         doc = new Document();
>         doc.add(new Field("text", "e", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         writer.optimize();
>         writer.close();
>     }
>
>     private void createIndex2()
>             throws CorruptIndexException, LockObtainFailedException, IOException {
>
>         IndexWriter writer = new IndexWriter(dir2, analyzer, true,
>                 MaxFieldLength.UNLIMITED);
>
>         Document doc = new Document();
>         doc.add(new Field("text", "x", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         doc = new Document();
>         doc.add(new Field("text", "y", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         doc = new Document();
>         doc.add(new Field("text", "z", Field.Store.YES,
> Field.Index.NOT_ANALYZED));
>         writer.addDocument(doc);
>
>         writer.optimize();
>         writer.close();
>     }
>
>     private IndexReader getMultiReader()
>             throws CorruptIndexException, IOException {
>         IndexReader[] subReaders = new IndexReader[] {
>                 IndexReader.open(dir1, false), IndexReader.open(dir2, false) };
>         MultiReader reader = new MultiReader(subReaders);
>
>         return (reader);
>     }
>
>     private class DocIdSetFilter extends Filter {
>
>         private static final long serialVersionUID = 1L;
>
>         private DocIdSet myBitset;
>
>         public DocIdSetFilter(DocIdSet bitset) {
>             this.myBitset = bitset;
>         }
>
>         @Override
>         public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
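>             // Returns the same bitset no matter which reader is passed in.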
>             return (this.myBitset);
>         }
>
>     }
>
> }
>
>
> In Lucene 2.4.1 the output is:
> Filter extraction:
> Extracted: 1 --> b
> Extracted: 2 --> c
> Extracted: 6 --> y
>
> Searcher extraction:
> Extracted: 1 --> b
> Extracted: 2 --> c
> Extracted: 6 --> y
>
> while in Lucene 2.9 I have:
> Filter extraction:
> Extracted: 1 --> b
> Extracted: 2 --> c
> Extracted: 6 --> y
>
> Searcher extraction:
> Extracted: 1 --> b
> Extracted: 2 --> c
> Extracted: 6 --> y
> Extracted: 7 --> z
>
>
> Is it a bug in the new Lucene searcher or am I missing something?
> Thanks,
>
> Bye
> Raf
>


-- 
- Mark

http://www.lucidimagination.com





