lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Uwe Schindler (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (LUCENE-3442) QueryWrapperFilter gets null DocIdSetIterator when wrapping TermQuery
Date Tue, 20 Sep 2011 18:55:11 GMT

     [ https://issues.apache.org/jira/browse/LUCENE-3442?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

Uwe Schindler updated LUCENE-3442:
----------------------------------

      Description: 
If you try to get the iterator for the DocIdSet returned by a QueryWrapperFilter which wraps
a TermQuery you get null instead of an iterator that returns the same documents as the search
on the TermQuery.

Code demonstrating the issue:

{code:java}
import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

/**
 * Stand-alone reproduction for LUCENE-3442.
 *
 * <p>Indexes a single document into an in-memory directory, runs a
 * {@link TermQuery} through an {@link IndexSearcher}, then wraps the same
 * query in a {@link QueryWrapperFilter} and walks the iterator of the
 * {@link DocIdSet} obtained from the top-level reader. The program prints the
 * search hit count followed by either the stored "id" of every document the
 * iterator yields, or a message saying that the iterator was null.
 */
public class TestQueryWrapperFilterIterator {

	/**
	 * Builds the one-document index, runs the query and the filter, and prints
	 * the results to standard output.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_34, new WhitespaceAnalyzer(Version.LUCENE_34));
			iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
			RAMDirectory dir = new RAMDirectory();

			// Close the writer even if indexing fails, so it is never leaked.
			IndexWriter writer = new IndexWriter(dir, iwconfig);
			try {
				Document d = new Document();
				d.add(new Field("id", "1001", Store.YES, Index.NOT_ANALYZED));
				d.add(new Field("text", "headline one group one", Store.YES, Index.ANALYZED));
				d.add(new Field("group", "grp1", Store.YES, Index.NOT_ANALYZED));
				writer.addDocument(d);
				writer.commit();
			} finally {
				writer.close();
			}

			// Reader and searcher are released in the original order
			// (searcher first, then reader), also when an exception is thrown.
			IndexReader rdr = IndexReader.open(dir);
			try {
				IndexSearcher searcher = new IndexSearcher(rdr);
				try {
					TermQuery tq = new TermQuery(new Term("text", "headline"));

					TopDocs results = searcher.search(tq, 5);
					System.out.println("Number of search results: " + results.totalHits);

					Filter f = new QueryWrapperFilter(tq);

					// The filter is deliberately asked for the doc id set of the
					// top-level reader: this is the call that exposes the issue.
					DocIdSet dis = f.getDocIdSet(rdr);

					// A Filter may return a null DocIdSet; treat that like a
					// null iterator instead of failing with a NullPointerException.
					DocIdSetIterator it = (dis == null) ? null : dis.iterator();
					if (it != null) {
						int docId = it.nextDoc();
						while (docId != DocIdSetIterator.NO_MORE_DOCS) {
							Document doc = rdr.document(docId);
							System.out.println("Iterator doc: " + doc.get("id"));
							docId = it.nextDoc();
						}
					} else {
						System.out.println("Iterator was null: ");
					}
				} finally {
					searcher.close();
				}
			} finally {
				rdr.close();
			}
		} catch (IOException ioe) {
			ioe.printStackTrace();
		}

	}
}
{code}

  was:
If you try to get the iterator for the DocIdSet returned by a QueryWrapperFilter which wraps
a TermQuery you get null instead of an iterator that returns the same documents as the search
on the TermQuery.

Code demonstrating the issue:


import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

/**
 * Stand-alone reproduction for LUCENE-3442.
 *
 * <p>Indexes a single document into an in-memory directory, runs a
 * {@link TermQuery} through an {@link IndexSearcher}, then wraps the same
 * query in a {@link QueryWrapperFilter} and walks the iterator of the
 * {@link DocIdSet} obtained from the top-level reader. The program prints the
 * search hit count followed by either the stored "id" of every document the
 * iterator yields, or a message saying that the iterator was null.
 */
public class TestQueryWrapperFilterIterator {

	/**
	 * Builds the one-document index, runs the query and the filter, and prints
	 * the results to standard output.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		try {
			IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_34, new WhitespaceAnalyzer(Version.LUCENE_34));
			iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
			RAMDirectory dir = new RAMDirectory();

			// Close the writer even if indexing fails, so it is never leaked.
			IndexWriter writer = new IndexWriter(dir, iwconfig);
			try {
				Document d = new Document();
				d.add(new Field("id", "1001", Store.YES, Index.NOT_ANALYZED));
				d.add(new Field("text", "headline one group one", Store.YES, Index.ANALYZED));
				d.add(new Field("group", "grp1", Store.YES, Index.NOT_ANALYZED));
				writer.addDocument(d);
				writer.commit();
			} finally {
				writer.close();
			}

			// Reader and searcher are released in the original order
			// (searcher first, then reader), also when an exception is thrown.
			IndexReader rdr = IndexReader.open(dir);
			try {
				IndexSearcher searcher = new IndexSearcher(rdr);
				try {
					TermQuery tq = new TermQuery(new Term("text", "headline"));

					TopDocs results = searcher.search(tq, 5);
					System.out.println("Number of search results: " + results.totalHits);

					Filter f = new QueryWrapperFilter(tq);

					// The filter is deliberately asked for the doc id set of the
					// top-level reader: this is the call that exposes the issue.
					DocIdSet dis = f.getDocIdSet(rdr);

					// A Filter may return a null DocIdSet; treat that like a
					// null iterator instead of failing with a NullPointerException.
					DocIdSetIterator it = (dis == null) ? null : dis.iterator();
					if (it != null) {
						int docId = it.nextDoc();
						while (docId != DocIdSetIterator.NO_MORE_DOCS) {
							Document doc = rdr.document(docId);
							System.out.println("Iterator doc: " + doc.get("id"));
							docId = it.nextDoc();
						}
					} else {
						System.out.println("Iterator was null: ");
					}
				} finally {
					searcher.close();
				}
			} finally {
				rdr.close();
			}
		} catch (IOException ioe) {
			ioe.printStackTrace();
		}

	}
}


    Fix Version/s: 3.5

The issue lies in the fact that an optimization in TermQuery prevents its Weight.scorer()
method from behaving correctly when a non-atomic (composite) reader is passed in. This is no longer supported
in Lucene trunk, but in 3.x the weight should still be able to work on composite readers.
The sample code provided does exactly this: it calls QWF.getDocIdSet on a non-atomic IndexReader.
QWF calls TermWeight.scorer() and this one returns null, because the composite reader is not
in its DF cache.

The fix is easy: Don't exit early in scorer() if the reader passed in is not atomic.

> QueryWrapperFilter gets null DocIdSetIterator when wrapping TermQuery
> ---------------------------------------------------------------------
>
>                 Key: LUCENE-3442
>                 URL: https://issues.apache.org/jira/browse/LUCENE-3442
>             Project: Lucene - Java
>          Issue Type: Bug
>          Components: core/search
>    Affects Versions: 3.4
>         Environment: java 1.6.0_27
>            Reporter: Dan
>            Assignee: Uwe Schindler
>            Priority: Minor
>             Fix For: 3.5
>
>
> If you try to get the iterator for the DocIdSet returned by a QueryWrapperFilter which
wraps a TermQuery you get null instead of an iterator that returns the same documents as the
search on the TermQuery.
> Code demonstrating the issue:
> {code:java}
> import java.io.IOException;
> import org.apache.lucene.analysis.WhitespaceAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.document.Field.Index;
> import org.apache.lucene.document.Field.Store;
> import org.apache.lucene.index.IndexReader;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.IndexWriterConfig;
> import org.apache.lucene.index.Term;
> import org.apache.lucene.store.RAMDirectory;
> import org.apache.lucene.util.Version;
> import org.apache.lucene.search.DocIdSet;
> import org.apache.lucene.search.DocIdSetIterator;
> import org.apache.lucene.search.Filter;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.QueryWrapperFilter;
> import org.apache.lucene.search.TermQuery;
> import org.apache.lucene.search.TopDocs;
> public class TestQueryWrapperFilterIterator {
>    public static void main(String[] args) {
> 		try {
> 			IndexWriterConfig iwconfig = new IndexWriterConfig(Version.LUCENE_34, new WhitespaceAnalyzer(Version.LUCENE_34));
> 			iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
> 			RAMDirectory dir = new RAMDirectory();
> 		
> 			IndexWriter writer = new IndexWriter(dir, iwconfig);
> 			Document d = new Document();
> 			d.add(new Field("id", "1001", Store.YES, Index.NOT_ANALYZED));
> 			d.add(new Field("text", "headline one group one", Store.YES, Index.ANALYZED));
> 			d.add(new Field("group", "grp1", Store.YES, Index.NOT_ANALYZED));
> 		    writer.addDocument(d);
> 			writer.commit();
> 			writer.close();
> 			
> 			IndexReader rdr = IndexReader.open(dir);
> 			IndexSearcher searcher = new IndexSearcher(rdr);
> 			
> 			TermQuery tq = new TermQuery(new Term("text", "headline"));
> 			
> 			TopDocs results = searcher.search(tq, 5);
> 			System.out.println("Number of search results: " + results.totalHits);
> 			
> 			Filter f = new QueryWrapperFilter(tq);
> 			
> 			DocIdSet dis = f.getDocIdSet(rdr);
> 			
> 			DocIdSetIterator it = dis.iterator();
> 			if (it != null) {
> 				int docId = it.nextDoc();
> 				while (docId != DocIdSetIterator.NO_MORE_DOCS) {
> 					Document doc = rdr.document(docId);
> 					System.out.println("Iterator doc: " + doc.get("id"));
> 					docId = it.nextDoc();
> 				}
> 			} else {
> 				System.out.println("Iterator was null: ");
> 			}
> 			
> 			searcher.close();
> 			rdr.close();
> 		} catch (IOException ioe) {
> 			ioe.printStackTrace();
> 		}
> 	}
> }
> {code}

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Mime
View raw message