lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jake Mannix <jake.man...@gmail.com>
Subject Re: Lucene 2.9.0 / BooleanQuery problem
Date Thu, 29 Oct 2009 03:57:22 GMT
Hi Michel,

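  I don't have time to look in too much detail right now, but I'll bet ya $5
it's because your query is for "sector:IT" - 'IT' lowercases to 'it', which is
in the default stopword list, so StandardAnalyzer drops it at index time and
the term never makes it into your index. QueryParser runs the query text
through the same analyzer, so the "sector:IT" clause is stripped out and only
"group:group" is left - which is why a document with a different sector can
still match. A hand-built TermQuery is not analyzed at all, so if you're not
careful about how you query with this, you'll end up with TermQuery instances
which hit nothing, because the term was stop-listed out of your index!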

  Can you run the test again with no stop words in your query, and see what
it
gives?

  -jake

On Wed, Oct 28, 2009 at 7:12 PM, Michel Nadeau <akaris@gmail.com> wrote:

> Hi !
>
> I spent all night trying to get a simple BooleanQuery working and I really
> can't figure out what my problem is. See this very simple program:
>
> public class test {
>
>    @SuppressWarnings("deprecation")
>    public static void main(String[] args) throws ParseException,
> CorruptIndexException, LockObtainFailedException, IOException
>    {
>        // Open index directory
>        File idx = new File("d:/Java/index/");
>
>        // Create IndexWriter
>        IndexWriter writer = new IndexWriter(idx, new
> StandardAnalyzer(Version.LUCENE_CURRENT), true,
> IndexWriter.MaxFieldLength.LIMITED);
>
>        //
>        // Add some documents to the index
>        //
>
>        Document doc = new Document();
>        doc.add(new Field("firstname", "Mike", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("phone", "111-222-3333", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("sector", "IT", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("group", "group", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("content", "blue this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
>        writer.addDocument(doc);
>
>        doc = new Document();
>        doc.add(new Field("firstname", "Pascale", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("lastname", "Lavoie", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("phone", "333-222-1111", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("sector", "Accounting", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("group", "othergroup", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("content", "red this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
>        writer.addDocument(doc);
>
>        doc = new Document();
>        doc.add(new Field("firstname", "Kaven", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("lastname", "Rouseau", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("phone", "222-333-1111", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("sector", "IT", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("group", "group", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("content", "red this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
>        writer.addDocument(doc);
>
>        doc = new Document();
>        doc.add(new Field("firstname", "Mike", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("lastname", "Peters", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("phone", "444-444-4444", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("sector", "IT", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("group", "othergroup", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("content", "this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
>        writer.addDocument(doc);
>
>        doc = new Document();
>        doc.add(new Field("firstname", "Marie-Pier", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("lastname", "Nadeau", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("phone", "123-123-1234", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("sector", "Accounting", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("group", "group", Field.Store.YES,
> Field.Index.ANALYZED));
>        doc.add(new Field("content", "this is some content",
> Field.Store.YES, Field.Index.ANALYZED));
>        writer.addDocument(doc);
>
>        // Optimize + close index
>        writer.optimize();
>        writer.close();
>
>        // Open IndexReader/Searcher
>        IndexReader reader = IndexReader.open(idx, true);
>        Searcher searcher = new IndexSearcher(reader);
>
>        //
>        // Test normal query
>        //
>        String m_field = "content";
>        String m_query = "sector:IT AND group:group";
>        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
>        QueryParser parser = new QueryParser(m_field, analyzer);
>        Query query = parser.parse(m_query);
>
>        // Search!
>        System.out.println("Normal query: " + m_query + "\n");
>        doStreamingSearch(searcher, query);
>
>        System.out.println("===========================================\n");
>
>        //
>        // Test BooleanQuery
>        //
>        BooleanQuery query2 = new BooleanQuery();
>        query2.add(new TermQuery(new Term("sector", "IT")), Occur.MUST);
>        query2.add(new TermQuery(new Term("group", "group")), Occur.MUST);
>
>        // Search!
>        System.out.println("BooleanQuery:\n");
>        doStreamingSearch(searcher, query2);
>        System.out.println("Done!");
>    }
>
>    public static void doStreamingSearch(final Searcher searcher, Query
> query) throws IOException
>    {
>        Collector streamingHitCollector = new Collector()
>        {
>            private Scorer scorer;
>            private int docBase;
>
>            public void collect(int doc) throws IOException
>            {
>                Document theDoc = searcher.doc(doc);
>
>                System.out.println("+ First Name   =
> "+theDoc.get("firstname"));
>                System.out.println("+ Last Name    =
> "+theDoc.get("lastname"));
>                System.out.println("+ Phone Number = "+theDoc.get("phone"));
>                System.out.println("+ Sector       =
> "+theDoc.get("sector"));
>                System.out.println("+ Group        = "+theDoc.get("group"));
>                System.out.println("+ Content      =
> "+theDoc.get("content"));
>                System.out.println("");
>            }
>
>            public boolean acceptsDocsOutOfOrder()
>            {
>                return true;
>            }
>
>            public void setNextReader(IndexReader reader, int docBase)
> throws IOException
>            {
>                this.docBase = docBase;
>            }
>
>            public void setScorer(Scorer scorer) throws IOException
>            {
>                this.scorer = scorer;
>            }
>        };
>        searcher.search(query, streamingHitCollector);
>    }
>
> }
>
> And here's the output of the program :
>
> Normal query: *sector:IT AND group:group*
>
> + First Name   = Mike
> + Last Name    = Nadeau
> + Phone Number = 111-222-3333
> + Sector       = IT
> + Group        = group
> + Content      = blue this is some content
>
> + First Name   = Kaven
> + Last Name    = Rouseau
> + Phone Number = 222-333-1111
> + Sector       = IT
> + Group        = group
> + Content      = red this is some content
>
> + First Name   = Marie-Pier
> + Last Name    = Nadeau
> + Phone Number = 123-123-1234
> *+ Sector       = Accounting*
> + Group        = group
> + Content      = this is some content
>
> ===========================================
>
> BooleanQuery:
>
> Done!
>
> So I have 2 important problems:
>
> (1) the normal query (sector:IT AND group:group) - how can it return
> "Marie-Pier Nadeau", which has "Accounting" as sector?
>
> (2) the BooleanQuery - why isn't it returning anything?
>
> Thanks for your help!
>
> - Mike
> akaris@gmail.com
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message