lucene-general mailing list archives

From "Rahman Mukras" <rahman.muk...@googlemail.com>
Subject Re: Adding a PhraseQuery to a PhraseQuery
Date Wed, 23 May 2007 11:26:45 GMT
> >> > Hi all, I would like to add a PhraseQuery to a PhraseQuery so that
> >> > I can allow slop between phrases and terms. Something like:
> >> >
> >> > ----
> >> > PhraseQuery mainPQ = new PhraseQuery();
> >> > PhraseQuery subPQ   = new PhraseQuery();
> >> >
> >> > subPQ.add(new Term("contents","great"));
> >> > subPQ.add(new Term("contents","actor"));
> >> >
> >> > mainPQ.add(subPQ);
> >> > mainPQ.add(new Term("contents","Jean"));
> >> > mainPQ.setSlop(20);
> >>
> >> You'll need to use SpanNearQuery instead.
> >>
> >>         Erik
> >>
> >
> > Thanks for the tip, Erik, but I can't figure out how to write the
> > query. Could you possibly give me an example for the above problem?
>
> Sorry for not providing details earlier.  The code below comes from
> the "Lucene in Action" source code, which you can get from
> lucenebook.com.  It's a JUnit test developed for Lucene 1.4.x, so
> there may be something in it that needs upgrading.
>
> Let me know if you have any other questions.
>
>         Erik
>
> package lia.advsearching;
>
> import junit.framework.TestCase;
> import lia.analysis.AnalyzerUtils;
> import org.apache.lucene.analysis.Analyzer;
> import org.apache.lucene.analysis.Token;
> import org.apache.lucene.analysis.WhitespaceAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.index.IndexReader;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.Term;
> import org.apache.lucene.search.Hits;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.PhraseQuery;
> import org.apache.lucene.search.Query;
> import org.apache.lucene.search.spans.SpanFirstQuery;
> import org.apache.lucene.search.spans.SpanNearQuery;
> import org.apache.lucene.search.spans.SpanNotQuery;
> import org.apache.lucene.search.spans.SpanOrQuery;
> import org.apache.lucene.search.spans.SpanQuery;
> import org.apache.lucene.search.spans.SpanTermQuery;
> import org.apache.lucene.search.spans.Spans;
> import org.apache.lucene.store.RAMDirectory;
>
> import java.io.IOException;
>
> public class SpanQueryTest extends TestCase {
>    private RAMDirectory directory;
>    private IndexSearcher searcher;
>    private IndexReader reader;
>
>    private SpanTermQuery quick;
>    private SpanTermQuery brown;
>    private SpanTermQuery red;
>    private SpanTermQuery fox;
>    private SpanTermQuery lazy;
>    private SpanTermQuery sleepy;
>    private SpanTermQuery dog;
>    private SpanTermQuery cat;
>    private Analyzer analyzer;
>
>    protected void setUp() throws Exception {
>      directory = new RAMDirectory();
>
>      analyzer = new WhitespaceAnalyzer();
>      IndexWriter writer = new IndexWriter(directory,
>          analyzer, true);
>
>      Document doc = new Document();
>      doc.add(Field.Text("f",
>          "the quick brown fox jumps over the lazy dog"));
>      writer.addDocument(doc);
>
>      doc = new Document();
>      doc.add(Field.Text("f",
>          "the quick red fox jumps over the sleepy cat"));
>      writer.addDocument(doc);
>
>      writer.close();
>
>      searcher = new IndexSearcher(directory);
>      reader = IndexReader.open(directory);
>
>      quick = new SpanTermQuery(new Term("f", "quick"));
>      brown = new SpanTermQuery(new Term("f", "brown"));
>      red = new SpanTermQuery(new Term("f", "red"));
>      fox = new SpanTermQuery(new Term("f", "fox"));
>      lazy = new SpanTermQuery(new Term("f", "lazy"));
>      sleepy = new SpanTermQuery(new Term("f", "sleepy"));
>      dog = new SpanTermQuery(new Term("f", "dog"));
>      cat = new SpanTermQuery(new Term("f", "cat"));
>    }
>
>    private void assertOnlyBrownFox(Query query) throws Exception {
>      Hits hits = searcher.search(query);
>      assertEquals(1, hits.length());
>      assertEquals("wrong doc", 0, hits.id(0));
>    }
>
>    private void assertBothFoxes(Query query) throws Exception {
>      Hits hits = searcher.search(query);
>      assertEquals(2, hits.length());
>    }
>
>    private void assertNoMatches(Query query) throws Exception {
>      Hits hits = searcher.search(query);
>      assertEquals(0, hits.length());
>    }
>
>    public void testSpanTermQuery() throws Exception {
>      assertOnlyBrownFox(brown);
>      dumpSpans(brown);
>    }
>
>    public void testSpanFirstQuery() throws Exception {
>      SpanFirstQuery sfq = new SpanFirstQuery(brown, 2);
>      assertNoMatches(sfq);
>
>      dumpSpans(sfq);
>
>      sfq = new SpanFirstQuery(brown, 3);
>      dumpSpans(sfq);
>      assertOnlyBrownFox(sfq);
>    }
>
>    public void testSpanNearQuery() throws Exception {
>      SpanQuery[] quick_brown_dog =
>          new SpanQuery[]{quick, brown, dog};
>      SpanNearQuery snq =
>          new SpanNearQuery(quick_brown_dog, 0, true);
>      assertNoMatches(snq);
>      dumpSpans(snq);
>
>      snq = new SpanNearQuery(quick_brown_dog, 4, true);
>      assertNoMatches(snq);
>      dumpSpans(snq);
>
>      snq = new SpanNearQuery(quick_brown_dog, 5, true);
>      assertOnlyBrownFox(snq);
>      dumpSpans(snq);
>
>      // interesting - even a sloppy phrase query would require
>      // more slop to match
>      snq = new SpanNearQuery(new SpanQuery[]{lazy, fox}, 3, false);
>      assertOnlyBrownFox(snq);
>      dumpSpans(snq);
>
>      PhraseQuery pq = new PhraseQuery();
>      pq.add(new Term("f", "lazy"));
>      pq.add(new Term("f", "fox"));
>      pq.setSlop(4);
>      assertNoMatches(pq);
>
>      pq.setSlop(5);
>      assertOnlyBrownFox(pq);
>    }
>
>    public void testSpanNotQuery() throws Exception {
>      SpanNearQuery quick_fox =
>          new SpanNearQuery(new SpanQuery[]{quick, fox}, 1, true);
>      assertBothFoxes(quick_fox);
>      dumpSpans(quick_fox);
>
>      SpanNotQuery quick_fox_dog = new SpanNotQuery(quick_fox, dog);
>      assertBothFoxes(quick_fox_dog);
>      dumpSpans(quick_fox_dog);
>
>      SpanNotQuery no_quick_red_fox =
>          new SpanNotQuery(quick_fox, red);
>      assertOnlyBrownFox(no_quick_red_fox);
>      dumpSpans(no_quick_red_fox);
>    }
>
>    public void testSpanOrQuery() throws Exception {
>      SpanNearQuery quick_fox =
>          new SpanNearQuery(new SpanQuery[]{quick, fox}, 1, true);
>
>      SpanNearQuery lazy_dog =
>          new SpanNearQuery(new SpanQuery[]{lazy, dog}, 0, true);
>
>      SpanNearQuery sleepy_cat =
>          new SpanNearQuery(new SpanQuery[]{sleepy, cat}, 0, true);
>
>      SpanNearQuery qf_near_ld =
>          new SpanNearQuery(
>              new SpanQuery[]{quick_fox, lazy_dog}, 3, true);
>      assertOnlyBrownFox(qf_near_ld);
>      dumpSpans(qf_near_ld);
>
>      SpanNearQuery qf_near_sc =
>          new SpanNearQuery(
>              new SpanQuery[]{quick_fox, sleepy_cat}, 3, true);
>      dumpSpans(qf_near_sc);
>
>      SpanOrQuery or = new SpanOrQuery(
>          new SpanQuery[]{qf_near_ld, qf_near_sc});
>      assertBothFoxes(or);
>      dumpSpans(or);
>    }
>
>    public void testPlay() throws Exception {
>      SpanOrQuery or = new SpanOrQuery(new SpanQuery[]{quick, fox});
>      dumpSpans(or);
>
>      SpanNearQuery quick_fox =
>          new SpanNearQuery(new SpanQuery[]{quick, fox}, 1, true);
>      SpanFirstQuery sfq = new SpanFirstQuery(quick_fox, 4);
>      dumpSpans(sfq);
>
>      dumpSpans(new SpanTermQuery(new Term("f", "the")));
>
>      SpanNearQuery quick_brown =
>          new SpanNearQuery(new SpanQuery[]{quick, brown}, 0, false);
>      dumpSpans(quick_brown);
>
>    }
>
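>    // Prints every span the given query matches: each matching document's
>    // "f" field is re-analyzed and the span is marked with < and >,
>    // followed by that document's score.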
>    private void dumpSpans(SpanQuery query) throws IOException {
>      Spans spans = query.getSpans(reader);
>      System.out.println(query + ":");
>      int numSpans = 0;
>
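>      // run the query through the searcher as well, so each document's
>      // score can be printed next to its spans (the two-element array
>      // matches the two documents indexed in setUp)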
>      Hits hits = searcher.search(query);
>      float[] scores = new float[2];
>      for (int i = 0; i < hits.length(); i++) {
>        scores[hits.id(i)] = hits.score(i);
>      }
>
>      while (spans.next()) {
>        numSpans++;
>
>        int id = spans.doc();
>        Document doc = reader.document(id);
>
>        // for simplicity - assume tokens are in sequential
>        // positions, starting from 0
>        Token[] tokens = AnalyzerUtils.tokensFromAnalysis(
>            analyzer, doc.get("f"));
>        StringBuffer buffer = new StringBuffer();
>        buffer.append("   ");
>        for (int i = 0; i < tokens.length; i++) {
>          if (i == spans.start()) {
>            buffer.append("<");
>          }
>          buffer.append(tokens[i].termText());
>          if (i + 1 == spans.end()) {
>            buffer.append(">");
>          }
>          buffer.append(" ");
>        }
>        buffer.append("(" + scores[id] + ") ");
>        System.out.println(buffer);
> //      System.out.println(searcher.explain(query, id));
>      }
>
>      if (numSpans == 0) {
>        System.out.println("   No spans");
>      }
>      System.out.println();
>    }
> }
>
>

Thanks for the resource, Erik.
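
For the archives: based on Erik's pointer, here is roughly how my original
query can be expressed with nested span queries. This is an untested sketch
against the same 1.4.x-era span API used in the test above (same
SpanTermQuery, SpanNearQuery, SpanQuery, and Term imports), with the
"contents" field from my first mail:

    SpanTermQuery great = new SpanTermQuery(new Term("contents", "great"));
    SpanTermQuery actor = new SpanTermQuery(new Term("contents", "actor"));
    SpanTermQuery jean  = new SpanTermQuery(new Term("contents", "Jean"));

    // "great actor" as an exact phrase: zero slop, in order
    SpanNearQuery greatActor =
        new SpanNearQuery(new SpanQuery[]{great, actor}, 0, true);

    // the phrase "great actor" within 20 positions of "Jean",
    // in either order
    SpanNearQuery query =
        new SpanNearQuery(new SpanQuery[]{greatActor, jean}, 20, false);

Since SpanNearQuery is itself a SpanQuery, the inner phrase can be nested
inside the outer query, which is exactly what PhraseQuery.add() does not
allow. The result is an ordinary Query, so it can be passed straight to
searcher.search().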
