lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jonathan Mamou <MA...@il.ibm.com>
Subject Re: [jira] Commented: (LUCENE-1001) Add Payload retrieval to Spans
Date Mon, 17 Nov 2008 15:59:30 GMT



Hi,

Here is the relevant code. I would expect to obtain
10
pos: 10
pos: 11

while I obtain
10
pos: 0
pos: 11




import java.io.StringReader;
import java.util.Collection;
import java.util.Iterator;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.spans.PayloadSpans;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;


public class Test {

      public static void main (String args[]) throws Exception{
            IndexWriter writer = new IndexWriter(args[0], new
TestPayloadAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
            Document doc = new Document();
            doc.add();new Field("content", new StringReader("a b c d e f g
h i j a k")));
            writer.addDocument(doc);
            writer.close();

            IndexSearcher is = new IndexSearcher(args[0]););
            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"
));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"
));
            SpanQuery[] sqs = {stq1,stq2};
            SpanNearQuery snq = new SpanNearQuery(sqs,1,true);
            PayloadSpans spans = snq.getPayloadSpans(is.getIndexReader());

            TopDocs topDocs = is.search(snq,1);

            for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                  while) (spans.next()) {
                        System.out.println(spans.start());
                        Collection<byte[]> payloads = spans.getPayload();
                        for (Iterator<byte[]> it = payloads.iterator();
it.hasNext();) {
                              System.out.println(new String(it.next()));
                        }
                  }}
            }
      }
}}

-------------------------------------------------------------------------------------------------------------------------------------
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Payload;


/**
 * Analyzer used by the reproduction: tokenizes with
 * {@link LowerCaseTokenizer} and wraps the result in a
 * {@code PayloadFilter} so that every token carries a payload.
 */
public class TestPayloadAnalyzer extends Analyzer {

    /**
     * Creates the token stream for one field.
     *
     * @param fieldName name of the field being analyzed; forwarded to the filter
     * @param reader    source of the field text
     * @return the lower-casing tokenizer wrapped in a {@code PayloadFilter}
     */
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new PayloadFilter(new LowerCaseTokenizer(reader), fieldName);
    }
}

  class PayloadFilter extends TokenFilter {
    String fieldName;
    int pos;

    public PayloadFilter(TokenStream input, String fieldName) {
      super(input);
      this.fieldName = fieldName;
      pos = 0;

    }

    public Token next() throws IOException {
      Token result = input.next();
      if (result != null) {
        String token = new String(result.termBuffer(), 0, result.termLength
());
        result.setPayload(),new Payload(("pos: " + pos).getBytes()));
        pos += result.getPositionIncrement();
      }
      return} result;
    }
  }


Jonathan


---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org


Mime
View raw message