lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Sean O'Connor <sean...@gmail.com>
Subject Using LUCENE-2878 in Solr, e.g. PositionQueryComponent
Date Tue, 24 Jul 2012 19:00:08 GMT
Hi,
I've been trying to use the new/experimental positions code from
LUCENE-2878 in Solr. I've extended oashc.QueryComponent as a chance to
learn the code a bit. I've been able to access (scorer) frequencies, but 
not interval positions. I'm not sure of the best method for sharing my 
test and component, so feel free to point me in a better direction. For 
now I'll include the component and test case below.

NOTES:

  * I picked the schema field 'test_posofftv' in an attempt to use a
    field stored with vectors, positions, offsets...
  * I'm not clear on the difference between
      o searcher.createNormalizedWeight(query) and
      o query.createWeight()
      o query.createWeight() seems to need a rewritten query, so perhaps
        searcher... is the right approach?
  * It looks like for simple queries (Boolean, Term, ...) createWeight
    resolves to MatchOnlyTermScorers, which throw an
    UnsupportedOperationException.
      o Most likely I am missing something obvious in setup/implementation.
      o The phrase query gets me a functioning IntervalIterator, but
        null Intervals
  * I am naively setting parameters in the weight.scorer(), particularly
    scoreDocsInOrder and topScorer
  * I'm a bit lost on Scorer.advance(n), IntervalIterator.next(), and
    IntervalIterator.scorerAdvanced(n)



// ----------------- Start Component code ---------------------------
package org.apache.solr.handler.component;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.positions.Interval;
import org.apache.lucene.search.positions.IntervalIterator;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.List;


public class PositionFooComponent extends QueryComponent {
     public static Logger log = 
LoggerFactory.getLogger(PositionFooComponent.class);

     /**
      * Just testing -- let parent do the actual work, just want to see 
if I can access positions here
      * I not to be able to getLiveDocs() -- I'm doing something fubar 
but not sure what....
      */
     @Override
     public void process(ResponseBuilder rb) throws IOException {
         super.process(rb);      // useless for this setup?
         log.info("PositionFooComponent process: " + rb);       // 
sanity check that we are actually being called

         SolrQueryRequest req = rb.req;
         SolrIndexSearcher searcher = req.getSearcher();

         //I'm not clear on the best practice/rules with contexts & 
leaves, could be the source of my problems?
         IndexReaderContext topReaderContext = 
searcher.getTopReaderContext();
         List<AtomicReaderContext> leaves = topReaderContext.leaves();

         Query userQuery = rb.getQuery();        // anything special 
about query type and positions?
         log.debug("RB.queries: {}", userQuery);

         Query rewrite = searcher.rewrite(userQuery);        // saw a 
position test call rewrite, is this necesary?

         for (AtomicReaderContext arc : leaves) {
             log.info("\nContext docbase:{} -- ord:{}", arc.docBase, 
arc.ord);
             AtomicReader r = arc.reader();

             Weight weight = 
searcher.createNormalizedWeight(userQuery);     // not sure of 
difference between searcher.createNormalizedWeight and query.createWeight
             Scorer scorer = weight.scorer(arc, true, true, true, true, 
true, r.getLiveDocs());
             int zero = scorer.advance(0);
             IntervalIterator positions = scorer.positions();
             Interval interval = positions.next();
             log.info("position begin (will error if interval==null): 
{}", interval.begin);      // interval is coming back null, I'm doing 
something wrong...

             Weight fooWeight = rewrite.createWeight(searcher);
             Scorer fooScorer = fooWeight.scorer(arc, true, true, true, 
true, true, r.getLiveDocs());
             int fooZero = fooScorer.advance(0);
             IntervalIterator fooPositions = scorer.positions();
//            int fooZero2 = fooPositions.scorerAdvanced(fooZero);
             Interval fooInterval = fooPositions.next();
             log.info("position begin (will error if interval==null): 
{}", fooInterval.begin);      // interval is coming back null, I'm doing 
something wrong...

         }
     }
}


// ---------------------- Start test case ----------------------------
package org.apache.solr.handler.component;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;


/**
 * Smoke test that pushes a sloppy phrase query through the "foo" request
 * handler so that PositionFooComponent gets invoked.
 */
public class PositionFooComponentTest extends SolrTestCaseJ4 {
     public static final String FIELD = "test_posofftv";
     // Alternative field to experiment with: "subject"

     /** Starts the test core and indexes four small documents with ids 1..4. */
     @BeforeClass
     public static void beforeClass() throws Exception {
         initCore("solrconfig-foo.xml", "schema.xml");

         // Document i + 1 gets bodies[i] as the value of FIELD.
         final String[] bodies = {
             "one two three four",
             "two three four",
             "three four",
             "four",
         };
         for (int i = 0; i < bodies.length; i++) {
             assertU(adoc("id", String.valueOf(i + 1), FIELD, bodies[i]));
         }
         assertU(commit());
     }

     /** Runs the phrase query "two three" with slop 3 and expects two hits. */
     public void testTwoThree() {
         final String phraseQuery = "(\"two three\"~3)";
         final String fieldList = "id, " + FIELD + ", score";
         SolrQueryRequest sqr = req(
             CommonParams.DF, FIELD,
             CommonParams.Q, phraseQuery,
             CommonParams.FL, fieldList,
             CommonParams.QT, "foo");
         assertQ("Error in testing?", sqr, "//*[@numFound='2']");
     }
}


// ---------------- Start solrconfig-foo.xml --------------------------
<?xml version="1.0" encoding="UTF-8" ?>
<config>
   <!-- Solr configuration loaded by PositionFooComponentTest via initCore("solrconfig-foo.xml", ...). -->
   <luceneMatchVersion>LUCENE_50</luceneMatchVersion>
   <dataDir>${solr.data.dir:}</dataDir>

   <!--<directoryFactory name="DirectoryFactory" 
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>-->

   <!-- Registers PositionFooComponent under the name "fooComponent". -->
   <searchComponent name="fooComponent" 
class="org.apache.solr.handler.component.PositionFooComponent">
     <lst name="defaults">
       <str name="fl">test_posofftv</str>
     </lst>
   </searchComponent>

   <!-- Request handler "foo" (the test selects it with qt=foo); its component list names only fooComponent. -->
   <requestHandler name="foo" 
class="org.apache.solr.handler.component.SearchHandler">
       <lst name="defaults">
           <!--<str name="defType">sdr</str>-->
           <str name="fl">id,test_posofftv</str>
       </lst>
       <arr name="components">
           <str>fooComponent</str>
       </arr>
   </requestHandler>


</config>


The assertQ should be ignored; it is just a quick way to get the query
to run (I am overdue in digging into the test framework and learning to
use it properly). At the moment, the Intervals in the component come
back null. I assume this is because I am misusing the new code. If 
anyone could straighten me out on how to 'properly' use the positions 
code in Solr I would be most grateful.

Thanks,

Sean


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Mime
View raw message