lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Xavier Sanchez Loro <xav...@trovit.com>
Subject Help retrieving BinaryDocValues
Date Wed, 05 Feb 2014 20:00:54 GMT
Hi,
I have some problems working with BinaryDocValues. The code below works 
well with a few thousand documents, but with more than 65000 
documents it does not return the correct BinaryDocValues once the docId 
(with docBase rebasing) reaches a certain value. From that point on, it 
cycles, returning the BinaryDocValues of the first docs. I'm working with 
Lucene/Solr 4.3.

I tested this code by indexing 100000 documents, each with a 
"binary_ids_campaigns" value equal to its docId. After approximately docId 
65500, it returns the BinaryDocValues corresponding to the first doc ids. I 
have followed the API instructions on how to rebase the docId, but I guess 
I'm missing something. If someone could point me in the right direction, I 
would really appreciate it.

Best regards,
Xavier

/**
 * Runs the request's query through a {@link CpcCollector} on the newest
 * searcher of {@code core} and stores what the collector gathered in this
 * object's {@code idsCampaigns}, {@code maxOrganicScore} and {@code maxCpc}
 * fields. When the core name contains "ppc", the documents gathered by the
 * collector are also appended to the request's filters as a
 * {@link FilteredQuery}.
 *
 * @param rb       supplies the query and filter queries; its filter list is
 *                 extended (or created) for "ppc" cores
 * @param core     core whose newest searcher is used
 * @param observer handed to the collector unchanged
 * @throws SolrException with {@code SERVER_ERROR}, wrapping any exception
 *                       raised while the search is set up or executed
 */
public void computeVals(ResponseBuilder rb, SolrCore core,
     final CampaignObserver observer) {
     RefCounted<SolrIndexSearcher> searchHolder = null;
     try {
       searchHolder = core.getNewestSearcher(false);
       final SolrIndexSearcher searcher = searchHolder.get();
       final AtomicReader reader = searcher.getAtomicReader();

       idsCampaigns = reader.getBinaryDocValues("binary_ids_campaigns");

       final float[] initialTopscore = {Float.NEGATIVE_INFINITY};
       final CpcCollector collector = new CpcCollector(
           reader, initialTopscore, observer, maxCpc, idsCampaigns, maxDocCpc);

       // No base DocSet is supplied at this point; only the request's own
       // filter queries are turned into a processed filter.
       final DocSet baseDocSet = null;
       final SolrIndexSearcher.ProcessedFilter processed =
           searcher.getProcessedFilter(baseDocSet, rb.getFilters());

       // Search with the processed filter when there is one, otherwise unfiltered.
       final boolean noUsableFilter = processed == null || processed.filter == null;
       if (noUsableFilter) {
         searcher.search(rb.getQuery(), collector);
       } else {
         searcher.search(rb.getQuery(), processed.filter, collector);
       }

       maxOrganicScore = collector.getTopscore()[0];
       maxCpc = collector.getMaxCpc();

       // Only "ppc" cores get the extra filter; other cores just keep the
       // collected score/cpc values.
       if (core.getName().contains("ppc")) {
         final DocSet collectedDocs = collector.getDocSet();
         List<Query> requestFilters = rb.getFilters();
         if (requestFilters == null) {
           requestFilters = new ArrayList<Query>();
         }
         final Query restriction =
             new FilteredQuery(rb.getQuery(), collectedDocs.getTopFilter());
         requestFilters.add(restriction);
         rb.setFilters(requestFilters);
       }
     } catch (Exception e) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
               "Error loading FieldCache.Ints for idcampaigns field", e);
     } finally {
       // Always give the searcher reference back, even on failure.
       if (searchHolder != null) {
         searchHolder.decref();
       }
     }
   }

   ----------------- Collector code -----------------------

   public class CpcCollector extends Collector {
   private static Logger log = LoggerFactory.getLogger(CpcCollector.class);
   private SortedIntDocSet docSet = null;
   Scorer scorer;
   private final float[] topscore;
   private CampaignObserver observer;
   private float maxCpc;
   private com.carrotsearch.hppc.IntArrayList idDocs;
   private BinaryDocValues values;
   private com.carrotsearch.hppc.IntFloatOpenHashMap maxDocCpc;//Maximum 
cpc per document
   private int docBase = 0;

   /**
    *
    * @param reader
    * @param topscore
    * @param observer
    * @param ids
    * @param maxCpc
    * @param values
    */
   public CpcCollector(IndexReader reader, final float[] topscore, 
CampaignObserver observer, float maxCpc, BinaryDocValues values, 
com.carrotsearch.hppc.IntFloatOpenHashMap maxDocCpc) {

     this.topscore = topscore;
     this.observer = observer;
     this.maxCpc = maxCpc;
     idDocs = new com.carrotsearch.hppc.IntArrayList();
     this.maxDocCpc = maxDocCpc;
     this.values = values;
   }

   @Override
   public void setScorer(Scorer scorer) throws IOException {
     this.scorer = scorer;
   }

   @Override
   public void collect(int doc) throws IOException {
     float score = scorer.score();
     if (score > getTopscore()[0]) {
       topscore[0] = score;
     }
     BytesRef term = new BytesRef();
     values.get(doc + docBase, term);
     int size = (int) term.bytes[term.offset] * 4 + 1;
     byte[] docValues = new byte[size];
     ByteBuffer.wrap(term.bytes, term.offset, size).get(docValues, 0, size);
     int[] campIds = observer.parseBinaryIdsOldSkoolWayArray(docValues);
     if (campIds != null) {
       float cpc = observer.getMaxActiveCpc(campIds);
       getMaxDocCpc().put(doc + docBase, cpc);
       if (cpc > 0) {
         if (cpc > getMaxCpc()) {
           maxCpc = cpc;
         }
         //active campaign
         idDocs.add(doc + docBase);
       }
     }
   }

   @Override
   public boolean acceptsDocsOutOfOrder() {
     return true;//podria ser tru
   }

   @Override
   public void setNextReader(AtomicReaderContext context) throws 
IOException {
     this.docBase = context.docBase;
   }

   /**
    * @return the topscore
    */
   public float[] getTopscore() {
     return topscore;
   }

   /**
    * @return the maxCpc
    */
   public float getMaxCpc() {
     return maxCpc;
   }

   /**
    * @return the docSet
    */
   public SortedIntDocSet getDocSet() {
     docSet = new SortedIntDocSet(idDocs.toArray());
     return docSet;
   }

   /**
    * @return the maxDocCpc
    */
   public com.carrotsearch.hppc.IntFloatOpenHashMap getMaxDocCpc() {
     return maxDocCpc;
   }

}







---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message