incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject [2/2] git commit: Fixing a bug where if the first query for a given segment is exited early then the primedoc bitset is truncated and never rebuilt. The result of this is the given segment always has truncated searches as well as strange results until the table is disabled and reenabled or the cluster is restarted.
Date Thu, 17 Apr 2014 02:52:43 GMT
Fixes a bug where, if the first query for a given segment exits early, the prime doc
bitset is left truncated and is never rebuilt.  As a result, searches against that segment
are always truncated and return strange results until the table is disabled and re-enabled
or the cluster is restarted.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/c716c847
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/c716c847
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/c716c847

Branch: refs/heads/apache-blur-0.2
Commit: c716c847987fa416b64cc589760e7227dbb902a5
Parents: 6b89354
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Wed Apr 16 22:43:11 2014 -0400
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Wed Apr 16 22:43:11 2014 -0400

----------------------------------------------------------------------
 .../blur/lucene/search/PrimeDocCache.java       | 70 +++++++++-----------
 1 file changed, 33 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/c716c847/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java b/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
index 133522f..0c31359 100644
--- a/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
+++ b/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
@@ -22,14 +22,12 @@ import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.blur.log.Log;
 import org.apache.blur.log.LogFactory;
-import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.ReaderClosedListener;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.OpenBitSet;
 
 public class PrimeDocCache {
@@ -38,52 +36,50 @@ public class PrimeDocCache {
 
   public static final OpenBitSet EMPTY_BIT_SET = new OpenBitSet();
 
-  private static Map<Term,Map<Object, OpenBitSet>> termPrimeDocMap = new ConcurrentHashMap<Term,
Map<Object,OpenBitSet>>();
+  private static Map<Term, Map<Object, OpenBitSet>> termPrimeDocMap = new ConcurrentHashMap<Term,
Map<Object, OpenBitSet>>();
 
   /**
    * The way this method is called via warm up methods the likelihood of
    * creating multiple bitsets during a race condition is very low, that's why
    * this method is not synced.
    */
-  public static OpenBitSet getPrimeDocBitSet(Term primeDocTerm, IndexReader reader) throws
IOException {
+  public static OpenBitSet getPrimeDocBitSet(Term primeDocTerm, AtomicReader reader) throws
IOException {
     Object key = reader.getCoreCacheKey();
     final Map<Object, OpenBitSet> primeDocMap = getPrimeDocMap(primeDocTerm);
     OpenBitSet bitSet = primeDocMap.get(key);
     if (bitSet == null) {
-      reader.addReaderClosedListener(new ReaderClosedListener() {
-        @Override
-        public void onClose(IndexReader reader) {
-          Object key = reader.getCoreCacheKey();
-          LOG.debug("Current size [" + primeDocMap.size() + "] Prime Doc BitSet removing
for segment [" + reader + "]");
-          primeDocMap.remove(key);
-        }
-      });
-      LOG.debug("Prime Doc BitSet missing for segment [" + reader + "] current size [" +
primeDocMap.size() + "]");
-      final OpenBitSet bs = new OpenBitSet(reader.maxDoc());
-      primeDocMap.put(key, bs);
-      IndexSearcher searcher = new IndexSearcher(reader);
-      searcher.search(new TermQuery(primeDocTerm), new Collector() {
-
-        @Override
-        public void setScorer(Scorer scorer) throws IOException {
-
-        }
+      synchronized (reader) {
+        reader.addReaderClosedListener(new ReaderClosedListener() {
+          @Override
+          public void onClose(IndexReader reader) {
+            Object key = reader.getCoreCacheKey();
+            LOG.debug("Current size [" + primeDocMap.size() + "] Prime Doc BitSet removing
for segment [" + reader
+                + "]");
+            primeDocMap.remove(key);
+          }
+        });
+        LOG.debug("Prime Doc BitSet missing for segment [" + reader + "] current size ["
+ primeDocMap.size() + "]");
+        final OpenBitSet bs = new OpenBitSet(reader.maxDoc());
 
-        @Override
-        public void setNextReader(AtomicReaderContext atomicReaderContext) throws IOException
{
+        DocsEnum termDocsEnum = reader.termDocsEnum(primeDocTerm);
+        if (termDocsEnum == null) {
+          return bs;
         }
-
-        @Override
-        public void collect(int doc) throws IOException {
-          bs.set(doc);
+        int docFreq = reader.docFreq(primeDocTerm);
+        int doc;
+        int count = 0;
+        while ((doc = termDocsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+          bs.fastSet(doc);
+          count++;
         }
-
-        @Override
-        public boolean acceptsDocsOutOfOrder() {
-          return false;
+        if (count == docFreq) {
+          primeDocMap.put(key, bs);
+        } else {
+          LOG.info("PrimeDoc for reader [{0}] not stored, because count [{1}] and freq [{2}]
do not match.", reader,
+              count, docFreq);
         }
-      });
-      return bs;
+        return bs;
+      }
     }
     return bitSet;
   }


Mime
View raw message