incubator-blur-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amccu...@apache.org
Subject git commit: Fixing a bug in the primedoc cache that causes the bitset to be rebuilt if there are any deletes in the segment.
Date Tue, 13 Jan 2015 03:10:09 GMT
Repository: incubator-blur
Updated Branches:
  refs/heads/master e5c8a34cd -> 81129a0ce


Fixing a bug in the primedoc cache that causes the bitset to be rebuilt if there are any deletes
in the segment.


Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/81129a0c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/81129a0c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/81129a0c

Branch: refs/heads/master
Commit: 81129a0ce5979a22f8efeaa6abbbcaf627e39f76
Parents: e5c8a34
Author: Aaron McCurry <amccurry@gmail.com>
Authored: Mon Jan 12 22:09:56 2015 -0500
Committer: Aaron McCurry <amccurry@gmail.com>
Committed: Mon Jan 12 22:09:56 2015 -0500

----------------------------------------------------------------------
 .../org/apache/blur/utils/BlurUtilsTest.java    |  2 ++
 .../blur/lucene/search/PrimeDocCache.java       | 25 ++++++++++++++------
 .../BaseSpatialFieldTypeDefinitionTest.java     |  3 +++
 3 files changed, 23 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/81129a0c/blur-core/src/test/java/org/apache/blur/utils/BlurUtilsTest.java
----------------------------------------------------------------------
diff --git a/blur-core/src/test/java/org/apache/blur/utils/BlurUtilsTest.java b/blur-core/src/test/java/org/apache/blur/utils/BlurUtilsTest.java
index dce02e8..e3691ee 100644
--- a/blur-core/src/test/java/org/apache/blur/utils/BlurUtilsTest.java
+++ b/blur-core/src/test/java/org/apache/blur/utils/BlurUtilsTest.java
@@ -250,6 +250,7 @@ public class BlurUtilsTest {
     IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
     IndexWriter writer = new IndexWriter(directory, conf);
     Document doc = new Document();
+    doc.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
     doc.add(new StringField("a", "b", Store.YES));
     doc.add(new StringField("family", "f1", Store.YES));
 
@@ -267,6 +268,7 @@ public class BlurUtilsTest {
     IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
     IndexWriter writer = new IndexWriter(directory, conf);
     Document doc = new Document();
+    doc.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
     doc.add(new StringField("a", "b", Store.YES));
     doc.add(new StringField("family", "f2", Store.YES));
 

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/81129a0c/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
----------------------------------------------------------------------
diff --git a/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java b/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
index 995ed7e..4faa954 100644
--- a/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
+++ b/blur-query/src/main/java/org/apache/blur/lucene/search/PrimeDocCache.java
@@ -25,9 +25,12 @@ import org.apache.blur.log.Log;
 import org.apache.blur.log.LogFactory;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.ReaderClosedListener;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.OpenBitSet;
 
@@ -65,23 +68,31 @@ public class PrimeDocCache {
         LOG.debug("Prime Doc BitSet missing for segment [" + reader + "] current size [" + primeDocMap.size() + "]");
         final OpenBitSet bs = new OpenBitSet(reader.maxDoc());
 
-        DocsEnum termDocsEnum = reader.termDocsEnum(primeDocTerm);
-        if (termDocsEnum == null) {
-          return bs;
+        Fields fields = reader.fields();
+        if (fields == null) {
+          throw new IOException("Missing all fields.");
         }
+        Terms terms = fields.terms(primeDocTerm.field());
+        if (terms == null) {
+          throw new IOException("Missing prime doc field [" + primeDocTerm.field() + "].");
+        }
+        TermsEnum termsEnum = terms.iterator(null);
+        if (!termsEnum.seekExact(primeDocTerm.bytes(), true)) {
+          throw new IOException("Missing prime doc term [" + primeDocTerm + "].");
+        }
+
+        DocsEnum docsEnum = termsEnum.docs(null, null);
         int docFreq = reader.docFreq(primeDocTerm);
         int doc;
         int count = 0;
-        while ((doc = termDocsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        while ((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
           bs.fastSet(doc);
           count++;
         }
         if (count == docFreq) {
           primeDocMap.put(key, bs);
         } else {
-          // @TODO deal with deletes correctly... docFreq does not reflect
-          // deletes
-          LOG.info("PrimeDoc for reader [{0}] not stored, because count [{1}] and freq [{2}] do not match.", reader,
+          LOG.warn("PrimeDoc for reader [{0}] not stored, because count [{1}] and freq [{2}] do not match.", reader,
               count, docFreq);
         }
         return bs;

http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/81129a0c/blur-query/src/test/java/org/apache/blur/analysis/type/spatial/BaseSpatialFieldTypeDefinitionTest.java
----------------------------------------------------------------------
diff --git a/blur-query/src/test/java/org/apache/blur/analysis/type/spatial/BaseSpatialFieldTypeDefinitionTest.java b/blur-query/src/test/java/org/apache/blur/analysis/type/spatial/BaseSpatialFieldTypeDefinitionTest.java
index 4bc06f8..d5bcfdb 100644
--- a/blur-query/src/test/java/org/apache/blur/analysis/type/spatial/BaseSpatialFieldTypeDefinitionTest.java
+++ b/blur-query/src/test/java/org/apache/blur/analysis/type/spatial/BaseSpatialFieldTypeDefinitionTest.java
@@ -33,6 +33,8 @@ import org.apache.blur.utils.BlurConstants;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriter;
@@ -66,6 +68,7 @@ public abstract class BaseSpatialFieldTypeDefinitionTest {
     IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, fieldManager.getAnalyzerForIndex());
 
     IndexWriter writer = new IndexWriter(_dir, conf);
+    fields.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
     writer.addDocument(fields);
     writer.close();
 


Mime
View raw message