lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rm...@apache.org
Subject svn commit: r1640472 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/index/ lucene/core/src/test/org/apache/lucene/index/
Date Wed, 19 Nov 2014 03:18:58 GMT
Author: rmuir
Date: Wed Nov 19 03:18:57 2014
New Revision: 1640472

URL: http://svn.apache.org/r1640472
Log:
LUCENE-6062: pass correct fieldinfos to dv producer when the segment has updates

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValues.java
Wed Nov 19 03:18:57 2014
@@ -37,8 +37,7 @@ final class SegmentDocValues {
 
   private final Map<Long,RefCount<DocValuesProducer>> genDVProducers = new HashMap<>();
 
-  private RefCount<DocValuesProducer> newDocValuesProducer(SegmentCommitInfo si, IOContext
context, Directory dir,
-      DocValuesFormat dvFormat, final Long gen, FieldInfos infos) throws IOException {
+  private RefCount<DocValuesProducer> newDocValuesProducer(SegmentCommitInfo si, Directory
dir, final Long gen, FieldInfos infos) throws IOException {
     Directory dvDir = dir;
     String segmentSuffix = "";
     if (gen.longValue() != -1) {
@@ -47,7 +46,8 @@ final class SegmentDocValues {
     }
 
     // set SegmentReadState to list only the fields that are relevant to that gen
-    SegmentReadState srs = new SegmentReadState(dvDir, si.info, infos, context, segmentSuffix);
+    SegmentReadState srs = new SegmentReadState(dvDir, si.info, infos, IOContext.READ, segmentSuffix);
+    DocValuesFormat dvFormat = si.info.getCodec().docValuesFormat();
     return new RefCount<DocValuesProducer>(dvFormat.fieldsProducer(srs)) {
       @SuppressWarnings("synthetic-access")
       @Override
@@ -61,11 +61,10 @@ final class SegmentDocValues {
   }
 
   /** Returns the {@link DocValuesProducer} for the given generation. */
-  synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentCommitInfo si, IOContext
context, Directory dir, 
-      DocValuesFormat dvFormat, FieldInfos infos) throws IOException {
+  synchronized DocValuesProducer getDocValuesProducer(long gen, SegmentCommitInfo si, Directory
dir, FieldInfos infos) throws IOException {
     RefCount<DocValuesProducer> dvp = genDVProducers.get(gen);
     if (dvp == null) {
-      dvp = newDocValuesProducer(si, context, dir, dvFormat, gen, infos);
+      dvp = newDocValuesProducer(si, dir, gen, infos);
       assert dvp != null;
       genDVProducers.put(gen, dvp);
     } else {

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
Wed Nov 19 03:18:57 2014
@@ -26,10 +26,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.Bits;
@@ -49,28 +47,37 @@ class SegmentDocValuesProducer extends D
   final Set<DocValuesProducer> dvProducers = Collections.newSetFromMap(new IdentityHashMap<DocValuesProducer,Boolean>());
   final List<Long> dvGens = new ArrayList<>();
   
-  SegmentDocValuesProducer(SegmentCommitInfo si, Directory dir, FieldInfos fieldInfos, SegmentDocValues
segDocValues, DocValuesFormat dvFormat) throws IOException {
+  /**
+   * Creates a new producer that handles updated docvalues fields
+   * @param si commit point
+   * @param dir directory
+   * @param coreInfos fieldinfos for the segment
+   * @param allInfos all fieldinfos including updated ones
+   * @param segDocValues producer map
+   */
+  SegmentDocValuesProducer(SegmentCommitInfo si, Directory dir, FieldInfos coreInfos, FieldInfos
allInfos, SegmentDocValues segDocValues) throws IOException {
     boolean success = false;
     try {
       Version ver = si.info.getVersion();
       if (ver != null && ver.onOrAfter(Version.LUCENE_4_9_0)) {
         DocValuesProducer baseProducer = null;
-        for (FieldInfo fi : fieldInfos) {
+        for (FieldInfo fi : allInfos) {
           if (fi.getDocValuesType() == DocValuesType.NONE) {
             continue;
           }
           long docValuesGen = fi.getDocValuesGen();
           if (docValuesGen == -1) {
             if (baseProducer == null) {
-              // the base producer gets all the fields, so the Codec can validate properly
-              baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ,
dir, dvFormat, fieldInfos);
+              // the base producer gets the original fieldinfos it wrote
+              baseProducer = segDocValues.getDocValuesProducer(docValuesGen, si, dir, coreInfos);
               dvGens.add(docValuesGen);
               dvProducers.add(baseProducer);
             }
             dvProducersByField.put(fi.name, baseProducer);
           } else {
             assert !dvGens.contains(docValuesGen);
-            final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen,
si, IOContext.READ, dir, dvFormat, new FieldInfos(new FieldInfo[] { fi }));
+            // otherwise, producer sees only the one fieldinfo it wrote
+            final DocValuesProducer dvp = segDocValues.getDocValuesProducer(docValuesGen,
si, dir, new FieldInfos(new FieldInfo[] { fi }));
             dvGens.add(docValuesGen);
             dvProducers.add(dvp);
             dvProducersByField.put(fi.name, dvp);
@@ -81,7 +88,7 @@ class SegmentDocValuesProducer extends D
         // FieldInfos could belong to the same dvGen. Therefore need to make sure
         // we initialize each DocValuesProducer once per gen.
         Map<Long,List<FieldInfo>> genInfos = new HashMap<>();
-        for (FieldInfo fi : fieldInfos) {
+        for (FieldInfo fi : allInfos) {
           if (fi.getDocValuesType() == DocValuesType.NONE) {
             continue;
           }
@@ -100,9 +107,9 @@ class SegmentDocValuesProducer extends D
           if (docValuesGen == -1) {
             // we need to send all FieldInfos to gen=-1, but later we need to
             // record the DVP only for the "true" gen=-1 fields (not updated)
-            dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir,
dvFormat, fieldInfos);
+            dvp = segDocValues.getDocValuesProducer(docValuesGen, si, dir, coreInfos);
           } else {
-            dvp = segDocValues.getDocValuesProducer(docValuesGen, si, IOContext.READ, dir,
dvFormat, new FieldInfos(infos.toArray(new FieldInfo[infos.size()])));
+            dvp = segDocValues.getDocValuesProducer(docValuesGen, si, dir, new FieldInfos(infos.toArray(new
FieldInfo[infos.size()])));
           }
           dvGens.add(docValuesGen);
           dvProducers.add(dvp);

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
Wed Nov 19 03:18:57 2014
@@ -25,7 +25,6 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FieldsProducer;
@@ -159,15 +158,14 @@ public final class SegmentReader extends
    */
   private DocValuesProducer initDocValuesProducer() throws IOException {
     final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
-    final DocValuesFormat dvFormat = si.info.getCodec().docValuesFormat();
 
     if (!fieldInfos.hasDocValues()) {
       return null;
     } else if (si.hasFieldUpdates()) {
-      return new SegmentDocValuesProducer(si, dir, fieldInfos, segDocValues, dvFormat);
+      return new SegmentDocValuesProducer(si, dir, core.coreFieldInfos, fieldInfos, segDocValues);
     } else {
       // simple case, no DocValues updates
-      return segDocValues.getDocValuesProducer(-1L, si, IOContext.READ, dir, dvFormat, fieldInfos);
+      return segDocValues.getDocValuesProducer(-1L, si, dir, fieldInfos);
     }
   }
   

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java?rev=1640472&r1=1640471&r2=1640472&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
(original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
Wed Nov 19 03:18:57 2014
@@ -19,6 +19,13 @@ import org.apache.lucene.document.Numeri
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldDocs;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.NRTCachingDirectory;
@@ -782,6 +789,92 @@ public class TestNumericDocValuesUpdates
   }
   
   @Test
+  public void testUpdateSegmentWithNoDocValues2() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+    // prevent merges, otherwise by the time updates are applied
+    // (writer.close()), the segments might have merged and that update becomes
+    // legit.
+    conf.setMergePolicy(NoMergePolicy.INSTANCE);
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    // first segment with NDV
+    Document doc = new Document();
+    doc.add(new StringField("id", "doc0", Store.NO));
+    doc.add(new NumericDocValuesField("ndv", 3));
+    writer.addDocument(doc);
+    doc = new Document();
+    doc.add(new StringField("id", "doc4", Store.NO)); // document without 'ndv' field
+    writer.addDocument(doc);
+    writer.commit();
+    
+    // second segment with no NDV, but another dv field "foo"
+    doc = new Document();
+    doc.add(new StringField("id", "doc1", Store.NO));
+    doc.add(new NumericDocValuesField("foo", 3));
+    writer.addDocument(doc);
+    doc = new Document();
+    doc.add(new StringField("id", "doc2", Store.NO)); // document that isn't updated
+    writer.addDocument(doc);
+    writer.commit();
+    
+    // update document in the first segment - should not affect docsWithField of
+    // the document without NDV field
+    writer.updateNumericDocValue(new Term("id", "doc0"), "ndv", 5L);
+    
+    // update document in the second segment - field should be added and we should
+    // be able to handle the other document correctly (e.g. no NPE)
+    writer.updateNumericDocValue(new Term("id", "doc1"), "ndv", 5L);
+    writer.close();
+
+    DirectoryReader reader = DirectoryReader.open(dir);
+    for (LeafReaderContext context : reader.leaves()) {
+      LeafReader r = context.reader();
+      NumericDocValues ndv = r.getNumericDocValues("ndv");
+      Bits docsWithField = r.getDocsWithField("ndv");
+      assertNotNull(docsWithField);
+      assertTrue(docsWithField.get(0));
+      assertEquals(5L, ndv.get(0));
+      assertFalse(docsWithField.get(1));
+      assertEquals(0L, ndv.get(1));
+    }
+    reader.close();
+    
+    TestUtil.checkIndex(dir);
+    
+    conf = newIndexWriterConfig(new MockAnalyzer(random()));
+    writer = new IndexWriter(dir, conf);
+    writer.forceMerge(1);
+    writer.close();
+    
+    reader = DirectoryReader.open(dir);
+    LeafReader ar = getOnlySegmentReader(reader);
+    assertEquals(DocValuesType.NUMERIC, ar.getFieldInfos().fieldInfo("foo").getDocValuesType());
+    IndexSearcher searcher = new IndexSearcher(reader);
+    TopFieldDocs td;
+    // doc0
+    td = searcher.search(new TermQuery(new Term("id", "doc0")), 1, 
+                         new Sort(new SortField("ndv", SortField.Type.LONG)));
+    assertEquals(5L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+    // doc1
+    td = searcher.search(new TermQuery(new Term("id", "doc1")), 1, 
+                         new Sort(new SortField("ndv", SortField.Type.LONG), new SortField("foo",
SortField.Type.LONG)));
+    assertEquals(5L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+    assertEquals(3L, ((FieldDoc)td.scoreDocs[0]).fields[1]);
+    // doc2
+    td = searcher.search(new TermQuery(new Term("id", "doc2")), 1, 
+        new Sort(new SortField("ndv", SortField.Type.LONG)));
+    assertEquals(0L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+    // doc4
+    td = searcher.search(new TermQuery(new Term("id", "doc4")), 1, 
+        new Sort(new SortField("ndv", SortField.Type.LONG)));
+    assertEquals(0L, ((FieldDoc)td.scoreDocs[0]).fields[0]);
+    reader.close();
+    
+    dir.close();
+  }
+  
+  @Test
   public void testUpdateSegmentWithPostingButNoDocValues() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));



Mime
View raw message