lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject [2/4] lucene-solr:jira/solr-12259: More fixes, still broken.
Date Mon, 17 Dec 2018 23:28:29 GMT
More fixes, still broken.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d5a7b307
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d5a7b307
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d5a7b307

Branch: refs/heads/jira/solr-12259
Commit: d5a7b3071fdbd2db493c77aef15f5882aea1a270
Parents: 37bbad3
Author: Andrzej Bialecki <ab@apache.org>
Authored: Wed Dec 12 20:29:18 2018 +0100
Committer: Andrzej Bialecki <ab@apache.org>
Committed: Wed Dec 12 20:29:18 2018 +0100

----------------------------------------------------------------------
 .../org/apache/lucene/index/SegmentReader.java  |   2 +-
 .../index/AddDocValuesMergePolicyFactory.java   |  47 +++--
 .../solr/uninverting/UninvertingReader.java     | 207 ++++++++++++++-----
 .../solr/cloud/BasicDistributedZkTest.java      |   2 +-
 .../index/WrapperMergePolicyFactoryTest.java    |   2 +-
 .../cloud/AbstractFullDistribZkTestBase.java    |   2 +-
 6 files changed, 191 insertions(+), 71 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5a7b307/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
index b368b96..535e21e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
@@ -72,7 +72,7 @@ public final class SegmentReader extends CodecReader {
    * @throws CorruptIndexException if the index is corrupt
    * @throws IOException if there is a low-level IO error
    */
-  SegmentReader(SegmentCommitInfo si, int createdVersionMajor, IOContext context) throws IOException {
+  public SegmentReader(SegmentCommitInfo si, int createdVersionMajor, IOContext context) throws IOException {
     this.si = si.clone();
     this.originalSi = si;
     this.metaData = new LeafMetaData(createdVersionMajor, si.info.getMinVersion(), si.info.getIndexSort());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5a7b307/solr/core/src/java/org/apache/solr/index/AddDocValuesMergePolicyFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/index/AddDocValuesMergePolicyFactory.java b/solr/core/src/java/org/apache/solr/index/AddDocValuesMergePolicyFactory.java
index 9cfee8c..e96535d 100644
--- a/solr/core/src/java/org/apache/solr/index/AddDocValuesMergePolicyFactory.java
+++ b/solr/core/src/java/org/apache/solr/index/AddDocValuesMergePolicyFactory.java
@@ -198,7 +198,7 @@ public class AddDocValuesMergePolicyFactory extends WrapperMergePolicyFactory
{
             if (sb.length() > 0) {
               sb.append(' ');
             }
-            sb.append(info.toString() + "(" + source + "," + shouldRewrite + "," + clazz + ")");
+            sb.append(info.info.name + "(" + source + "," + shouldRewrite + "," + clazz + ")");
           }
         }
         if (needWrapping > 0) {
@@ -233,20 +233,23 @@ public class AddDocValuesMergePolicyFactory extends WrapperMergePolicyFactory
{
       StringBuilder b = new StringBuilder();
       final int numSegments = oneMerge.segments.size();
       for(int i=0;i<numSegments;i++) {
-        if (i > 0) {
-          b.append('\n');
-        }
-        b.append(oneMerge.segments.get(i).toString());
-        b.append('#');
+        b.append("\n* ");
+        b.append(oneMerge.segments.get(i).info.name);
+        b.append(":");
+        b.append("\n\tsource: ");
         Map<String, String> diag = oneMerge.segments.get(i).info.getDiagnostics();
         b.append(diag.get("source"));
         if (diag.get("class") != null) {
-          b.append('#');
+          b.append("\n\tclass: ");
           b.append(diag.get("class"));
-          if (diag.get("segString") != null) {
-            b.append("#ss=");
-            b.append(diag.get("segString"));
-          }
+        }
+        if (diag.get("wrapping") != null) {
+          b.append("\n\twrapping: ");
+          b.append(diag.get("wrapping"));
+        }
+        if (diag.get("segString") != null) {
+          b.append("\n\tsegString: ");
+          b.append(diag.get("segString").replaceAll("\n", "\n\t| "));
         }
       }
       return b.toString();
@@ -273,6 +276,7 @@ public class AddDocValuesMergePolicyFactory extends WrapperMergePolicyFactory
{
      */
 
     private String shouldRewrite(SegmentCommitInfo info) {
+      String rewriteReason = null;
       // Need to get a reader for this segment
       try (SegmentReader reader = new SegmentReader(info, Version.LUCENE_8_0_0.major, IOContext.DEFAULT))
{
         // check the marker, if defined
@@ -284,27 +288,24 @@ public class AddDocValuesMergePolicyFactory extends WrapperMergePolicyFactory
{
 //        }
         StringBuilder sb = new StringBuilder();
         for (FieldInfo fi : reader.getFieldInfos()) {
-          if (fi.getDocValuesType() != DocValuesType.NONE) {
-            Map<String, Object> dvStats = UninvertingReader.getDVStats(reader, fi);
-            if (!((Integer)dvStats.get("numDocs")).equals((Integer)dvStats.get("present")))
{
-              throw new RuntimeException("segment: " + info.toString() + " " + fi.name + ", dvStats: " + dvStats + " diag: " + info.info.getDiagnostics());
-            }
-          }
           if (mapping.apply(fi) != null) {
             if (sb.length() > 0) {
               sb.append(',');
             }
             sb.append(fi.name);
+            sb.append(UninvertingReader.getDVStats(reader, fi).toString());
           }
         }
 //        return sb.toString();
-        return sb.length() > 0 ? sb.toString() : null;
+        rewriteReason = sb.length() > 0 ? sb.toString() : null;
       } catch (IOException e) {
         // It's safer to rewrite the segment if there's an error, although it may lead to a lot of work.
         log.warn("Error opening a reader for segment {}, will rewrite segment", info.toString());
         count("shouldRewriteError");
-        return "error " + e.getMessage();
+        rewriteReason = "error " + e.getMessage();
       }
+      if (rewriteReason == null) rewriteReason = "forced";
+      return rewriteReason;
     }
 
     @Override
@@ -409,7 +410,9 @@ public class AddDocValuesMergePolicyFactory extends WrapperMergePolicyFactory
{
     @Override
     public void setMergeInfo(SegmentCommitInfo info) {
       super.setMergeInfo(info);
-      info.info.getDiagnostics().put(DIAGNOSTICS_MARKER_PROP, marker);
+      if (marker != null) {
+        info.info.getDiagnostics().put(DIAGNOSTICS_MARKER_PROP, marker);
+      }
       info.info.getDiagnostics().put("class", getClass().getSimpleName());
       info.info.getDiagnostics().put("segString", AddDVMergePolicy.segString(this));
       if (metaPairs != null && metaPairs.length > 1) {
@@ -418,7 +421,9 @@ public class AddDocValuesMergePolicyFactory extends WrapperMergePolicyFactory
{
           len--;
         }
         for (int i = 0; i < len; i += 2) {
-          info.info.getDiagnostics().put(metaPairs[i], metaPairs[i + 1]);
+          if (metaPairs[i] != null && metaPairs[i + 1] != null) {
+            info.info.getDiagnostics().put(metaPairs[i], metaPairs[i + 1]);
+          }
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5a7b307/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
index e804635..5b07e61 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@@ -18,15 +18,19 @@ package org.apache.solr.uninverting;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.function.Function;
 
+import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CodecReader;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
@@ -37,7 +41,11 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.solr.uninverting.FieldCache.CacheEntry;
 
@@ -234,52 +242,7 @@ public class UninvertingReader extends FilterLeafReader {
     // Calculate a new FieldInfos that has DocValuesType where we didn't before
     ArrayList<FieldInfo> newFieldInfos = new ArrayList<>(in.getFieldInfos().size());
     for (FieldInfo fi : in.getFieldInfos()) {
-      DocValuesType type = fi.getDocValuesType();
-      // fields which currently don't have docValues, but are uninvertable (indexed or points
data present)
-      if (type == DocValuesType.NONE &&
-          (fi.getIndexOptions() != IndexOptions.NONE || (fi.getPointNumBytes() > 0 &&
fi.getPointDataDimensionCount() == 1))) {
-        Type t = mapping.apply(fi.name); // could definitely return null, thus still can't
uninvert it
-        if (t != null) {
-          if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT ||
t == Type.DOUBLE_POINT) {
-            // type uses points
-            if (fi.getPointDataDimensionCount() == 0) {
-              continue;
-            }
-          } else {
-            // type uses inverted index
-            if (fi.getIndexOptions() == IndexOptions.NONE) {
-              continue;
-            }
-          }
-          switch(t) {
-            case INTEGER_POINT:
-            case LONG_POINT:
-            case FLOAT_POINT:
-            case DOUBLE_POINT:
-            case LEGACY_INTEGER:
-            case LEGACY_LONG:
-            case LEGACY_FLOAT:
-            case LEGACY_DOUBLE:
-              type = DocValuesType.NUMERIC;
-              break;
-            case BINARY:
-              type = DocValuesType.BINARY;
-              break;
-            case SORTED:
-              type = DocValuesType.SORTED;
-              break;
-            case SORTED_SET_BINARY:
-            case SORTED_SET_INTEGER:
-            case SORTED_SET_FLOAT:
-            case SORTED_SET_LONG:
-            case SORTED_SET_DOUBLE:
-              type = DocValuesType.SORTED_SET;
-              break;
-            default:
-              throw new AssertionError();
-          }
-        }
-      }
+      DocValuesType type = shouldWrap(fi, mapping);
       if (type != fi.getDocValuesType()) { // we changed it
         wrap = true;
         newFieldInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
@@ -297,6 +260,56 @@ public class UninvertingReader extends FilterLeafReader {
     }
   }
 
+  public static DocValuesType shouldWrap(FieldInfo fi, Function<String, Type> mapping) {
+    DocValuesType type = fi.getDocValuesType();
+    // fields which currently don't have docValues, but are uninvertable (indexed or points
data present)
+    if (type == DocValuesType.NONE &&
+        (fi.getIndexOptions() != IndexOptions.NONE || (fi.getPointNumBytes() > 0 &&
fi.getPointDataDimensionCount() == 1))) {
+      Type t = mapping.apply(fi.name); // could definitely return null, thus still can't
uninvert it
+      if (t != null) {
+        if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t
== Type.DOUBLE_POINT) {
+          // type uses points
+          if (fi.getPointDataDimensionCount() == 0) {
+            return null;
+          }
+        } else {
+          // type uses inverted index
+          if (fi.getIndexOptions() == IndexOptions.NONE) {
+            return null;
+          }
+        }
+        switch(t) {
+          case INTEGER_POINT:
+          case LONG_POINT:
+          case FLOAT_POINT:
+          case DOUBLE_POINT:
+          case LEGACY_INTEGER:
+          case LEGACY_LONG:
+          case LEGACY_FLOAT:
+          case LEGACY_DOUBLE:
+            type = DocValuesType.NUMERIC;
+            break;
+          case BINARY:
+            type = DocValuesType.BINARY;
+            break;
+          case SORTED:
+            type = DocValuesType.SORTED;
+            break;
+          case SORTED_SET_BINARY:
+          case SORTED_SET_INTEGER:
+          case SORTED_SET_FLOAT:
+          case SORTED_SET_LONG:
+          case SORTED_SET_DOUBLE:
+            type = DocValuesType.SORTED_SET;
+            break;
+          default:
+            throw new AssertionError();
+        }
+      }
+    }
+    return type;
+  }
+
   final Function<String, Type> mapping;
   final FieldInfos fieldInfos;
 
@@ -445,6 +458,108 @@ public class UninvertingReader extends FilterLeafReader {
     return new FieldCacheStats(totalSize, info);
   }
 
+  public static Map<String, Object> getDVStats(CodecReader reader, FieldInfo fi) throws IOException {
+    DocValuesType type = fi.getDocValuesType();
+    try {
+      int present = 0;
+      int zeroOrNull = 0;
+      Bits liveDocs = reader.getLiveDocs();
+      DocValuesProducer producer = reader.getDocValuesReader();
+      int expected = reader.numDocs();
+      int deletedButPresent = 0;
+      switch (type) {
+        case NUMERIC:
+          NumericDocValues ndv = reader.getNumericDocValues(fi.name);
+          while (ndv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+            if (liveDocs != null && !liveDocs.get(ndv.docID())) {
+              deletedButPresent++;
+            }
+            long num = ndv.longValue();
+            if (num == 0) {
+              zeroOrNull++;
+            }
+            present++;
+          }
+          break;
+        case BINARY:
+          BinaryDocValues bdv = reader.getBinaryDocValues(fi.name);
+          while (bdv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+            if (liveDocs != null && !liveDocs.get(bdv.docID())) {
+              deletedButPresent++;
+            }
+            BytesRef bytes = bdv.binaryValue();
+            if (bytes == null || bytes.length == 0) {
+              zeroOrNull++;
+            }
+            present++;
+          }
+          break;
+        case SORTED:
+          SortedDocValues sdv = reader.getSortedDocValues(fi.name);
+          while (sdv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+            if (liveDocs != null && !liveDocs.get(sdv.docID())) {
+              deletedButPresent++;
+            }
+            BytesRef bytes = sdv.binaryValue();
+            if (bytes == null || bytes.length == 0) {
+              zeroOrNull++;
+            }
+            present++;
+          }
+          break;
+        case SORTED_NUMERIC:
+          SortedNumericDocValues sndv = reader.getSortedNumericDocValues(fi.name);
+          while (sndv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+            if (liveDocs != null && !liveDocs.get(sndv.docID())) {
+              deletedButPresent++;
+            }
+            if (sndv.docValueCount() > 0) {
+              for (int j = 0; j < sndv.docValueCount(); j++) {
+                long val = sndv.nextValue();
+              }
+              present++;
+            } else {
+              zeroOrNull++;
+            }
+          }
+          break;
+        case SORTED_SET:
+          SortedSetDocValues ssdv = reader.getSortedSetDocValues(fi.name);
+          while (ssdv.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+            if (liveDocs != null && !liveDocs.get(ssdv.docID())) {
+              deletedButPresent++;
+            }
+            if (ssdv.getValueCount() > 0) {
+              long ord;
+              boolean allPresent = true;
+              while ((ord = ssdv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+                BytesRef term = ssdv.lookupOrd(ord);
+                if (term == null || term.length == 0) {
+                  allPresent = false;
+                }
+              }
+              if (!allPresent) {
+                zeroOrNull++;
+              }
+              present++;
+            } else {
+              zeroOrNull++;
+            }
+          }
+          break;
+      }
+      Map<String, Object> result = new HashMap<>();
+      result.put("numDocs", reader.numDocs());
+      result.put("expected", expected);
+      result.put("present", present);
+      result.put("nullOrZero", zeroOrNull);
+      result.put("delPresent", deletedButPresent);
+      return result;
+    } catch (IOException e) {
+      return Collections.singletonMap("error", e.getMessage());
+    }
+  }
+
   public static int getUninvertedStatsSize() {
     return FieldCache.DEFAULT.getCacheEntries().length;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5a7b307/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
index c95ae85..895fa29 100644
--- a/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/BasicDistributedZkTest.java
@@ -239,7 +239,7 @@ public class BasicDistributedZkTest extends AbstractFullDistribZkTestBase
{
     indexr(id, 16, "SubjectTerms_mfacet", new String[]  {"test 1", "test 2", "test3"});
     String[] vals = new String[100];
     for (int i=0; i<100; i++) {
-      vals[i] = "test " + i;
+      vals[i] = "code/test " + i;
     }
     indexr(id, 17, "SubjectTerms_mfacet", vals);
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5a7b307/solr/core/src/test/org/apache/solr/index/WrapperMergePolicyFactoryTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/index/WrapperMergePolicyFactoryTest.java b/solr/core/src/test/org/apache/solr/index/WrapperMergePolicyFactoryTest.java
index d416e13..1d830ca 100644
--- a/solr/core/src/test/org/apache/solr/index/WrapperMergePolicyFactoryTest.java
+++ b/solr/core/src/test/org/apache/solr/index/WrapperMergePolicyFactoryTest.java
@@ -52,7 +52,7 @@ public class WrapperMergePolicyFactoryTest extends SolrTestCaseJ4 {
     final double testMaxMergedSegmentMB = defaultTMP.getMaxMergedSegmentMB() * 10;
 
     final MergePolicyFactoryArgs args = new MergePolicyFactoryArgs();
-    args.add(WrapperMergePolicyFactory.WRAPPED_PREFIX, "test");
+    args.add(WrapperMergePolicyFactory.WRAPPED_PREFIX, "code/test");
     args.add("test.class", TieredMergePolicyFactory.class.getName());
     args.add("test.maxMergeAtOnce", testMaxMergeAtOnce);
     args.add("test.maxMergedSegmentMB", testMaxMergedSegmentMB);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d5a7b307/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
index 7ff4226..cebd4c7 100644
--- a/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
+++ b/solr/test-framework/src/java/org/apache/solr/cloud/AbstractFullDistribZkTestBase.java
@@ -1235,7 +1235,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
         "test3"});
     String[] vals = new String[100];
     for (int i = 0; i < 100; i++) {
-      vals[i] = "test " + i;
+      vals[i] = "code/test " + i;
     }
     indexr(id, 17, "SubjectTerms_mfacet", vals);
 


Mime
View raw message