lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From andyetitmo...@apache.org
Subject svn commit: r1676075 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/update/SolrIndexSplitter.java
Date Sun, 26 Apr 2015 00:06:08 GMT
Author: andyetitmoves
Date: Sun Apr 26 00:06:08 2015
New Revision: 1676075

URL: http://svn.apache.org/r1676075
Log:
SOLR-5213: Log when shard splitting unexpectedly leads to documents going to zero or multiple
sub-shards

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1676075&r1=1676074&r2=1676075&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Apr 26 00:06:08 2015
@@ -228,6 +228,9 @@ Other Changes
 * SOLR-7391: Use a time based expiration cache for one off HDFS FileSystem instances.
   (Mark Miller)
 
+* SOLR-5213: Log when shard splitting unexpectedly leads to documents going to
+  no or multiple shards (Christine Poerschke, Ramkumar Aiyengar)
+
 ==================  5.1.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java?rev=1676075&r1=1676074&r2=1676075&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java Sun Apr 26 00:06:08 2015
@@ -168,6 +168,12 @@ public class SolrIndexSplitter {
     BytesRef term = null;
     PostingsEnum postingsEnum = null;
 
+    int[] docsMatchingRanges = null;
+    if (ranges != null) {
+      // +1 because documents can belong to *zero*, one, several or all ranges in rangesArr
+      docsMatchingRanges = new int[rangesArr.length+1];
+    }
+
     CharsRefBuilder idRef = new CharsRefBuilder();
     for (;;) {
       term = termsEnum.next();
@@ -203,11 +209,37 @@ public class SolrIndexSplitter {
           docSets[currPartition].set(doc);
           currPartition = (currPartition + 1) % numPieces;
         } else  {
+          int matchingRangesCount = 0;
           for (int i=0; i<rangesArr.length; i++) {      // inner-loop: use array here for extra speed.
             if (rangesArr[i].includes(hash)) {
               docSets[i].set(doc);
+              ++matchingRangesCount;
             }
           }
+          docsMatchingRanges[matchingRangesCount]++;
+        }
+      }
+    }
+
+    if (docsMatchingRanges != null) {
+      for (int ii = 0; ii < docsMatchingRanges.length; ii++) {
+        if (0 == docsMatchingRanges[ii]) continue;
+        switch (ii) {
+          case 0:
+            // document loss
+            log.error("Splitting {}: {} documents belong to no shards and will be dropped",
+                reader, docsMatchingRanges[ii]);
+            break;
+          case 1:
+            // normal case, each document moves to one of the sub-shards
+            log.info("Splitting {}: {} documents will move into a sub-shard",
+                reader, docsMatchingRanges[ii]);
+            break;
+          default:
+            // document duplication
+            log.error("Splitting {}: {} documents will be moved to multiple ({}) sub-shards",
+                reader, docsMatchingRanges[ii], ii);
+            break;
         }
       }
     }



Mime
View raw message