lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ans...@apache.org
Subject lucene-solr:master: SOLR-5004: Allow a shard to be split into 'n' sub-shards using the collections API
Date Mon, 22 Oct 2018 22:01:47 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/master fcaea07f3 -> d799fd53c


SOLR-5004: Allow a shard to be split into 'n' sub-shards using the collections API


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d799fd53
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d799fd53
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d799fd53

Branch: refs/heads/master
Commit: d799fd53c7cd3a83442d6010dc48802d2fd8c7fb
Parents: fcaea07
Author: Anshum Gupta <anshum@apache.org>
Authored: Mon Oct 22 14:59:10 2018 -0700
Committer: Anshum Gupta <anshum@apache.org>
Committed: Mon Oct 22 15:01:24 2018 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../cloud/api/collections/SplitShardCmd.java    |  23 +++--
 .../solr/handler/admin/CollectionsHandler.java  |   9 +-
 .../org/apache/solr/cloud/SplitShardTest.java   | 102 +++++++++++++++++++
 solr/solr-ref-guide/src/collections-api.adoc    |   5 +
 .../solrj/request/CollectionAdminRequest.java   |  12 +++
 .../solr/common/params/CommonAdminParams.java   |   2 +
 7 files changed, 149 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d799fd53/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f26e180..81748b3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -151,6 +151,9 @@ New Features
 
 * SOLR-12846: Added support for "host" variable in autoscaling policy rules (noble)
 
+* SOLR-5004: Splitshard collections API now supports splitting into more than 2 sub-shards
directly i.e. by providing a
+  numSubShards parameter (Christine Poerschke, Anshum Gupta)
+
 
 Other Changes
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d799fd53/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
index 2e68f91..aa4909d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
@@ -74,10 +74,15 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.AD
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
 import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+import static org.apache.solr.common.params.CommonAdminParams.NUM_SUB_SHARDS;
 
 
 public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final int MIN_NUM_SUB_SHARDS = 2;
+  // This is an arbitrary number that seems reasonable at this time.
+  private static final int MAX_NUM_SUB_SHARDS = 8;
+  private static final int DEFAULT_NUM_SUB_SHARDS = 2;
 
   private final OverseerCollectionMessageHandler ocmh;
 
@@ -122,7 +127,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd
{
     }
 
     // find the leader for the shard
-    Replica parentShardLeader = null;
+    Replica parentShardLeader;
     try {
       parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice.get(), 10000);
     } catch (InterruptedException e) {
@@ -326,7 +331,7 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd
{
           requestMap);
       t.stop();
 
-      log.debug("Index on shard: " + nodeName + " split into two successfully");
+      log.debug("Index on shard: {} split into {} successfully", nodeName, subShardNames.size());
 
       t = timings.sub("applyBufferedUpdates");
       // apply buffered updates on sub-shards
@@ -680,13 +685,13 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd
{
                                 List<DocRouter.Range> subRanges, List<String>
subSlices, List<String> subShardNames,
                                   boolean firstReplicaNrt) {
     String splitKey = message.getStr("split.key");
+    String rangesStr = message.getStr(CoreAdminParams.RANGES);
+
     DocRouter.Range range = parentSlice.getRange();
     if (range == null) {
       range = new PlainIdRouter().fullRange();
     }
     DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
-
-    String rangesStr = message.getStr(CoreAdminParams.RANGES);
     if (rangesStr != null) {
       String[] ranges = rangesStr.split(",");
       if (ranges.length == 0 || ranges.length == 1) {
@@ -740,8 +745,14 @@ public class SplitShardCmd implements OverseerCollectionMessageHandler.Cmd
{
         }
       }
     } else {
-      // todo: fixed to two partitions?
-      subRanges.addAll(router.partitionRange(2, range));
+      int numSubShards = message.getInt(NUM_SUB_SHARDS, DEFAULT_NUM_SUB_SHARDS);
+      log.info("{} set at: {}", NUM_SUB_SHARDS, numSubShards);
+
+      if(numSubShards < MIN_NUM_SUB_SHARDS || numSubShards > MAX_NUM_SUB_SHARDS)
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+            "A shard can only be split into "+MIN_NUM_SUB_SHARDS+" to " + MAX_NUM_SUB_SHARDS
+            + " subshards in one split request. Provided "+NUM_SUB_SHARDS+"=" + numSubShards);
+      subRanges.addAll(router.partitionRange(numSubShards, range));
     }
 
     for (int i = 0; i < subRanges.size(); i++) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d799fd53/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index 276ab94..dfb3c6b 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -143,6 +143,7 @@ import static org.apache.solr.common.params.CollectionAdminParams.WITH_COLLECTIO
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
 import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
 import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
+import static org.apache.solr.common.params.CommonAdminParams.NUM_SUB_SHARDS;
 import static org.apache.solr.common.params.CommonAdminParams.SPLIT_METHOD;
 import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
 import static org.apache.solr.common.params.CommonParams.NAME;
@@ -638,6 +639,7 @@ public class CollectionsHandler extends RequestHandlerBase implements
Permission
       String shard = req.getParams().get(SHARD_ID_PROP);
       String rangesStr = req.getParams().get(CoreAdminParams.RANGES);
       String splitKey = req.getParams().get("split.key");
+      String numSubShards = req.getParams().get(NUM_SUB_SHARDS);
 
       if (splitKey == null && shard == null) {
         throw new SolrException(ErrorCode.BAD_REQUEST, "At least one of shard, or split.key
should be specified.");
@@ -650,6 +652,10 @@ public class CollectionsHandler extends RequestHandlerBase implements
Permission
         throw new SolrException(ErrorCode.BAD_REQUEST,
             "Only one of 'ranges' or 'split.key' should be specified");
       }
+      if (numSubShards != null && (splitKey != null || rangesStr != null)) {
+        throw new SolrException(ErrorCode.BAD_REQUEST,
+            "numSubShards can not be specified with split.key or ranges parameters");
+      }
 
       Map<String, Object> map = copy(req.getParams(), null,
           COLLECTION_PROP,
@@ -658,7 +664,8 @@ public class CollectionsHandler extends RequestHandlerBase implements
Permission
           CoreAdminParams.RANGES,
           WAIT_FOR_FINAL_STATE,
           TIMING,
-          SPLIT_METHOD);
+          SPLIT_METHOD,
+          NUM_SUB_SHARDS);
       return copyPropertiesWithPrefix(req.getParams(), map, COLL_PROP_PREFIX);
     }),
     DELETESHARD_OP(DELETESHARD, (req, rsp, h) -> {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d799fd53/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java b/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java
new file mode 100644
index 0000000..a2a2dca
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/SplitShardTest.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud;
+
+import java.io.IOException;
+
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.common.SolrException;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class SplitShardTest extends SolrCloudTestCase {
+
+  private final String COLLECTION_NAME = "splitshardtest-collection";
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(1)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+  }
+
+  @Before
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+  }
+
+  @After
+  @Override
+  public void tearDown() throws Exception {
+    super.tearDown();
+    cluster.deleteAllCollections();
+  }
+
+  @Test
+  public void doTest() throws IOException, SolrServerException {
+    CollectionAdminRequest
+        .createCollection(COLLECTION_NAME, "conf", 2, 1)
+        .setMaxShardsPerNode(100)
+        .process(cluster.getSolrClient());
+    CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
+        .setNumSubShards(5)
+        .setShardName("shard1");
+    splitShard.process(cluster.getSolrClient());
+    waitForState("Timed out waiting for sub shards to be active. Number of active shards="
+
+            cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION_NAME).getActiveSlices().size(),
+        COLLECTION_NAME, activeClusterShape(6, 1));
+
+    try {
+      splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME).setShardName("shard2").setNumSubShards(10);
+      splitShard.process(cluster.getSolrClient());
+      fail("SplitShard should throw an exception when numSubShards > 8");
+    } catch (HttpSolrClient.RemoteSolrException ex) {
+      assertTrue(ex.getMessage().contains("A shard can only be split into 2 to 8 subshards
in one split request."));
+    }
+
+    try {
+      splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME).setShardName("shard2").setNumSubShards(1);
+      splitShard.process(cluster.getSolrClient());
+      fail("SplitShard should throw an exception when numSubShards < 2");
+    } catch (HttpSolrClient.RemoteSolrException ex) {
+      assertTrue(ex.getMessage().contains("A shard can only be split into 2 to 8 subshards
in one split request. Provided numSubShards=1"));
+    }
+  }
+
+  @Test
+  public void multipleOptionsSplitTest() throws IOException, SolrServerException {
+    CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(COLLECTION_NAME)
+        .setNumSubShards(5)
+        .setRanges("0-c,d-7fffffff")
+        .setShardName("shard1");
+    boolean expectedException = false;
+    try {
+      splitShard.process(cluster.getSolrClient());
+      fail("An exception should have been thrown");
+    } catch (SolrException ex) {
+      expectedException = true;
+    }
+    assertTrue("Expected SolrException but it didn't happen", expectedException);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d799fd53/solr/solr-ref-guide/src/collections-api.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/collections-api.adoc b/solr/solr-ref-guide/src/collections-api.adoc
index 0e4112d..a5021da 100644
--- a/solr/solr-ref-guide/src/collections-api.adoc
+++ b/solr/solr-ref-guide/src/collections-api.adoc
@@ -287,6 +287,11 @@ This parameter can be used to split a shard using a route key such that
all docu
 +
 For example, suppose `split.key=A!` hashes to the range `12-15` and belongs to shard 'shard1'
with range `0-20`. Splitting by this route key would yield three sub-shards with ranges `0-11`,
`12-15` and `16-20`. Note that the sub-shard with the hash range of the route key may also
contain documents for other route keys whose hash ranges overlap.
 
+`numSubShards`::
+The number of sub-shards to split the parent shard into. Allowed values for this are in the
range of 2-8 and defaults to 2.
++
+This parameter can only be used when ranges or split.key are not specified.
+
 `splitMethod`::
 Currently two methods of shard splitting are supported:
 * `splitMethod=rewrite` (default) after selecting documents to retain in each partition this
method creates sub-indexes from

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d799fd53/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
index 0f6de19..4f26984 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
@@ -1153,6 +1153,7 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
     protected String splitKey;
     protected String shard;
     protected String splitMethod;
+    protected Integer numSubShards;
 
     private Properties properties;
 
@@ -1164,6 +1165,15 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
     public SplitShard setRanges(String ranges) { this.ranges = ranges; return this; }
     public String getRanges() { return ranges; }
 
+    public Integer getNumSubShards() {
+      return numSubShards;
+    }
+
+    public SplitShard setNumSubShards(Integer numSubShards) {
+      this.numSubShards = numSubShards;
+      return this;
+    }
+
     public SplitShard setSplitMethod(String splitMethod) {
       this.splitMethod = splitMethod;
       return this;
@@ -1210,6 +1220,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
       params.set("split.key", this.splitKey);
       params.set(CoreAdminParams.RANGES, ranges);
       params.set(CommonAdminParams.SPLIT_METHOD, splitMethod);
+      if(numSubShards != null)
+        params.set("numSubShards", numSubShards);
 
       if(properties != null) {
         addProperties(params, properties);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d799fd53/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
index c12ee32..c080342 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
@@ -27,6 +27,8 @@ public interface CommonAdminParams
   String IN_PLACE_MOVE = "inPlaceMove";
   /** Method to use for shard splitting. */
   String SPLIT_METHOD = "splitMethod";
+  /** **/
+  String NUM_SUB_SHARDS = "numSubShards";
   /** Timeout for replicas to become active. */
   String TIMEOUT = "timeout";
 }


Mime
View raw message