lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject lucene-solr:jira/solr-11458-2: SOLR-11458: WIP: * Add "shared_storage" replica property and fix the check in MoveReplicaCmd. * Attempt to roll-back changes in moveHdfsReplica if some of them fail. * Add a flag "inPlaceMove" to let users (and the autoscal
Date Wed, 08 Nov 2017 20:04:47 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-11458-2 [created] 7ccf122de


SOLR-11458: WIP:
* Add "shared_storage" replica property and fix the check in MoveReplicaCmd.
* Attempt to roll-back changes in moveHdfsReplica if some of them fail.
* Add a flag "inPlaceMove" to let users (and the autoscaling framework) to decide to
allow in-place HDFS move.
* Add missing required params in v2 API.
* Catch a few spurious errors in HdfsDirectory.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7ccf122d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7ccf122d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7ccf122d

Branch: refs/heads/jira/solr-11458-2
Commit: 7ccf122de7d93fbe9a03431e811d0b51f3d11bc4
Parents: 70d1d94
Author: Andrzej Bialecki <ab@apache.org>
Authored: Wed Nov 8 21:00:56 2017 +0100
Committer: Andrzej Bialecki <ab@apache.org>
Committed: Wed Nov 8 21:00:56 2017 +0100

----------------------------------------------------------------------
 .../src/java/org/apache/solr/cloud/Assign.java  |  3 +-
 .../org/apache/solr/cloud/MoveReplicaCmd.java   | 64 ++++++++++++++++----
 .../cloud/OverseerCollectionMessageHandler.java |  2 +-
 .../org/apache/solr/cloud/ZkController.java     |  1 +
 .../apache/solr/core/HdfsDirectoryFactory.java  | 12 ++--
 .../solr/handler/admin/CollectionsHandler.java  |  2 +
 .../apache/solr/cloud/MoveReplicaHDFSTest.java  | 15 +++++
 .../org/apache/solr/cloud/MoveReplicaTest.java  | 54 ++++++++++++++---
 .../solrj/request/CollectionAdminRequest.java   | 14 +++++
 .../apache/solr/common/cloud/ZkStateReader.java |  1 +
 .../solr/common/params/CommonAdminParams.java   |  7 ++-
 .../collections.collection.Commands.json        | 24 ++++++--
 12 files changed, 168 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/java/org/apache/solr/cloud/Assign.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/Assign.java b/solr/core/src/java/org/apache/solr/cloud/Assign.java
index 36663e4..ed8726b 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Assign.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Assign.java
@@ -459,7 +459,8 @@ public class Assign {
     if (createNodeList != null) { // Overrides petty considerations about maxShardsPerNode
       if (createNodeList.size() != nodeNameVsShardCount.size()) {
         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-            "At least one of the node(s) specified are not currently active, no action taken.");
+            "At least one of the node(s) specified " + createNodeList + " are not currently
active "
+                + nodeNameVsShardCount.keySet() + ", no action taken.");
       }
       return nodeNameVsShardCount;
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
index a2ed407..5149c47 100644
--- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
@@ -31,6 +31,7 @@ import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.util.NamedList;
@@ -45,6 +46,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
 import static org.apache.solr.common.cloud.ZkStateReader.REPLICA_PROP;
 import static org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
 import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
 import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
 
 public class MoveReplicaCmd implements Cmd{
@@ -63,10 +65,11 @@ public class MoveReplicaCmd implements Cmd{
 
   private void moveReplica(ClusterState clusterState, ZkNodeProps message, NamedList results)
throws Exception {
     log.debug("moveReplica() : {}", Utils.toJSONString(message));
-    ocmh.checkRequired(message, COLLECTION_PROP, "targetNode");
+    ocmh.checkRequired(message, COLLECTION_PROP, CollectionParams.TARGET_NODE);
     String collection = message.getStr(COLLECTION_PROP);
-    String targetNode = message.getStr("targetNode");
+    String targetNode = message.getStr(CollectionParams.TARGET_NODE);
     boolean waitForFinalState = message.getBool(WAIT_FOR_FINAL_STATE, false);
+    boolean inPlaceMove = message.getBool(IN_PLACE_MOVE, true);
     int timeout = message.getInt("timeout", 10 * 60); // 10 minutes
 
     String async = message.getStr(ASYNC);
@@ -75,6 +78,9 @@ public class MoveReplicaCmd implements Cmd{
     if (coll == null) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Collection: " + collection
+ " does not exist");
     }
+    if (!clusterState.getLiveNodes().contains(targetNode)) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Target node: " + targetNode
+ " not in live nodes: " + clusterState.getLiveNodes());
+    }
     Replica replica = null;
     if (message.containsKey(REPLICA_PROP)) {
       String replicaName = message.getStr(REPLICA_PROP);
@@ -103,7 +109,7 @@ public class MoveReplicaCmd implements Cmd{
       }
     }
 
-    log.info("Replica will be moved {}", replica);
+    log.info("Replica will be moved to node {}: {}", targetNode, replica);
     Slice slice = null;
     for (Slice s : coll.getSlices()) {
       if (s.getReplicas().contains(replica)) {
@@ -112,9 +118,13 @@ public class MoveReplicaCmd implements Cmd{
     }
     assert slice != null;
     Object dataDir = replica.get("dataDir");
-    if (dataDir != null && dataDir.toString().startsWith("hdfs:/")) {
+    boolean isSharedFS = replica.getBool(ZkStateReader.SHARED_STORAGE_PROP, false) &&
dataDir != null;
+
+    if (isSharedFS && inPlaceMove) {
+      log.debug("-- moveHdfsReplica");
       moveHdfsReplica(clusterState, results, dataDir.toString(), targetNode, async, coll,
replica, slice, timeout, waitForFinalState);
     } else {
+      log.debug("-- moveNormalReplica (inPlaceMove=" + inPlaceMove + ", isSharedFS=" + isSharedFS);
       moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice, timeout,
waitForFinalState);
     }
   }
@@ -135,10 +145,10 @@ public class MoveReplicaCmd implements Cmd{
       NamedList deleteResult = new NamedList();
       ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
       if (deleteResult.get("failure") != null) {
-        String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s
shard=%s name=%s",
-            coll.getName(), slice.getName(), replica.getName());
+        String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s
shard=%s name=%s, failure=%s",
+            coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
         log.warn(errorString);
-        results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
+        results.add("failure", errorString);
         return;
       }
 
@@ -165,17 +175,45 @@ public class MoveReplicaCmd implements Cmd{
         CoreAdminParams.NODE, targetNode,
         CoreAdminParams.CORE_NODE_NAME, replica.getName(),
         CoreAdminParams.NAME, replica.getCoreName(),
+        WAIT_FOR_FINAL_STATE, String.valueOf(waitForFinalState),
         SKIP_CREATE_REPLICA_IN_CLUSTER_STATE, skipCreateReplicaInClusterState,
         CoreAdminParams.ULOG_DIR, ulogDir.substring(0, ulogDir.lastIndexOf(UpdateLog.TLOG_NAME)),
         CoreAdminParams.DATA_DIR, dataDir);
     if(async!=null) addReplicasProps.getProperties().put(ASYNC, async);
     NamedList addResult = new NamedList();
-    ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, addResult, null);
+    try {
+      ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, addResult,
null);
+    } catch (Exception e) {
+      // fatal error - try rolling back?
+      log.warn("Error adding replica " + addReplicasProps + " - trying to roll back...",
e);
+      addReplicasProps = addReplicasProps.plus(CoreAdminParams.NODE, replica.getNodeName());
+      NamedList rollback = new NamedList();
+      ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, rollback, null);
+      if (rollback.get("failure") != null) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Fatal error during
MOVEREPLICA of " + replica
+            + ", collection may be inconsistent: " + rollback.get("failure"));
+      }
+      return;
+    }
     if (addResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s
shard=%s" +
-          " on node=%s", coll.getName(), slice.getName(), targetNode);
+          " on node=%s, failure=%s", coll.getName(), slice.getName(), targetNode, addResult.get("failure"));
       log.warn(errorString);
       results.add("failure", errorString);
+      log.debug("--- trying to roll back...");
+      // try to roll back
+      addReplicasProps = addReplicasProps.plus(CoreAdminParams.NODE, replica.getNodeName());
+      NamedList rollback = new NamedList();
+      try {
+        ocmh.addReplica(ocmh.zkStateReader.getClusterState(), addReplicasProps, rollback,
null);
+      } catch (Exception e) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Fatal error during
MOVEREPLICA of " + replica
+            + ", collection may be inconsistent!", e);
+      }
+      if (rollback.get("failure") != null) {
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Fatal error during
MOVEREPLICA of " + replica
+            + ", collection may be inconsistent! Failure: " + rollback.get("failure"));
+      }
       return;
     } else {
       String successString = String.format(Locale.ROOT, "MOVEREPLICA action completed successfully,
moved replica=%s at node=%s " +
@@ -205,7 +243,7 @@ public class MoveReplicaCmd implements Cmd{
     }
     if (addResult.get("failure") != null) {
       String errorString = String.format(Locale.ROOT, "Failed to create replica for collection=%s
shard=%s" +
-          " on node=%s", coll.getName(), slice.getName(), targetNode);
+          " on node=%s, failure=", coll.getName(), slice.getName(), targetNode, addResult.get("failure"));
       log.warn(errorString);
       results.add("failure", errorString);
       if (watcher != null) { // unregister
@@ -239,10 +277,10 @@ public class MoveReplicaCmd implements Cmd{
     NamedList deleteResult = new NamedList();
     ocmh.deleteReplica(clusterState, removeReplicasProps, deleteResult, null);
     if (deleteResult.get("failure") != null) {
-      String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s
shard=%s name=%s",
-          coll.getName(), slice.getName(), replica.getName());
+      String errorString = String.format(Locale.ROOT, "Failed to cleanup replica collection=%s
shard=%s name=%s, failure=%s",
+          coll.getName(), slice.getName(), replica.getName(), deleteResult.get("failure"));
       log.warn(errorString);
-      results.add("failure", errorString + ", because of : " + deleteResult.get("failure"));
+      results.add("failure", errorString);
     } else {
       String successString = String.format(Locale.ROOT, "MOVEREPLICA action completed successfully,
moved replica=%s at node=%s " +
           "to replica=%s at node=%s", replica.getCoreName(), replica.getNodeName(), newCoreName,
targetNode);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index f4e43b0..5be9a4d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -698,7 +698,7 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler,
       if (result.size() == coreNames.size()) {
         return result;
       } else {
-        log.debug("Expecting {} cores but found {}", coreNames.size(), result.size());
+        log.debug("Expecting {} cores but found {}", coreNames, result);
       }
       if (timeout.hasTimedOut()) {
         throw new SolrException(ErrorCode.SERVER_ERROR, "Timed out waiting to see all replicas:
" + coreNames + " in cluster state.");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index cc4a590..365da65 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -1417,6 +1417,7 @@ public class ZkController {
         }
         if (core != null && core.getDirectoryFactory().isSharedStorage()) {
           if (core.getDirectoryFactory().isSharedStorage()) {
+            props.put(ZkStateReader.SHARED_STORAGE_PROP, "true");
             props.put("dataDir", core.getDataDir());
             UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
             if (ulog != null) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
index 260a991..f248152 100644
--- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
@@ -18,6 +18,7 @@ package org.apache.solr.core;
 
 import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.net.URLEncoder;
@@ -551,18 +552,21 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements
Sol
           return accept;
         }
       });
+    } catch (FileNotFoundException fnfe) {
+      // already deleted - ignore
+      LOG.debug("Old index directory already deleted - skipping...", fnfe);
     } catch (IOException ioExc) {
       LOG.error("Error checking for old index directories to clean-up.", ioExc);
     }
-    
+
+    if (oldIndexDirs == null || oldIndexDirs.length == 0)
+      return; // nothing to clean-up
+
     List<Path> oldIndexPaths = new ArrayList<>(oldIndexDirs.length);
     for (FileStatus ofs : oldIndexDirs) {
       oldIndexPaths.add(ofs.getPath());
     }
 
-    if (oldIndexDirs == null || oldIndexDirs.length == 0)
-      return; // nothing to clean-up
-
     Collections.sort(oldIndexPaths, Collections.reverseOrder());
     
     Set<String> livePaths = getLivePaths();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index 95d1a1c..11e9566 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -130,6 +130,7 @@ import static org.apache.solr.common.cloud.ZkStateReader.TLOG_REPLICAS;
 import static org.apache.solr.common.params.CollectionAdminParams.COUNT_PROP;
 import static org.apache.solr.common.params.CollectionParams.CollectionAction.*;
 import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
+import static org.apache.solr.common.params.CommonAdminParams.IN_PLACE_MOVE;
 import static org.apache.solr.common.params.CommonAdminParams.WAIT_FOR_FINAL_STATE;
 import static org.apache.solr.common.params.CommonParams.NAME;
 import static org.apache.solr.common.params.CommonParams.VALUE_LONG;
@@ -917,6 +918,7 @@ public class CollectionsHandler extends RequestHandlerBase implements
Permission
           CollectionParams.SOURCE_NODE,
           CollectionParams.TARGET_NODE,
           WAIT_FOR_FINAL_STATE,
+          IN_PLACE_MOVE,
           "replica",
           "shard");
     }),

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
index 70c4e46..f5e9e7e 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSTest.java
@@ -22,8 +22,10 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.solr.cloud.hdfs.HdfsTestUtil;
 import org.apache.solr.common.cloud.ZkConfigManager;
 import org.apache.solr.util.BadHdfsThreadsFilter;
+import org.apache.solr.util.LogLevel;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Test;
 
 /**
  *
@@ -32,6 +34,7 @@ import org.junit.BeforeClass;
     BadHdfsThreadsFilter.class, // hdfs currently leaks thread(s)
     MoveReplicaHDFSTest.ForkJoinThreadsFilter.class
 })
+@LogLevel("org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.autoscaling=DEBUG;")
 public class MoveReplicaHDFSTest extends MoveReplicaTest {
 
   private static MiniDFSCluster dfsCluster;
@@ -54,6 +57,18 @@ public class MoveReplicaHDFSTest extends MoveReplicaTest {
     dfsCluster = null;
   }
 
+  @Test
+  public void testNormalMove() throws Exception {
+    inPlaceMove = false;
+    test();
+  }
+
+  @Test
+  public void testNormalFailedMove() throws Exception {
+    inPlaceMove = false;
+    testFailedMove();
+  }
+
   public static class ForkJoinThreadsFilter implements ThreadFilter {
 
     @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
index 9ed751f2..9e16e90 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaTest.java
@@ -28,6 +28,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
@@ -36,6 +38,7 @@ import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.response.CoreAdminResponse;
 import org.apache.solr.client.solrj.response.RequestStatusState;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.CollectionStateWatcher;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
@@ -45,6 +48,7 @@ import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
+import org.apache.solr.util.IdUtils;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -57,6 +61,9 @@ public class MoveReplicaTest extends SolrCloudTestCase {
   private static ZkStateReaderAccessor accessor;
   private static int overseerLeaderIndex;
 
+  // used by MoveReplicaHDFSTest
+  protected boolean inPlaceMove = true;
+
   @BeforeClass
   public static void setupCluster() throws Exception {
     configureCluster(4)
@@ -86,12 +93,17 @@ public class MoveReplicaTest extends SolrCloudTestCase {
   @Before
   public void beforeTest() throws Exception {
     cluster.deleteAllCollections();
+    // restart any shut down nodes
+    for (int i = cluster.getJettySolrRunners().size(); i < 5; i++) {
+      cluster.startJettySolrRunner();
+    }
+    cluster.waitForAllNodes(5000);
+    inPlaceMove = true;
   }
 
   @Test
   public void test() throws Exception {
-    cluster.waitForAllNodes(5000);
-    String coll = "movereplicatest_coll";
+    String coll = getTestClass().getSimpleName() + "_coll";
     log.info("total_jettys: " + cluster.getJettySolrRunners().size());
     int REPLICATION = 2;
 
@@ -99,8 +111,11 @@ public class MoveReplicaTest extends SolrCloudTestCase {
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll,
"conf1", 2, REPLICATION);
     create.setMaxShardsPerNode(2);
+    create.setAutoAddReplicas(false);
     cloudClient.request(create);
 
+    addDocs(coll, 100);
+
     Replica replica = getRandomReplica(coll, cloudClient);
     Set<String> liveNodes = cloudClient.getZkStateReader().getClusterState().getLiveNodes();
     ArrayList<String> l = new ArrayList<>(liveNodes);
@@ -126,8 +141,10 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     int targetNumCores = getNumOfCores(cloudClient, targetNode, coll);
 
     CollectionAdminRequest.MoveReplica moveReplica = createMoveReplicaRequest(coll, replica,
targetNode);
-    moveReplica.processAsync("000", cloudClient);
-    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("000");
+    moveReplica.setInPlaceMove(inPlaceMove);
+    String asyncId = IdUtils.randomId();
+    moveReplica.processAsync(asyncId, cloudClient);
+    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);
     // wait for async request success
     boolean success = false;
     for (int i = 0; i < 200; i++) {
@@ -180,10 +197,13 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     }
     assertTrue("replica never fully recovered", recovered);
 
+    assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
+
     Set<CollectionStateWatcher> newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
     assertEquals(watchers, newWatchers);
 
     moveReplica = createMoveReplicaRequest(coll, replica, targetNode, shardId);
+    moveReplica.setInPlaceMove(inPlaceMove);
     moveReplica.process(cloudClient);
     checkNumOfCores(cloudClient, replica.getNodeName(), coll, sourceNumCores);
     // wait for recovery
@@ -223,11 +243,13 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     assertTrue("replica never fully recovered", recovered);
     newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
     assertEquals(watchers, newWatchers);
+
+    assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
   }
 
   @Test
   public void testFailedMove() throws Exception {
-    String coll = "movereplicatest_failed_coll";
+    String coll = getTestClass().getSimpleName() + "_failed_coll";
     int REPLICATION = 2;
 
     CloudSolrClient cloudClient = cluster.getSolrClient();
@@ -235,8 +257,11 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     Set<CollectionStateWatcher> watchers = new HashSet<>(accessor.getStateWatchers(coll));
 
     CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll,
"conf1", 2, REPLICATION);
+    create.setAutoAddReplicas(false);
     cloudClient.request(create);
 
+    addDocs(coll, 100);
+
     Replica replica = getRandomReplica(coll, cloudClient);
     Set<String> liveNodes = cloudClient.getZkStateReader().getClusterState().getLiveNodes();
     ArrayList<String> l = new ArrayList<>(liveNodes);
@@ -250,15 +275,17 @@ public class MoveReplicaTest extends SolrCloudTestCase {
     }
     assertNotNull(targetNode);
     CollectionAdminRequest.MoveReplica moveReplica = createMoveReplicaRequest(coll, replica,
targetNode);
+    moveReplica.setInPlaceMove(inPlaceMove);
     // start moving
-    moveReplica.processAsync("001", cloudClient);
+    String asyncId = IdUtils.randomId();
+    moveReplica.processAsync(asyncId, cloudClient);
     // shut down target node
     for (int i = 0; i < cluster.getJettySolrRunners().size(); i++) {
       if (cluster.getJettySolrRunner(i).getNodeName().equals(targetNode)) {
         cluster.stopJettySolrRunner(i);
       }
     }
-    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus("001");
+    CollectionAdminRequest.RequestStatus requestStatus = CollectionAdminRequest.requestStatus(asyncId);
     // wait for async request success
     boolean success = true;
     for (int i = 0; i < 200; i++) {
@@ -274,6 +301,8 @@ public class MoveReplicaTest extends SolrCloudTestCase {
 
     Set<CollectionStateWatcher> newWatchers = new HashSet<>(accessor.getStateWatchers(coll));
     assertEquals(watchers, newWatchers);
+
+    assertEquals(100, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
   }
 
   private CollectionAdminRequest.MoveReplica createMoveReplicaRequest(String coll, Replica
replica, String targetNode, String shardId) {
@@ -351,4 +380,15 @@ public class MoveReplicaTest extends SolrCloudTestCase {
       }
     }
   }
+
+  private void addDocs(String collection, int numDocs) throws Exception {
+    SolrClient solrClient = cluster.getSolrClient();
+    for (int docId = 1; docId <= numDocs; docId++) {
+      SolrInputDocument doc = new SolrInputDocument();
+      doc.addField("id", docId);
+      solrClient.add(collection, doc);
+    }
+    solrClient.commit(collection);
+    Thread.sleep(5000);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
index b14b026..370c27a 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java
@@ -604,6 +604,8 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
     protected String collection, replica, targetNode;
     protected String shard, sourceNode;
     protected boolean randomlyMoveReplica;
+    protected boolean inPlaceMove = true;
+    protected int timeout = -1;
 
     public MoveReplica(String collection, String replica, String targetNode) {
       super(CollectionAction.MOVEREPLICA);
@@ -622,11 +624,23 @@ public abstract class CollectionAdminRequest<T extends CollectionAdminResponse>
       this.randomlyMoveReplica = true;
     }
 
+    public void setInPlaceMove(boolean inPlaceMove) {
+      this.inPlaceMove = inPlaceMove;
+    }
+
+    public void setTimeout(int timeout) {
+      this.timeout = timeout;
+    }
+
     @Override
     public SolrParams getParams() {
       ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
       params.set("collection", collection);
       params.set(CollectionParams.TARGET_NODE, targetNode);
+      params.set(CommonAdminParams.IN_PLACE_MOVE, inPlaceMove);
+      if (timeout != -1) {
+        params.set(CommonAdminParams.TIMEOUT, timeout);
+      }
       if (randomlyMoveReplica) {
         params.set("shard", shard);
         params.set(CollectionParams.SOURCE_NODE, sourceNode);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
index b70d2f1..9bf48bb 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
@@ -87,6 +87,7 @@ public class ZkStateReader implements Closeable {
   public static final String SHARD_PARENT_PROP = "shard_parent";
   public static final String NUM_SHARDS_PROP = "numShards";
   public static final String LEADER_PROP = "leader";
+  public static final String SHARED_STORAGE_PROP = "shared_storage";
   public static final String PROPERTY_PROP = "property";
   public static final String PROPERTY_PROP_PREFIX = "property.";
   public static final String PROPERTY_VALUE_PROP = "property.value";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
index f20afa7..c39b4a8 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonAdminParams.java
@@ -19,7 +19,12 @@ package org.apache.solr.common.params;
 public interface CommonAdminParams
 {
 
-  /** async or not? **/
+  /** Async or not? **/
   String ASYNC = "async";
+  /** Wait for final state of the operation. */
   String WAIT_FOR_FINAL_STATE = "waitForFinalState";
+  /** Allow in-place move of replicas that use shared filesystems. */
+  String IN_PLACE_MOVE = "inPlaceMove";
+  /** Timeout for replicas to become active. */
+  String TIMEOUT = "timeout";
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7ccf122d/solr/solrj/src/resources/apispec/collections.collection.Commands.json
----------------------------------------------------------------------
diff --git a/solr/solrj/src/resources/apispec/collections.collection.Commands.json b/solr/solrj/src/resources/apispec/collections.collection.Commands.json
index dfae2f2..0cd3644 100644
--- a/solr/solrj/src/resources/apispec/collections.collection.Commands.json
+++ b/solr/solrj/src/resources/apispec/collections.collection.Commands.json
@@ -20,7 +20,7 @@
     "move-replica": {
       "type": "object",
       "documentation": "https://lucene.apache.org/solr/guide/collections-api.html#movereplica",
-      "description": "This command moves a replica from one node to a new node. In case of
shared filesystems the `dataDir` will be reused.",
+      "description": "This command moves a replica from one node to a new node. In case of
shared filesystems the `dataDir` and `ulogDir` may be reused.",
       "properties": {
         "replica": {
           "type": "string",
@@ -32,13 +32,29 @@
         },
         "sourceNode": {
           "type": "string",
-          "description": "The name of the node that contains the replica"
+          "description": "The name of the node that contains the replica."
         },
         "targetNode": {
           "type": "string",
-          "description": "The name of the destination node. This parameter is required"
+          "description": "The name of the destination node. This parameter is required."
+        },
+        "waitForFinalState": {
+          "type": "boolean",
+          "default": "false",
+          "description": "Wait for the moved replica to become active."
+        },
+        "timeout": {
+          "type": "integer",
+          "default": 600,
+          "description": "Timeout to wait for replica to become active. For very large replicas
this may need to be increased."
+        },
+        "inPlaceMove": {
+          "type": "boolean",
+          "default": "true",
+          "description": "For replicas that use shared filesystems allow 'in-place' move
that reuses shared data."
         }
-      }
+      },
+      "required":["targetNode"]
     },
     "migrate-docs":{
       "type":"object",


Mime
View raw message