lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a.@apache.org
Subject lucene-solr:jira/solr-11458: SOLR-11458: Use moveNormalReplica for HDFS replicas when RF==1.
Date Mon, 06 Nov 2017 19:56:25 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-11458 [created] 26c15f1f0


SOLR-11458: Use moveNormalReplica for HDFS replicas when RF==1.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/26c15f1f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/26c15f1f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/26c15f1f

Branch: refs/heads/jira/solr-11458
Commit: 26c15f1f08709d7638d92d211753a2920954fb1b
Parents: dc6119b
Author: Andrzej Bialecki <ab@apache.org>
Authored: Mon Nov 6 20:55:40 2017 +0100
Committer: Andrzej Bialecki <ab@apache.org>
Committed: Mon Nov 6 20:55:40 2017 +0100

----------------------------------------------------------------------
 .../org/apache/solr/cloud/MoveReplicaCmd.java   |  9 +++-
 .../solr/cloud/MoveReplicaHDFSFailoverTest.java | 55 ++++++++++++++++----
 .../solr/common/cloud/ClusterStateUtil.java     |  2 +-
 3 files changed, 52 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26c15f1f/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
index a2ed407..a89e8ba 100644
--- a/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/MoveReplicaCmd.java
@@ -112,9 +112,14 @@ public class MoveReplicaCmd implements Cmd{
     }
     assert slice != null;
     Object dataDir = replica.get("dataDir");
-    if (dataDir != null && dataDir.toString().startsWith("hdfs:/")) {
+    // don't move the only replica in place - if it fails we can lose data
+    boolean inPlaceMove = slice.getReplicas().size() > 1;
+    log.debug("--- in-place move allowed=" + inPlaceMove);
+    if (inPlaceMove && dataDir != null && dataDir.toString().startsWith("hdfs:/")) {
+      log.debug("--- using moveHdfsReplica");
       moveHdfsReplica(clusterState, results, dataDir.toString(), targetNode, async, coll, replica, slice, timeout, waitForFinalState);
     } else {
+      log.debug("--- using moveNormalReplica");
       moveNormalReplica(clusterState, results, targetNode, async, coll, replica, slice, timeout, waitForFinalState);
     }
   }
@@ -224,7 +229,7 @@ public class MoveReplicaCmd implements Cmd{
           results.add("failure", errorString);
           return;
         } else {
-          log.debug("Replica " + watcher.getActiveReplicas() + " is active - deleting the source...");
+          log.info("Replica " + watcher.getActiveReplicas() + " is active - deleting the source...");
         }
       } finally {
         ocmh.zkStateReader.removeCollectionStateWatcher(coll.getName(), watcher);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26c15f1f/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSFailoverTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSFailoverTest.java b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSFailoverTest.java
index 5edae7c..6621fc4 100644
--- a/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSFailoverTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/MoveReplicaHDFSFailoverTest.java
@@ -34,6 +34,7 @@ import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.ZkConfigManager;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.util.BadHdfsThreadsFilter;
+import org.apache.solr.util.LogLevel;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
@@ -42,6 +43,7 @@ import org.junit.Test;
     BadHdfsThreadsFilter.class, // hdfs currently leaks thread(s)
     MoveReplicaHDFSTest.ForkJoinThreadsFilter.class
 })
+@LogLevel("org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.overseer=DEBUG;org.apache.solr.client.solrj.impl.SolrClientDataProvider=DEBUG;")
 public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase {
   private static MiniDFSCluster dfsCluster;
 
@@ -70,12 +72,12 @@ public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase {
   @Test
   public void testDataDirAndUlogAreMaintained() throws Exception {
     String coll = "movereplicatest_coll2";
-    CollectionAdminRequest.createCollection(coll, "conf1", 1, 1)
+    CollectionAdminRequest.createCollection(coll, "conf1", 1, 2)
         .setCreateNodeSet("")
         .process(cluster.getSolrClient());
     String hdfsUri = HdfsTestUtil.getURI(dfsCluster);
-    String dataDir = hdfsUri + "/dummyFolder/dataDir";
-    String ulogDir = hdfsUri + "/dummyFolder2/ulogDir";
+    String dataDir = hdfsUri + "/dummyFolder11/dataDir";
+    String ulogDir = hdfsUri + "/dummyFolder12/ulogDir";
     CollectionAdminResponse res = CollectionAdminRequest
         .addReplicaToShard(coll, "shard1")
         .setDataDir(dataDir)
@@ -83,6 +85,15 @@ public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase {
         .setNode(cluster.getJettySolrRunner(0).getNodeName())
         .process(cluster.getSolrClient());
 
+    String dataDir2 = hdfsUri + "/dummyFolder21/dataDir";
+    String ulogDir2 = hdfsUri + "/dummyFolder22/ulogDir";
+    res = CollectionAdminRequest
+        .addReplicaToShard(coll, "shard1")
+        .setDataDir(dataDir2)
+        .setUlogDir(ulogDir2)
+        .setNode(cluster.getJettySolrRunner(0).getNodeName())
+        .process(cluster.getSolrClient());
+
     ulogDir += "/tlog";
     ZkStateReader zkStateReader = cluster.getSolrClient().getZkStateReader();
     assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000));
@@ -96,8 +107,15 @@ public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase {
         .process(cluster.getSolrClient());
     assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000));
     docCollection = zkStateReader.getClusterState().getCollection(coll);
-    assertEquals(1, docCollection.getSlice("shard1").getReplicas().size());
-    Replica newReplica = docCollection.getReplicas().iterator().next();
+    assertEquals(2, docCollection.getSlice("shard1").getReplicas().size());
+    Replica newReplica = null;
+    for (Replica r : docCollection.getReplicas()) {
+      if (r.getCoreName().equals(replica.getCoreName())) {
+        newReplica = r;
+        break;
+      }
+    }
+    assertNotNull(newReplica);
     assertEquals(newReplica.getNodeName(), cluster.getJettySolrRunner(1).getNodeName());
     assertTrue(newReplica.getStr("ulogDir"), newReplica.getStr("ulogDir").equals(ulogDir) || newReplica.getStr("ulogDir").equals(ulogDir+'/'));
     assertTrue(newReplica.getStr("dataDir"),newReplica.getStr("dataDir").equals(dataDir) || newReplica.getStr("dataDir").equals(dataDir+'/'));
@@ -112,14 +130,27 @@ public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase {
     Thread.sleep(5000);
     new CollectionAdminRequest.MoveReplica(coll, newReplica.getName(), cluster.getJettySolrRunner(0).getNodeName())
         .process(cluster.getSolrClient());
-    assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000));
+    boolean active = ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000);
+    if (!active) {
+      fail("Time out waiting for all replicas to become active: " + zkStateReader.getClusterState().getCollection(coll));
+    }
 
     // assert that the old core will be removed on startup
     cluster.getJettySolrRunner(1).start();
-    assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000));
+    active = ClusterStateUtil.waitForAllActiveAndLiveReplicas(zkStateReader, 120000);
+    if (!active) {
+      fail("Time out waiting for all replicas to become active: " + zkStateReader.getClusterState().getCollection(coll));
+    }
     docCollection = zkStateReader.getClusterState().getCollection(coll);
-    assertEquals(1, docCollection.getReplicas().size());
-    newReplica = docCollection.getReplicas().iterator().next();
+    assertEquals(2, docCollection.getReplicas().size());
+    newReplica = null;
+    for (Replica r : docCollection.getReplicas()) {
+      if (r.getCoreName().equals(replica.getCoreName())) {
+        newReplica = r;
+        break;
+      }
+    }
+    assertNotNull(newReplica);
     assertEquals(newReplica.getNodeName(), cluster.getJettySolrRunner(0).getNodeName());
     assertTrue(newReplica.getStr("ulogDir"), newReplica.getStr("ulogDir").equals(ulogDir) || newReplica.getStr("ulogDir").equals(ulogDir+'/'));
     assertTrue(newReplica.getStr("dataDir"),newReplica.getStr("dataDir").equals(dataDir) || newReplica.getStr("dataDir").equals(dataDir+'/'));
@@ -144,9 +175,11 @@ public class MoveReplicaHDFSFailoverTest extends SolrCloudTestCase {
     assertTrue(ClusterStateUtil.waitForAllReplicasNotLive(cluster.getSolrClient().getZkStateReader(), 20000));
 
     // move replica from node0 -> node1
-    new CollectionAdminRequest.MoveReplica(coll, replica.getName(), cluster.getJettySolrRunner(1).getNodeName())
-        .process(cluster.getSolrClient());
+    CollectionAdminRequest.MoveReplica moveReq = new CollectionAdminRequest.MoveReplica(coll, replica.getName(), cluster.getJettySolrRunner(1).getNodeName());
+    moveReq.setWaitForFinalState(true);
+    moveReq.process(cluster.getSolrClient());
     assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), 20000));
+    assertEquals(2, cluster.getSolrClient().query(coll, new SolrQuery("*:*")).getResults().getNumFound());
 
     cluster.getJettySolrRunners().get(1).stop();
     assertTrue(ClusterStateUtil.waitForAllReplicasNotLive(cluster.getSolrClient().getZkStateReader(), 20000));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/26c15f1f/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java
index 0910868..fad46e6 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ClusterStateUtil.java
@@ -100,7 +100,7 @@ public class ClusterStateUtil {
         }
       }
     }
-    
+
     return success;
   }
   


Mime
View raw message