kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject kudu git commit: [java-client] Re-enable multi-master tests
Date Fri, 05 Aug 2016 22:09:42 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 7a08b25a0 -> 9c82e61f5


[java-client] Re-enable multi-master tests

This patch makes TestMasterFailover useful again. It also adds the killing of masters
to ITClient. Finally, it sets the raft heartbeat lower so that we don't wait 1.5s for
leader elections.

TestGetMasterRegistrationReceived was added since a previous version of this patch
encountered a bug that such a simple unit test can detect.

Change-Id: Ia1051222738c84ef3d3e1a33b4981bc9454b7972
Reviewed-on: http://gerrit.cloudera.org:8080/3654
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/9c82e61f
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/9c82e61f
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/9c82e61f

Branch: refs/heads/master
Commit: 9c82e61f58cfd88579b9c3f83b4dd2419978afdf
Parents: 7a08b25
Author: Jean-Daniel Cryans <jdcryans@apache.org>
Authored: Thu Jul 14 12:40:39 2016 -0700
Committer: Adar Dembo <adar@cloudera.com>
Committed: Fri Aug 5 22:09:24 2016 +0000

----------------------------------------------------------------------
 .../client/GetMasterRegistrationReceived.java   |  57 +++--
 .../org/apache/kudu/client/BaseKuduTest.java    |   2 +-
 .../java/org/apache/kudu/client/ITClient.java   |  11 +-
 .../org/apache/kudu/client/MiniKuduCluster.java |   3 +-
 .../TestGetMasterRegistrationReceived.java      | 207 +++++++++++++++++++
 .../apache/kudu/client/TestMasterFailover.java  |   6 +-
 6 files changed, 252 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/9c82e61f/java/kudu-client/src/main/java/org/apache/kudu/client/GetMasterRegistrationReceived.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/main/java/org/apache/kudu/client/GetMasterRegistrationReceived.java
b/java/kudu-client/src/main/java/org/apache/kudu/client/GetMasterRegistrationReceived.java
index ad4d0bb..3f2c745 100644
--- a/java/kudu-client/src/main/java/org/apache/kudu/client/GetMasterRegistrationReceived.java
+++ b/java/kudu-client/src/main/java/org/apache/kudu/client/GetMasterRegistrationReceived.java
@@ -108,38 +108,53 @@ final class GetMasterRegistrationReceived {
   private void incrementCountAndCheckExhausted() {
     if (countResponsesReceived.incrementAndGet() == numMasters) {
       if (responseDCalled.compareAndSet(false, true)) {
+
+        // We want `allUnrecoverable` to only be true if all the masters came back with
+        // NonRecoverableException so that we know for sure we can't retry anymore. Just
one master
+        // that replies with RecoverableException or with an ok response but is a FOLLOWER
is
+        // enough to keep us retrying.
         boolean allUnrecoverable = true;
-        for (Exception ex : exceptionsReceived) {
-          if (!(ex instanceof NonRecoverableException)) {
-            allUnrecoverable = false;
-            break;
+        if (exceptionsReceived.size() == countResponsesReceived.get()) {
+          for (Exception ex : exceptionsReceived) {
+            if (!(ex instanceof NonRecoverableException)) {
+              allUnrecoverable = false;
+              break;
+            }
           }
+        } else {
+          allUnrecoverable = false;
         }
+
         String allHosts = NetUtil.hostsAndPortsToString(masterAddrs);
-        // Doing a negative check because allUnrecoverable stays true if there are no exceptions.
-        if (!allUnrecoverable) {
-          String message = "Master config (" + allHosts + ") has no leader.";
+        if (allUnrecoverable) {
+          // This will stop retries.
+          String msg = String.format("Couldn't find a valid master in (%s). " +
+              "Exceptions received: %s", allHosts,
+              Joiner.on(",").join(Lists.transform(
+                  exceptionsReceived, Functions.toStringFunction())));
+          Status s = Status.ServiceUnavailable(msg);
+          responseD.callback(new NonRecoverableException(s));
+        } else {
+          String message = String.format("Master config (%s) has no leader.",
+              allHosts);
           Exception ex;
           if (exceptionsReceived.isEmpty()) {
-            LOG.warn("None of the provided masters (" + allHosts + ") is a leader, will retry.");
+            LOG.warn(String.format(
+                "None of the provided masters (%s) is a leader, will retry.",
+                allHosts));
             ex = new NoLeaderMasterFoundException(Status.ServiceUnavailable(message));
           } else {
-            LOG.warn("Unable to find the leader master (" + allHosts + "), will retry");
-            String joinedMsg = message + ". Exceptions received: " +
-                Joiner.on(",").join(
-                    Lists.transform(exceptionsReceived, Functions.toStringFunction()));
-            Status statusServiceUnavailable = Status.ServiceUnavailable(joinedMsg);
-            ex = new NoLeaderMasterFoundException(
-                statusServiceUnavailable,
+            LOG.warn(String.format(
+                "Unable to find the leader master (%s), will retry",
+                allHosts));
+            String joinedMsg = message + " Exceptions received: " +
+                Joiner.on(",").join(Lists.transform(
+                    exceptionsReceived, Functions.toStringFunction()));
+            Status s = Status.ServiceUnavailable(joinedMsg);
+            ex = new NoLeaderMasterFoundException(s,
                 exceptionsReceived.get(exceptionsReceived.size() - 1));
           }
           responseD.callback(ex);
-        } else {
-          Status statusConfigurationError = Status.ConfigurationError(
-              "Couldn't find a valid master in (" + allHosts +
-                  "), exceptions: " + exceptionsReceived);
-          // This will stop retries.
-          responseD.callback(new NonRecoverableException(statusConfigurationError));
         }
       }
     }

http://git-wip-us.apache.org/repos/asf/kudu/blob/9c82e61f/java/kudu-client/src/test/java/org/apache/kudu/client/BaseKuduTest.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/BaseKuduTest.java b/java/kudu-client/src/test/java/org/apache/kudu/client/BaseKuduTest.java
index fbc8b09..8f092a1 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/BaseKuduTest.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/BaseKuduTest.java
@@ -46,7 +46,7 @@ public class BaseKuduTest {
 
   private static final String NUM_MASTERS_PROP = "NUM_MASTERS";
   private static final int NUM_TABLET_SERVERS = 3;
-  private static final int DEFAULT_NUM_MASTERS = 1;
+  private static final int DEFAULT_NUM_MASTERS = 3;
 
   // Number of masters that will be started for this test if we're starting
   // a cluster.

http://git-wip-us.apache.org/repos/asf/kudu/blob/9c82e61f/java/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java b/java/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java
index e582e2f..10cf231 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/ITClient.java
@@ -146,15 +146,14 @@ public class ITClient extends BaseKuduTest {
       while (KEEP_RUNNING_LATCH.getCount() > 0) {
         try {
           boolean shouldContinue;
-          if (System.currentTimeMillis() % 2 == 0) {
+          int randomInt = random.nextInt(3);
+          if (randomInt == 0) {
             shouldContinue = restartTS();
-          } else {
-
+          } else if (randomInt == 1) {
             shouldContinue = disconnectNode();
+          } else {
+            shouldContinue = restartMaster();
           }
-          // TODO restarting the master currently finds more bugs. Also, adding it to the
list makes
-          // it necessary to find a new weighing mechanism betweent he different chaos options.
-          // shouldContinue = restartMaster();
 
           if (!shouldContinue) {
             return;

http://git-wip-us.apache.org/repos/asf/kudu/blob/9c82e61f/java/kudu-client/src/test/java/org/apache/kudu/client/MiniKuduCluster.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/MiniKuduCluster.java b/java/kudu-client/src/test/java/org/apache/kudu/client/MiniKuduCluster.java
index 49663b6..3cfff34 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/MiniKuduCluster.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/MiniKuduCluster.java
@@ -195,7 +195,8 @@ public class MiniKuduCluster implements AutoCloseable {
           "--webserver_interface=" + localhost,
           "--local_ip_for_outbound_sockets=" + localhost,
           "--rpc_bind_addresses=" + localhost + ":" + port,
-          "--webserver_port=" + masterWebPorts.get(i));
+          "--webserver_port=" + masterWebPorts.get(i),
+          "--raft_heartbeat_interval_ms=200"); // make leader elections faster for faster
tests
       if (numMasters > 1) {
         masterCmdLine.add("--master_addresses=" + masterAddresses);
       }

http://git-wip-us.apache.org/repos/asf/kudu/blob/9c82e61f/java/kudu-client/src/test/java/org/apache/kudu/client/TestGetMasterRegistrationReceived.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestGetMasterRegistrationReceived.java
b/java/kudu-client/src/test/java/org/apache/kudu/client/TestGetMasterRegistrationReceived.java
new file mode 100644
index 0000000..8570668
--- /dev/null
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestGetMasterRegistrationReceived.java
@@ -0,0 +1,207 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.kudu.client;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.net.HostAndPort;
+import com.stumbleupon.async.Callback;
+import com.stumbleupon.async.Deferred;
+import org.junit.Test;
+import org.apache.kudu.WireProtocol;
+import org.apache.kudu.consensus.Metadata;
+import org.apache.kudu.master.Master;
+
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.fail;
+import static org.apache.kudu.consensus.Metadata.RaftPeerPB.Role.FOLLOWER;
+import static org.apache.kudu.consensus.Metadata.RaftPeerPB.Role.LEADER;
+
+public class TestGetMasterRegistrationReceived {
+
+  private static final List<HostAndPort> MASTERS = ImmutableList.of(
+      HostAndPort.fromParts("0", 9000),
+      HostAndPort.fromParts("1", 9000),
+      HostAndPort.fromParts("2", 9000));
+
+  @Test(timeout = 10000)
+  public void test() throws Exception {
+    NonRecoverableException reusableNRE = new NonRecoverableException(
+        Status.RuntimeError(""));
+    RecoverableException reusableRE = new RecoverableException(
+        Status.RuntimeError(""));
+    NoLeaderMasterFoundException retryResponse =
+        new NoLeaderMasterFoundException(Status.RuntimeError(""));
+    // We don't test for a particular good response, so as long as we pass something that's
not an
+    // exception to runTest() we're good.
+    Object successResponse = new Object();
+
+    // Success cases.
+
+    // Normal case.
+    runTest(
+        makeGMRR(LEADER),
+        makeGMRR(FOLLOWER),
+        makeGMRR(FOLLOWER),
+        successResponse);
+
+    // Permutation works too.
+    runTest(
+        makeGMRR(FOLLOWER),
+        makeGMRR(LEADER),
+        makeGMRR(FOLLOWER),
+        successResponse);
+
+    // Multiple leaders, that's fine since it might be a TOCTOU situation, or one master
+    // is confused. Raft handles this if the client then tries to do something that requires
a
+    // replication on the master-side.
+    runTest(
+        makeGMRR(LEADER),
+        makeGMRR(LEADER),
+        makeGMRR(FOLLOWER),
+        successResponse);
+
+    // Mixed bag, still works because there's a leader.
+    runTest(
+        reusableNRE,
+        makeGMRR(FOLLOWER),
+        makeGMRR(LEADER),
+        successResponse);
+
+    // All unreachable except one leader, still good.
+    runTest(
+        reusableNRE,
+        reusableNRE,
+        makeGMRR(LEADER),
+        successResponse);
+
+    // Permutation of the previous.
+    runTest(
+        reusableNRE,
+        makeGMRR(LEADER),
+        reusableNRE,
+        successResponse);
+
+    // Retry cases.
+
+    // Just followers means we retry.
+    runTest(
+        makeGMRR(FOLLOWER),
+        makeGMRR(FOLLOWER),
+        makeGMRR(FOLLOWER),
+        retryResponse);
+
+    // One NRE but we have responsive masters, retry.
+    runTest(
+        makeGMRR(FOLLOWER),
+        makeGMRR(FOLLOWER),
+        reusableNRE,
+        retryResponse);
+
+    // One good master but no leader, retry.
+    runTest(
+        reusableNRE,
+        makeGMRR(FOLLOWER),
+        reusableNRE,
+        retryResponse);
+
+    // Different case but same outcome.
+    runTest(
+        reusableRE,
+        reusableNRE,
+        makeGMRR(FOLLOWER),
+        retryResponse);
+
+    // All recoverable means retry.
+    runTest(
+        reusableRE,
+        reusableRE,
+        reusableRE,
+        retryResponse);
+
+    // Just one recoverable still means retry.
+    runTest(
+        reusableRE,
+        reusableNRE,
+        reusableNRE,
+        retryResponse);
+
+    // Failure case.
+
+    // Can't recover anything, give up.
+    runTest(
+        reusableNRE,
+        reusableNRE,
+        reusableNRE,
+        reusableNRE);
+  }
+
+  private void runTest(Object response0,
+                       Object response1,
+                       Object response2,
+                       Object expectedResponse) throws Exception {
+
+    // Here we basically do what AsyncKuduClient would do, add all the callbacks and then
we also
+    // add the responses. We then check for the right response.
+
+    Deferred<Master.GetTableLocationsResponsePB> d = new Deferred<>();
+
+    GetMasterRegistrationReceived grrm = new GetMasterRegistrationReceived(MASTERS, d);
+
+    Callback<Void, GetMasterRegistrationResponse> cb0 = grrm.callbackForNode(MASTERS.get(0));
+    Callback<Void, GetMasterRegistrationResponse> cb1 = grrm.callbackForNode(MASTERS.get(1));
+    Callback<Void, GetMasterRegistrationResponse> cb2 = grrm.callbackForNode(MASTERS.get(2));
+
+    Callback<Void, Exception> eb0 = grrm.errbackForNode(MASTERS.get(0));
+    Callback<Void, Exception> eb1 = grrm.errbackForNode(MASTERS.get(1));
+    Callback<Void, Exception> eb2 = grrm.errbackForNode(MASTERS.get(2));
+
+    callTheRightCallback(cb0, eb0, response0);
+    callTheRightCallback(cb1, eb1, response1);
+    callTheRightCallback(cb2, eb2, response2);
+
+    try {
+      d.join(); // Don't care about the response.
+      if ((expectedResponse instanceof Exception)) {
+        fail("Should not work " + expectedResponse.getClass());
+      } else {
+        // ok
+      }
+    } catch (Exception ex) {
+      assertEquals(expectedResponse.getClass(), ex.getClass());
+    }
+  }
+
+  // Helper method that determines if the callback or errback should be called.
+  private static void callTheRightCallback(
+      Callback<Void, GetMasterRegistrationResponse> cb,
+      Callback<Void, Exception> eb,
+      Object response) throws Exception {
+    if (response instanceof Exception) {
+      eb.call((Exception) response);
+    } else {
+      cb.call((GetMasterRegistrationResponse) response);
+    }
+  }
+
+  private static GetMasterRegistrationResponse makeGMRR(Metadata.RaftPeerPB.Role role) {
+    return new GetMasterRegistrationResponse(0, "", role, null,
+        WireProtocol.NodeInstancePB.getDefaultInstance());
+  }
+}

http://git-wip-us.apache.org/repos/asf/kudu/blob/9c82e61f/java/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java
----------------------------------------------------------------------
diff --git a/java/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java
b/java/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java
index 9adc36e..7cc2413 100644
--- a/java/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java
+++ b/java/kudu-client/src/test/java/org/apache/kudu/client/TestMasterFailover.java
@@ -39,11 +39,7 @@ public class TestMasterFailover extends BaseKuduTest {
     createTable(TABLE_NAME, basicSchema, getBasicCreateTableOptions());
   }
 
-  /**
-   * This test is disabled as we're not supporting multi-master just yet.
-   */
   @Test(timeout = 30000)
-  @Ignore
   public void testKillLeader() throws Exception {
     int countMasters = masterHostPorts.size();
     if (countMasters < 3) {
@@ -59,7 +55,7 @@ public class TestMasterFailover extends BaseKuduTest {
 
     // Test that we can create a new table when one of the masters is down.
     String newTableName = TABLE_NAME + "-afterLeaderIsDead";
-    createTable(newTableName, basicSchema, new CreateTableOptions());
+    createTable(newTableName, basicSchema, getBasicCreateTableOptions());
     table = openTable(newTableName);
     assertEquals(0, countRowsInScan(client.newScannerBuilder(table).build()));
 


Mime
View raw message