kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ale...@apache.org
Subject [5/5] kudu git commit: [delete_table-itest] fix flake in TestUnknownTabletsAreNotDeleted
Date Tue, 20 Mar 2018 17:46:14 GMT
[delete_table-itest] fix flake in TestUnknownTabletsAreNotDeleted

Fixed a flake in the DeleteTableITest.TestUnknownTabletsAreNotDeleted
scenario. The flake was easily reproducible on macOS.

The scenario involves removing the master's data directory along with the
IPKI information.  Once the master re-generates its IPKI system records
and starts using the new TLS server certificate signed by the newly
generated CA private key, the tserver fails to verify the master's new
server certificate using the old CA certificate.

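With RPC authentication set to "optional" and a 10ms tserver->master
heartbeat interval, the tserver was in most cases able to establish
a connection to the restarted master before the master re-generated its
IPKI records, so no TLS certificate was used for authentication. When the
tserver happened to connect only after the new certificate was in use,
the verification failed and the test flaked.

The fix disables RPC authentication and encryption for both the master
and the tserver in this scenario, and waits for the tserver to register
with the restarted master instead of polling the master's heartbeat
metric.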

Change-Id: Ib6fd439c0ef5fb66b752f7f49175e4c2d818412e
Reviewed-on: http://gerrit.cloudera.org:8080/9722
Tested-by: Kudu Jenkins
Reviewed-by: Will Berkeley <wdberkeley@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/5d7b68d5
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/5d7b68d5
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/5d7b68d5

Branch: refs/heads/master
Commit: 5d7b68d59ae78560d0997168a27a73c4e5280ecc
Parents: e391e8a
Author: Alexey Serbin <aserbin@cloudera.com>
Authored: Mon Mar 19 20:04:38 2018 -0700
Committer: Alexey Serbin <aserbin@cloudera.com>
Committed: Tue Mar 20 17:43:34 2018 +0000

----------------------------------------------------------------------
 .../integration-tests/delete_table-itest.cc     | 48 ++++++++++++--------
 1 file changed, 30 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/5d7b68d5/src/kudu/integration-tests/delete_table-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/delete_table-itest.cc b/src/kudu/integration-tests/delete_table-itest.cc
index 43ed3b2..e611a01 100644
--- a/src/kudu/integration-tests/delete_table-itest.cc
+++ b/src/kudu/integration-tests/delete_table-itest.cc
@@ -1093,10 +1093,32 @@ TEST_F(DeleteTableITest, TestWebPageForTombstonedTablet) {
 }
 
 TEST_F(DeleteTableITest, TestUnknownTabletsAreNotDeleted) {
-  // Speed up heartbeating so that the unknown tablet is detected faster.
-  vector<string> extra_ts_flags = { "--heartbeat_interval_ms=10" };
+  //
+  // NOTE on disabled RPC authentication and encryption:
+  //   This test scenario would be flaky if the master/tserver authentication
+  //   were done via TLS certificates. That's because the scenario involves
+  //   removing master's data directory along with the IPKI information. Once
+  //   the master re-generates its IPKI system records and starts using the new
+  //   TLS server certificate signed by the newly generated CA private key,
+  //   the tserver fails to verify the new master's certificate using the old CA
+  //   certificate.
+  //
+  constexpr int kNumTabletServers = 1;
+  const vector<string> extra_ts_flags = {
+    // Speed up heartbeating so that the unknown tablet is detected faster.
+    "--heartbeat_interval_ms=10",
+
+    // See the note above on disabled RPC authentication and encryption.
+    "--rpc_authentication=disabled",
+    "--rpc_encryption=disabled",
+  };
+  const vector<string> extra_master_flags = {
+    // See the note above on disabled RPC authentication and encryption.
+    "--rpc_authentication=disabled",
+    "--rpc_encryption=disabled",
+  };
 
-  NO_FATALS(StartCluster(extra_ts_flags, {}, 1));
+  NO_FATALS(StartCluster(extra_ts_flags, extra_master_flags, kNumTabletServers));
 
   Schema schema(GetSimpleTestSchema());
   client::KuduSchema client_schema(client::KuduSchemaFromSchema(schema));
@@ -1113,22 +1135,12 @@ TEST_F(DeleteTableITest, TestUnknownTabletsAreNotDeleted) {
   ASSERT_OK(cluster_->master()->DeleteFromDisk());
   ASSERT_OK(cluster_->master()->Restart());
 
-  // Give the master a chance to finish writing the new master tablet to disk
-  // so that it can be found after the subsequent restart below.
-  ASSERT_OK(cluster_->master()->WaitForCatalogManager());
-
-  // The master should not delete the tablet. Let's wait for at least one
-  // heartbeat to pass.
+  // Let's wait for tablet server registration with the master: it guarantees
+  // at least one heartbeat is processed by the master.
+  ASSERT_OK(cluster_->WaitForTabletServerCount(kNumTabletServers,
+                                               MonoDelta::FromSeconds(30)));
+  // The master should not delete the tablet.
   int64_t num_delete_attempts;
-  ASSERT_EVENTUALLY([&]() {
-    int64_t num_heartbeats;
-    ASSERT_OK(GetInt64Metric(
-        cluster_->master()->bound_http_hostport(),
-        &METRIC_ENTITY_server, "kudu.master",
-        &METRIC_handler_latency_kudu_master_MasterService_TSHeartbeat, "total_count",
-        &num_heartbeats));
-    ASSERT_GE(num_heartbeats, 1);
-  });
   ASSERT_OK(GetInt64Metric(
       cluster_->tablet_server(0)->bound_http_hostport(),
       &METRIC_ENTITY_server, "kudu.tabletserver",


Mime
View raw message