kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jdcry...@apache.org
Subject [1/4] incubator-kudu git commit: KUDU-1278 - Add a way to inject latency into remote bootstrap client and a failing test
Date Sat, 06 Feb 2016 18:42:00 GMT
Repository: incubator-kudu
Updated Branches:
  refs/heads/branch-0.7.0 a2bb870c3 -> eef13645f


KUDU-1278 - Add a way to inject latency into remote bootstrap client and a failing test

This adds a way to inject latency into the remote bootstrapping client and a
failing test which reproduces the bug documented in KUDU-1278.

Change-Id: I5921a0fea65e47fd906ee64b5979dfacf652c97d
Reviewed-on: http://gerrit.cloudera.org:8080/2079
Reviewed-by: Todd Lipcon <todd@apache.org>
Tested-by: Kudu Jenkins
(cherry picked from commit 4a9e2ca0713741bbb7edc8925dadb8b36d8d5560)
Reviewed-on: http://gerrit.cloudera.org:8080/2080
Reviewed-by: Jean-Daniel Cryans
Tested-by: Jean-Daniel Cryans


Project: http://git-wip-us.apache.org/repos/asf/incubator-kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kudu/commit/bc074b39
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kudu/tree/bc074b39
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kudu/diff/bc074b39

Branch: refs/heads/branch-0.7.0
Commit: bc074b397650a26caf0b967ff4ce58337b56cf2d
Parents: a2bb870
Author: David Alves <david.alves@cloudera.com>
Authored: Fri Feb 5 14:41:15 2016 -0800
Committer: Jean-Daniel Cryans <jdcryans@gerrit.cloudera.org>
Committed: Fri Feb 5 23:33:34 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/remote_bootstrap-itest.cc | 60 ++++++++++++++++++++
 src/kudu/tserver/remote_bootstrap_client.cc     | 11 ++++
 2 files changed, 71 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/bc074b39/src/kudu/integration-tests/remote_bootstrap-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/remote_bootstrap-itest.cc b/src/kudu/integration-tests/remote_bootstrap-itest.cc
index 914e140..97df129 100644
--- a/src/kudu/integration-tests/remote_bootstrap-itest.cc
+++ b/src/kudu/integration-tests/remote_bootstrap-itest.cc
@@ -683,4 +683,64 @@ TEST_F(RemoteBootstrapITest, TestDisableRemoteBootstrap_NoTightLoopWhenTabletDel
   EXPECT_LT(num_logs_per_second, 20);
 }
 
+// Test that if a remote bootstrap is taking a long time but the client peer is still responsive,
+// the leader won't mark it as failed.
+TEST_F(RemoteBootstrapITest, DISABLED_TestSlowBootstrapDoesntFail) {
+  MonoDelta timeout = MonoDelta::FromSeconds(10);
+  vector<string> ts_flags, master_flags;
+  ts_flags.push_back("--enable_leader_failure_detection=false");
+  ts_flags.push_back("--remote_bootstrap_dowload_file_inject_latency_ms=5000");
+  ts_flags.push_back("--follower_unavailable_considered_failed_sec=2");
+  master_flags.push_back("--catalog_manager_wait_for_new_tablets_to_elect_leader=false");
+  NO_FATALS(StartCluster(ts_flags, master_flags));
+
+  TestWorkload workload(cluster_.get());
+  // TODO(KUDU-1322): the client should handle retrying on different replicas
+  // if the tablet isn't found, rather than giving us this error.
+  workload.set_not_found_allowed(true);
+  workload.set_write_batch_size(1);
+  workload.Setup();
+
+  // Figure out the tablet id of the created tablet.
+  vector<ListTabletsResponsePB::StatusAndSchemaPB> tablets;
+  ExternalTabletServer* replica_ets = cluster_->tablet_server(1);
+  TServerDetails* replica_ts = ts_map_[replica_ets->uuid()];
+  ASSERT_OK(WaitForNumTabletsOnTS(replica_ts, 1, timeout, &tablets));
+  string tablet_id = tablets[0].tablet_status().tablet_id();
+
+  // Wait until all replicas are up and running.
+  for (int i = 0; i < cluster_->num_tablet_servers(); i++) {
+    ASSERT_OK(itest::WaitUntilTabletRunning(ts_map_[cluster_->tablet_server(i)->uuid()],
+                                            tablet_id, timeout));
+  }
+
+  // Elect a leader (TS 0)
+  ExternalTabletServer* leader_ts = cluster_->tablet_server(0);
+  ASSERT_OK(itest::StartElection(ts_map_[leader_ts->uuid()], tablet_id, timeout));
+
+  // Start writing, wait for some rows to be inserted.
+  workload.Start();
+  while (workload.rows_inserted() < 100) {
+    SleepFor(MonoDelta::FromMilliseconds(10));
+  }
+
+
+  // Tombstone the follower.
+  LOG(INFO) << "Tombstoning follower tablet " << tablet_id << " on TS " << replica_ts->uuid();
+  ASSERT_OK(itest::DeleteTablet(replica_ts, tablet_id, TABLET_DATA_TOMBSTONED, boost::none,
+                                timeout));
+
+  // Wait for remote bootstrap to start.
+  ASSERT_OK(inspect_->WaitForTabletDataStateOnTS(1, tablet_id,
+                                                 tablet::TABLET_DATA_COPYING, timeout));
+
+  workload.StopAndJoin();
+  ASSERT_OK(WaitForServersToAgree(timeout, ts_map_, tablet_id, 1));
+
+  ClusterVerifier v(cluster_.get());
+  NO_FATALS(v.CheckCluster());
+  NO_FATALS(v.CheckRowCount(workload.table_name(), ClusterVerifier::AT_LEAST,
+                            workload.rows_inserted()));
+}
+
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/incubator-kudu/blob/bc074b39/src/kudu/tserver/remote_bootstrap_client.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/remote_bootstrap_client.cc b/src/kudu/tserver/remote_bootstrap_client.cc
index bac717e..4e00a06 100644
--- a/src/kudu/tserver/remote_bootstrap_client.cc
+++ b/src/kudu/tserver/remote_bootstrap_client.cc
@@ -56,6 +56,11 @@ TAG_FLAG(remote_bootstrap_save_downloaded_metadata, advanced);
 TAG_FLAG(remote_bootstrap_save_downloaded_metadata, hidden);
 TAG_FLAG(remote_bootstrap_save_downloaded_metadata, runtime);
 
+DEFINE_int32(remote_bootstrap_dowload_file_inject_latency_ms, 0,
+             "Injects latency into the loop that downloads files, causing remote bootstrap "
+             "to take much longer. For use in tests only.");
+TAG_FLAG(remote_bootstrap_dowload_file_inject_latency_ms, hidden);
+
 // RETURN_NOT_OK_PREPEND() with a remote-error unwinding step.
 #define RETURN_NOT_OK_UNWIND_PREPEND(status, controller, msg) \
   RETURN_NOT_OK_PREPEND(UnwindRemoteError(status, controller), msg)
@@ -518,6 +523,12 @@ Status RemoteBootstrapClient::DownloadFile(const DataIdPB& data_id,
     // Write the data.
     RETURN_NOT_OK(appendable->Append(resp.chunk().data()));
 
+    if (PREDICT_FALSE(FLAGS_remote_bootstrap_dowload_file_inject_latency_ms > 0)) {
+      LOG_WITH_PREFIX(INFO) << "Injecting latency into file download: " <<
+          FLAGS_remote_bootstrap_dowload_file_inject_latency_ms;
+      SleepFor(MonoDelta::FromMilliseconds(FLAGS_remote_bootstrap_dowload_file_inject_latency_ms));
+    }
+
     if (offset + resp.chunk().data().size() == resp.chunk().total_data_length()) {
       done = true;
     }


Mime
View raw message