kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject [7/9] kudu git commit: KUDU-1374: send full tablet report when new leader master is detected
Date Mon, 01 Aug 2016 22:11:44 GMT
KUDU-1374: send full tablet report when new leader master is detected

This should help prevent missed tablet reports in very specific edge cases,
detailed in the bug report.

The new integration test fails 100% of the time without the change, and
passes 100% of the time with it.

Change-Id: Ic16fc46736476dba39616e79ecfe79eee48b3d7f
Reviewed-on: http://gerrit.cloudera.org:8080/3643
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/d648141c
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/d648141c
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/d648141c

Branch: refs/heads/master
Commit: d648141c86b5f1a8e8cf7a678c345c0ccf17a253
Parents: fb2022c
Author: Adar Dembo <adar@cloudera.com>
Authored: Tue Jul 12 18:35:00 2016 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Mon Aug 1 21:16:42 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/master_failover-itest.cc  | 62 +++++++++++++++++++-
 src/kudu/master/catalog_manager.cc              | 11 ++++
 src/kudu/tserver/heartbeater.cc                 | 31 ++++++++--
 3 files changed, 97 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/d648141c/src/kudu/integration-tests/master_failover-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_failover-itest.cc b/src/kudu/integration-tests/master_failover-itest.cc
index 95866be..5e9219b 100644
--- a/src/kudu/integration-tests/master_failover-itest.cc
+++ b/src/kudu/integration-tests/master_failover-itest.cc
@@ -17,6 +17,7 @@
 
 #include <glog/logging.h>
 #include <gtest/gtest.h>
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -41,6 +42,7 @@ const int kNumTabletServerReplicas = 3;
 using sp::shared_ptr;
 using std::string;
 using std::vector;
+using std::unique_ptr;
 
 class MasterFailoverTest : public KuduTest {
  public:
@@ -105,7 +107,7 @@ class MasterFailoverTest : public KuduTest {
     b.AddColumn("int_val")->Type(KuduColumnSchema::INT32)->NotNull();
     b.AddColumn("string_val")->Type(KuduColumnSchema::STRING)->NotNull();
     CHECK_OK(b.Build(&schema));
-    gscoped_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
+    unique_ptr<KuduTableCreator> table_creator(client_->NewTableCreator());
     return table_creator->table_name(table_name)
         .schema(&schema)
         .set_range_partition_columns({ "key" })
@@ -114,7 +116,7 @@ class MasterFailoverTest : public KuduTest {
   }
 
   Status RenameTable(const std::string& table_name_orig, const std::string& table_name_new)
{
-    gscoped_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(table_name_orig));
+    unique_ptr<KuduTableAlterer> table_alterer(client_->NewTableAlterer(table_name_orig));
     return table_alterer
       ->RenameTo(table_name_new)
       ->wait(true)
@@ -141,7 +143,7 @@ class MasterFailoverTest : public KuduTest {
  protected:
   int num_masters_;
   ExternalMiniClusterOptions opts_;
-  gscoped_ptr<ExternalMiniCluster> cluster_;
+  unique_ptr<ExternalMiniCluster> cluster_;
   shared_ptr<KuduClient> client_;
 };
 
@@ -266,5 +268,59 @@ TEST_F(MasterFailoverTest, TestRenameTableSync) {
   ASSERT_TRUE(s.IsNotFound());
 }
 
+
+TEST_F(MasterFailoverTest, TestKUDU1374) {
+  const char* kTableName = "testKUDU1374";
+
+  // Wait at least one additional heartbeat interval after creating the table.
+  // The idea is to guarantee that all tservers sent a tablet report with the
+  // new (dirty) tablet, and should now be sending empty incremental tablet
+  // reports.
+  ASSERT_OK(CreateTable(kTableName, kWaitForCreate));
+  SleepFor(MonoDelta::FromMilliseconds(1000));
+
+  // Force all asynchronous RPCs sent by the leader master to fail. This
+  // means the subsequent AlterTable() will be hidden from the tservers,
+  // at least while this master is leader.
+  int leader_idx;
+  ASSERT_OK(cluster_->GetLeaderMasterIndex(&leader_idx));
+  ExternalDaemon* leader_master = cluster_->master(leader_idx);
+  ASSERT_OK(cluster_->SetFlag(leader_master,
+                              "catalog_manager_fail_ts_rpcs", "true"));
+
+  unique_ptr<KuduTableAlterer> alter(client_->NewTableAlterer(kTableName));
+  alter->AddColumn("new_col")->Type(KuduColumnSchema::INT32);
+  ASSERT_OK(alter
+    ->wait(false)
+    ->Alter());
+
+  leader_master->Shutdown();
+
+  // Wait for the AlterTable() to finish. Progress is as follows:
+  // 1. TS notices the change in leadership and sends a full tablet report.
+  // 2. Leader master notices that the reported tablet isn't fully altered
+  //    and sends the TS an AlterSchema() RPC.
+  // 3. TS updates the tablet's schema. This also dirties the tablet.
+  // 4. TS send an incremental tablet report containing the dirty tablet.
+  // 5. Leader master sees that all tablets in the table now have the new
+  //    schema and ends the AlterTable() operation.
+  // 6. The next IsAlterTableInProgress() call returns false.
+  MonoTime deadline = MonoTime::Now(MonoTime::FINE);
+  deadline.AddDelta(MonoDelta::FromSeconds(90));
+  MonoTime now = MonoTime::Now(MonoTime::FINE);
+  while (now.ComesBefore(deadline)) {
+    bool in_progress;
+    ASSERT_OK(client_->IsAlterTableInProgress(kTableName, &in_progress));
+    if (!in_progress) {
+      break;
+    }
+
+    SleepFor(MonoDelta::FromMilliseconds(100));
+    now = MonoTime::Now(MonoTime::FINE);
+  }
+  ASSERT_TRUE(now.ComesBefore(deadline))
+    << "Deadline elapsed before alter table completed";
+}
+
 } // namespace client
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/d648141c/src/kudu/master/catalog_manager.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/catalog_manager.cc b/src/kudu/master/catalog_manager.cc
index f807661..9cd0364 100644
--- a/src/kudu/master/catalog_manager.cc
+++ b/src/kudu/master/catalog_manager.cc
@@ -153,6 +153,11 @@ DEFINE_int32(table_locations_ttl_ms, 60 * 60 * 1000, // 1 hour
              "until after waiting for the ttl period.");
 TAG_FLAG(table_locations_ttl_ms, advanced);
 
+DEFINE_bool(catalog_manager_fail_ts_rpcs, false,
+            "Whether all master->TS async calls should fail. Only for testing!");
+TAG_FLAG(catalog_manager_fail_ts_rpcs, hidden);
+TAG_FLAG(catalog_manager_fail_ts_rpcs, runtime);
+
 using std::pair;
 using std::shared_ptr;
 using std::string;
@@ -2147,6 +2152,12 @@ class RetryingTSRpcTask : public MonitoredTask {
 
   // Send the subclass RPC request.
   Status Run() {
+    if (PREDICT_FALSE(FLAGS_catalog_manager_fail_ts_rpcs)) {
+      MarkFailed();
+      UnregisterAsyncTask(); // May delete this.
+      return Status::RuntimeError("Async RPCs configured to fail");
+    }
+
     Status s = ResetTSProxy();
     if (!s.ok()) {
       LOG(WARNING) << "Unable to reset TS proxy: " << s.ToString();

http://git-wip-us.apache.org/repos/asf/kudu/blob/d648141c/src/kudu/tserver/heartbeater.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/heartbeater.cc b/src/kudu/tserver/heartbeater.cc
index 05c5a83..166c91b 100644
--- a/src/kudu/tserver/heartbeater.cc
+++ b/src/kudu/tserver/heartbeater.cc
@@ -172,6 +172,10 @@ class Heartbeater::Thread {
   bool should_run_;
   bool heartbeat_asap_;
 
+  // Indicates that the thread should send a full tablet report. Set when
+  // the thread detects that the master has been elected leader.
+  bool send_full_tablet_report_;
+
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
 
@@ -270,7 +274,8 @@ Heartbeater::Thread::Thread(const HostPort& master_address, TabletServer*
server
     next_report_seq_(0),
     cond_(&mutex_),
     should_run_(false),
-    heartbeat_asap_(true) {
+    heartbeat_asap_(true),
+    send_full_tablet_report_(false) {
 }
 
 Status Heartbeater::Thread::ConnectToMaster() {
@@ -358,9 +363,18 @@ Status Heartbeater::Thread::DoHeartbeat() {
                           "Unable to set up registration");
   }
 
-  if (last_hb_response_.needs_full_tablet_report()) {
-    LOG(INFO) << Substitute("Sending a full tablet report to master $0...",
-                            master_address_.ToString());
+  if (send_full_tablet_report_) {
+    LOG(INFO) << Substitute(
+        "Master $0 was elected leader, sending a full tablet report...",
+        master_address_.ToString());
+    GenerateFullTabletReport(req.mutable_tablet_report());
+    // Should the heartbeat fail, we'd want the next heartbeat to resend this
+    // full tablet report. As such, send_full_tablet_report_ is only reset
+    // after all error checking is complete.
+  } else if (last_hb_response_.needs_full_tablet_report()) {
+    LOG(INFO) << Substitute(
+        "Master $0 requested a full tablet report, sending...",
+        master_address_.ToString());
     GenerateFullTabletReport(req.mutable_tablet_report());
   } else {
     VLOG(2) << Substitute("Sending an incremental tablet report to master $0...",
@@ -382,6 +396,15 @@ Status Heartbeater::Thread::DoHeartbeat() {
 
   VLOG(2) << Substitute("Received heartbeat response from $0:\n$1",
                         master_address_.ToString(), resp.DebugString());
+
+  // If we've detected that our master was elected leader, send a full tablet
+  // report in the next heartbeat.
+  if (!last_hb_response_.leader_master() && resp.leader_master()) {
+    send_full_tablet_report_ = true;
+  } else {
+    send_full_tablet_report_ = false;
+  }
+
   last_hb_response_.Swap(&resp);
 
   MarkTabletReportAcknowledged(req.tablet_report());


Mime
View raw message