kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ale...@apache.org
Subject kudu git commit: [master] add feature flag for 3-4-3 scheme
Date Fri, 09 Mar 2018 22:38:35 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 0ba702c20 -> 92486c83d


[master] add feature flag for 3-4-3 scheme

This changelist addresses the case of misconfiguration of the
replica management scheme between masters and tablet servers
in the case when a master of version < 1.7 happen to work with
tablet servers of version >= 1.7.

Also, added a new run-time flag --heartbeat_incompatibility_is_fatal
to control whether the replica management scheme misconfiguration
should be treated as a fatal error for a tablet server.

This is a follow-up for 1769eed53ee2c21a88a766cb72bf8c9ae622099d.

Change-Id: I0e34a6fffc375720b26c2641e88057975d9190b9
Reviewed-on: http://gerrit.cloudera.org:8080/9565
Tested-by: Kudu Jenkins
Reviewed-by: Dan Burkert <danburkert@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/92486c83
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/92486c83
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/92486c83

Branch: refs/heads/master
Commit: 92486c83d5663108ba6ffb63a081fa55ba38bb03
Parents: 0ba702c
Author: Alexey Serbin <aserbin@cloudera.com>
Authored: Thu Mar 8 18:44:26 2018 -0800
Committer: Alexey Serbin <aserbin@cloudera.com>
Committed: Fri Mar 9 22:37:07 2018 +0000

----------------------------------------------------------------------
 src/kudu/consensus/replica_management.proto     |  5 +-
 .../raft_consensus_nonvoter-itest.cc            | 31 +++++++-
 src/kudu/master/master.proto                    | 11 ++-
 src/kudu/master/master_service.cc               | 11 ++-
 src/kudu/tserver/heartbeater.cc                 | 82 ++++++++++++++++----
 5 files changed, 110 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/92486c83/src/kudu/consensus/replica_management.proto
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/replica_management.proto b/src/kudu/consensus/replica_management.proto
index b9a1ee1..6a01673 100644
--- a/src/kudu/consensus/replica_management.proto
+++ b/src/kudu/consensus/replica_management.proto
@@ -27,11 +27,12 @@ message ReplicaManagementInfoPB {
     UNKNOWN = 999;
 
     // The leader replica evicts the failed replica first, and then the new
-    // voter replica is added.
+    // voter replica is added (a.k.a. '3-2-3' replica management scheme).
     EVICT_FIRST = 0;
 
     // Add a new non-voter replica, promote the replica to voter once it
-    // caught up with the leader, and only after that evict the failed replica.
+    // caught up with the leader, and only after that evict the failed replica
+    // (a.k.a. '3-4-3' replica managment scheme).
     PREPARE_REPLACEMENT_BEFORE_EVICTION = 1;
   }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/92486c83/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
index 8e991a4..8e1d08c 100644
--- a/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
+++ b/src/kudu/integration-tests/raft_consensus_nonvoter-itest.cc
@@ -1841,17 +1841,19 @@ TEST_F(RaftConsensusNonVoterITest, NonVoterReplicasInConsensusQueue)
{
 // Also, it makes sure the tablet server crashes to signal the misconfiguration.
 class IncompatibleReplicaReplacementSchemesITest :
     public RaftConsensusNonVoterITest,
-    public ::testing::WithParamInterface<bool> {
+    public ::testing::WithParamInterface<std::tuple<bool, bool>> {
 };
 INSTANTIATE_TEST_CASE_P(, IncompatibleReplicaReplacementSchemesITest,
-                        ::testing::Bool());
+                        ::testing::Combine(::testing::Bool(),
+                                           ::testing::Bool()));
 TEST_P(IncompatibleReplicaReplacementSchemesITest, MasterAndTserverMisconfig) {
   FLAGS_num_tablet_servers = 1;
   FLAGS_num_replicas = 1;
   const MonoDelta kTimeout = MonoDelta::FromSeconds(60);
   const int64_t heartbeat_interval_ms = 500;
 
-  const bool is_3_4_3 = GetParam();
+  const bool is_incompatible_replica_management_fatal = std::get<0>(GetParam());
+  const bool is_3_4_3 = std::get<1>(GetParam());
 
   // The easiest way to have everything setup is to start the cluster with
   // compatible parameters.
@@ -1859,6 +1861,8 @@ TEST_P(IncompatibleReplicaReplacementSchemesITest, MasterAndTserverMisconfig)
{
     Substitute("--raft_prepare_replacement_before_eviction=$0", is_3_4_3),
   };
   const vector<string> kTsFlags = {
+    Substitute("--heartbeat_incompatible_replica_management_is_fatal=$0",
+        is_incompatible_replica_management_fatal),
     Substitute("--heartbeat_interval_ms=$0", heartbeat_interval_ms),
     Substitute("--raft_prepare_replacement_before_eviction=$0", is_3_4_3),
   };
@@ -1883,14 +1887,33 @@ TEST_P(IncompatibleReplicaReplacementSchemesITest, MasterAndTserverMisconfig)
{
   ts->mutable_flags()->push_back(
       Substitute("--raft_prepare_replacement_before_eviction=$0", !is_3_4_3));
   ASSERT_OK(ts->Restart());
+  if (is_incompatible_replica_management_fatal) {
+    ASSERT_OK(ts->WaitForFatal(kTimeout));
+  }
+  SleepFor(MonoDelta::FromMilliseconds(heartbeat_interval_ms * 3));
 
-  ASSERT_OK(ts->WaitForFatal(kTimeout));
+  // The tablet server should not be registered with the master.
+  ASSERT_OK(itest::ListTabletServers(cluster_->master_proxy(), kTimeout, &tservers));
+  ASSERT_EQ(0, tservers.size());
 
+  // Inject feature flag not supported by the master and make sure the tablet
+  // server will not be registered with incompatible master.
+  ts->mutable_flags()->pop_back();
+  ts->mutable_flags()->push_back("--heartbeat_inject_required_feature_flag=999");
+  ts->Shutdown();
+  ASSERT_OK(ts->Restart());
+  if (is_incompatible_replica_management_fatal) {
+    ASSERT_OK(ts->WaitForFatal(kTimeout));
+  }
   SleepFor(MonoDelta::FromMilliseconds(heartbeat_interval_ms * 3));
 
   // The tablet server should not be registered with the master.
   ASSERT_OK(itest::ListTabletServers(cluster_->master_proxy(), kTimeout, &tservers));
   ASSERT_EQ(0, tservers.size());
+
+  if (!is_incompatible_replica_management_fatal) {
+    NO_FATALS(cluster_->AssertNoCrashes());
+  }
 }
 
 }  // namespace tserver

http://git-wip-us.apache.org/repos/asf/kudu/blob/92486c83/src/kudu/master/master.proto
----------------------------------------------------------------------
diff --git a/src/kudu/master/master.proto b/src/kudu/master/master.proto
index 048489f..9af60a4 100644
--- a/src/kudu/master/master.proto
+++ b/src/kudu/master/master.proto
@@ -75,9 +75,9 @@ message MasterErrorPB {
     // The number of replicas requested is illegal (eg non-positive).
     ILLEGAL_REPLICATION_FACTOR = 11;
 
-    // The caller detected an application-level incompatibility with the callee.
-    // This might be caused by a misconfiguration, incompatible features, etc.
-    INCOMPATIBILITY = 12;
+    // The callee detected that its replica management scheme is incompatible
+    // with the caller's scheme.
+    INCOMPATIBLE_REPLICA_MANAGEMENT = 12;
   }
 
   // The error code.
@@ -710,6 +710,11 @@ enum MasterFeatures {
   ADD_DROP_RANGE_PARTITIONS = 2;
   // The master supports the 'ConnectToMaster' RPC.
   CONNECT_TO_MASTER = 3;
+  // The catalog manager supports different replica management schemes
+  // (see "kudu/consensus/replica_management.proto"). At least, both the
+  // EVICT_FIRST (a.k.a. 3-2-3) and the PREPARE_REPLACEMENT_BEFORE_EVICTION
+  // (a.k.a. 3-4-3) schemes.
+  REPLICA_MANAGEMENT = 4;
 }
 
 service MasterService {

http://git-wip-us.apache.org/repos/asf/kudu/blob/92486c83/src/kudu/master/master_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master_service.cc b/src/kudu/master/master_service.cc
index 5229448..219f804 100644
--- a/src/kudu/master/master_service.cc
+++ b/src/kudu/master/master_service.cc
@@ -176,7 +176,7 @@ void MasterServiceImpl::TSHeartbeat(const TSHeartbeatRequestPB* req,
 
       auto* error = resp->mutable_error();
       StatusToPB(s, error->mutable_status());
-      error->set_code(MasterErrorPB::INCOMPATIBILITY);
+      error->set_code(MasterErrorPB::INCOMPATIBLE_REPLICA_MANAGEMENT);
 
       // Yes, this is confusing: the RPC result is success, but the response
       // contains an application-level error.
@@ -513,11 +513,14 @@ void MasterServiceImpl::ConnectToMaster(const ConnectToMasterRequestPB*
/*req*/,
 
 bool MasterServiceImpl::SupportsFeature(uint32_t feature) const {
   switch (feature) {
-    case MasterFeatures::RANGE_PARTITION_BOUNDS:
-    case MasterFeatures::ADD_DROP_RANGE_PARTITIONS: return true;
+    case MasterFeatures::RANGE_PARTITION_BOUNDS:    FALLTHROUGH_INTENDED;
+    case MasterFeatures::ADD_DROP_RANGE_PARTITIONS: FALLTHROUGH_INTENDED;
+    case MasterFeatures::REPLICA_MANAGEMENT:
+      return true;
     case MasterFeatures::CONNECT_TO_MASTER:
       return FLAGS_master_support_connect_to_master_rpc;
-    default: return false;
+    default:
+      return false;
   }
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/92486c83/src/kudu/tserver/heartbeater.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/heartbeater.cc b/src/kudu/tserver/heartbeater.cc
index 24117c6..e2657fb 100644
--- a/src/kudu/tserver/heartbeater.cc
+++ b/src/kudu/tserver/heartbeater.cc
@@ -43,6 +43,7 @@
 #include "kudu/master/master.pb.h"
 #include "kudu/master/master.proxy.h"
 #include "kudu/rpc/rpc_controller.h"
+#include "kudu/rpc/rpc_header.pb.h"
 #include "kudu/security/cert.h"
 #include "kudu/security/openssl_util.h"
 #include "kudu/security/tls_context.h"
@@ -56,6 +57,7 @@
 #include "kudu/util/condition_variable.h"
 #include "kudu/util/flag_tags.h"
 #include "kudu/util/locks.h"
+#include "kudu/util/logging.h"
 #include "kudu/util/monotime.h"
 #include "kudu/util/mutex.h"
 #include "kudu/util/net/net_util.h"
@@ -86,12 +88,27 @@ DEFINE_int32(heartbeat_inject_latency_before_heartbeat_ms, 0,
 TAG_FLAG(heartbeat_inject_latency_before_heartbeat_ms, runtime);
 TAG_FLAG(heartbeat_inject_latency_before_heartbeat_ms, unsafe);
 
+DEFINE_bool(heartbeat_incompatible_replica_management_is_fatal, true,
+            "Whether incompatible replica management schemes or unsupported "
+            "PREPARE_REPLACEMENT_BEFORE_EVICTION feature flag by master are fatal");
+TAG_FLAG(heartbeat_incompatible_replica_management_is_fatal, advanced);
+TAG_FLAG(heartbeat_incompatible_replica_management_is_fatal, runtime);
+
+DEFINE_uint32(heartbeat_inject_required_feature_flag, 0,
+              "Feature flag to inject while sending heartbeat to master "
+              "(for testing only)");
+TAG_FLAG(heartbeat_inject_required_feature_flag, runtime);
+TAG_FLAG(heartbeat_inject_required_feature_flag, unsafe);
+
 DECLARE_bool(raft_prepare_replacement_before_eviction);
 
+using kudu::consensus::ReplicaManagementInfoPB;
 using kudu::master::MasterErrorPB;
+using kudu::master::MasterFeatures;
 using kudu::master::MasterServiceProxy;
 using kudu::master::TabletReportPB;
 using kudu::pb_util::SecureDebugString;
+using kudu::rpc::ErrorStatusPB;
 using kudu::rpc::RpcController;
 using std::shared_ptr;
 using std::string;
@@ -150,7 +167,7 @@ class Heartbeater::Thread {
   Status ConnectToMaster();
   int GetMinimumHeartbeatMillis() const;
   int GetMillisUntilNextHeartbeat() const;
-  Status DoHeartbeat(MasterErrorPB* error = nullptr);
+  Status DoHeartbeat(MasterErrorPB* error, ErrorStatusPB* error_status);
   Status SetupRegistration(ServerRegistrationPB* reg);
   void SetupCommonField(master::TSToMasterCommonPB* common);
   bool IsCurrentThread() const;
@@ -379,7 +396,8 @@ int Heartbeater::Thread::GetMillisUntilNextHeartbeat() const {
   return FLAGS_heartbeat_interval_ms;
 }
 
-Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* error) {
+Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* error,
+                                        ErrorStatusPB* error_status) {
   if (PREDICT_FALSE(server_->fail_heartbeats_for_tests())) {
     return Status::IOError("failing all heartbeats for tests");
   }
@@ -399,8 +417,10 @@ Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* error) {
     DCHECK(proxy_);
   }
 
-  master::TSHeartbeatRequestPB req;
+  RpcController rpc;
+  rpc.set_timeout(MonoDelta::FromMilliseconds(FLAGS_heartbeat_rpc_timeout_ms));
 
+  master::TSHeartbeatRequestPB req;
   SetupCommonField(req.mutable_common());
   if (last_hb_response_.needs_reregister()) {
     LOG(INFO) << "Registering TS with master...";
@@ -410,8 +430,24 @@ Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* error) {
     // scheme the tablet server is running with.
     auto* info = req.mutable_replica_management_info();
     info->set_replacement_scheme(FLAGS_raft_prepare_replacement_before_eviction
-        ? consensus::ReplicaManagementInfoPB::PREPARE_REPLACEMENT_BEFORE_EVICTION
-        : consensus::ReplicaManagementInfoPB::EVICT_FIRST);
+        ? ReplicaManagementInfoPB::PREPARE_REPLACEMENT_BEFORE_EVICTION
+        : ReplicaManagementInfoPB::EVICT_FIRST);
+    if (info->replacement_scheme() != ReplicaManagementInfoPB::EVICT_FIRST) {
+      // It's necessary to have both the tablet server and the masters to run
+      // with the same replica replacement scheme. Otherwise the system cannot
+      // re-create tablet replicas consistently. If this tablet server is running
+      // with the newer PREPARE_REPLACEMENT_BEFORE_EVICTION (a.k.a. 3-4-3) scheme,
+      // a master of an older version might not recognise the mismatch because
+      // it doesn't know about the 'replica_management_info' field (otherwise it
+      // would respond with the INCOMPATIBLE_REPLICA_MANAGEMENT error code
+      // when mismatch detected). To address that, tablet servers rely on the
+      // dedicated feature flag to enforce the consistency of replica management
+      // schemes.
+      rpc.RequireServerFeature(MasterFeatures::REPLICA_MANAGEMENT);
+    }
+    if (PREDICT_FALSE(FLAGS_heartbeat_inject_required_feature_flag != 0)) {
+      rpc.RequireServerFeature(FLAGS_heartbeat_inject_required_feature_flag);
+    }
   }
 
   // Check with the TS cert manager if it has a cert that needs signing.
@@ -447,18 +483,18 @@ Status Heartbeater::Thread::DoHeartbeat(MasterErrorPB* error) {
   }
   req.set_num_live_tablets(server_->tablet_manager()->GetNumLiveTablets());
 
-  RpcController rpc;
-  rpc.set_timeout(MonoDelta::FromMilliseconds(FLAGS_heartbeat_rpc_timeout_ms));
-
   VLOG(2) << "Sending heartbeat:\n" << SecureDebugString(req);
   master::TSHeartbeatResponsePB resp;
-  RETURN_NOT_OK_PREPEND(proxy_->TSHeartbeat(req, &resp, &rpc),
-                        "Failed to send heartbeat to master");
-  if (resp.has_error()) {
-    if (error) {
-      error->Swap(resp.mutable_error());
+  const auto& s = proxy_->TSHeartbeat(req, &resp, &rpc);
+  if (!s.ok()) {
+    if (rpc.error_response()) {
+      error_status->CopyFrom(*rpc.error_response());
     }
-    return StatusFromPB(error ? error->status() : resp.error().status());
+    RETURN_NOT_OK_PREPEND(s, "Failed to send heartbeat to master");
+  }
+  if (resp.has_error()) {
+    error->Swap(resp.mutable_error());
+    return StatusFromPB(error->status());
   }
 
   VLOG(2) << Substitute("Received heartbeat response from $0:\n$1",
@@ -548,7 +584,8 @@ void Heartbeater::Thread::RunThread() {
     }
 
     MasterErrorPB error;
-    const auto& s = DoHeartbeat(&error);
+    ErrorStatusPB error_status;
+    const auto& s = DoHeartbeat(&error, &error_status);
     if (!s.ok()) {
       const auto& err_msg = s.ToString();
       LOG(WARNING) << Substitute("Failed to heartbeat to $0: $1",
@@ -560,8 +597,19 @@ void Heartbeater::Thread::RunThread() {
           consecutive_failed_heartbeats_ >= FLAGS_heartbeat_max_failures_before_backoff)
{
         proxy_.reset();
       }
-      if (error.has_code() && error.code() == MasterErrorPB::INCOMPATIBILITY) {
-        LOG(FATAL) << "master detected incompatibility: " << err_msg;
+      string msg;
+      if (error.has_code() &&
+          error.code() == MasterErrorPB::INCOMPATIBLE_REPLICA_MANAGEMENT) {
+        msg = Substitute("master detected incompatibility: $0", err_msg);
+      }
+      if (s.IsRemoteError() && error_status.unsupported_feature_flags_size() >
0) {
+        msg = Substitute("master does not support required feature flags: $0", err_msg);
+      }
+      if (!msg.empty()) {
+        if (FLAGS_heartbeat_incompatible_replica_management_is_fatal) {
+          LOG(FATAL) << msg;
+        }
+        KLOG_EVERY_N_SECS(ERROR, 60) << msg;
       }
       continue;
     }


Mime
View raw message