kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mpe...@apache.org
Subject [3/3] kudu git commit: [tools] make kudu CLI auto-detect the replication scheme
Date Tue, 09 Jan 2018 01:40:59 GMT
[tools] make kudu CLI auto-detect the replication scheme

Added functionality to allow for automatic detection of the appropriate
replica move scenario by the 'kudu tablet replica move' sub-command.

This is a follow-up for 9120cdd1d288ef6b3e03e2cd8445436712a7d4a9.

Change-Id: Ia536c1f64ef8173aa4385db8ba01581bc3528154
Reviewed-on: http://gerrit.cloudera.org:8080/8915
Tested-by: Kudu Jenkins
Reviewed-by: Mike Percy <mpercy@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/e37bd1cf
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/e37bd1cf
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/e37bd1cf

Branch: refs/heads/master
Commit: e37bd1cf5d2ca26facf5cfba1850af2e648bbfb4
Parents: 1769eed
Author: Alexey Serbin <aserbin@cloudera.com>
Authored: Fri Dec 22 16:37:16 2017 -0800
Committer: Mike Percy <mpercy@apache.org>
Committed: Tue Jan 9 01:38:55 2018 +0000

----------------------------------------------------------------------
 src/kudu/consensus/consensus.proto   |  3 ++
 src/kudu/tools/kudu-admin-test.cc    | 12 +----
 src/kudu/tools/tool_action_tablet.cc | 88 ++++++++++++++++++-------------
 src/kudu/tserver/tablet_service.cc   |  6 +++
 4 files changed, 61 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/e37bd1cf/src/kudu/consensus/consensus.proto
----------------------------------------------------------------------
diff --git a/src/kudu/consensus/consensus.proto b/src/kudu/consensus/consensus.proto
index 1c7f56c..d2a1f8d 100644
--- a/src/kudu/consensus/consensus.proto
+++ b/src/kudu/consensus/consensus.proto
@@ -23,6 +23,7 @@ import "kudu/common/common.proto";
 import "kudu/common/wire_protocol.proto";
 import "kudu/consensus/metadata.proto";
 import "kudu/consensus/opid.proto";
+import "kudu/consensus/replica_management.proto";
 import "kudu/rpc/rpc_header.proto";
 import "kudu/tablet/metadata.proto";
 import "kudu/tablet/tablet.proto";
@@ -490,6 +491,8 @@ message GetConsensusStateResponsePB {
   }
   repeated TabletConsensusInfoPB tablets = 1;
 
+  optional ReplicaManagementInfoPB replica_management_info = 3;
+
   optional tserver.TabletServerErrorPB error = 2;
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/e37bd1cf/src/kudu/tools/kudu-admin-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/kudu-admin-test.cc b/src/kudu/tools/kudu-admin-test.cc
index bd9ac47..332750b 100644
--- a/src/kudu/tools/kudu-admin-test.cc
+++ b/src/kudu/tools/kudu-admin-test.cc
@@ -232,14 +232,12 @@ TEST_F(AdminCliTest, TestChangeConfig) {
 // 4. Using the CLI, move the 3 replicas around the 5 TS.
 // 5. Profit!
 void AdminCliTest::DoTestMoveTablet(EnableKudu1097 enable_kudu_1097) {
-  const string kKudu1097Flag = "--raft_prepare_replacement_before_eviction=true";
-
   FLAGS_num_tablet_servers = 5;
   FLAGS_num_replicas = 3;
 
   vector<string> ts_flags, master_flags;
   if (enable_kudu_1097) {
-    ts_flags = master_flags = { kKudu1097Flag };
+    ts_flags = master_flags = { "--raft_prepare_replacement_before_eviction=true" };
   }
   NO_FATALS(BuildAndStart(ts_flags, master_flags));
 
@@ -281,20 +279,12 @@ void AdminCliTest::DoTestMoveTablet(EnableKudu1097 enable_kudu_1097) {
       "change_config",
       "move_replica",
     };
-    vector<string> kudu_1097_args = {
-      "--unlock_experimental_flags",
-      kKudu1097Flag,
-    };
     vector<string> tool_args = {
       cluster_->master()->bound_rpc_addr().ToString(),
       tablet_id_,
       remove,
       add,
     };
-    if (enable_kudu_1097 == kEnableKudu1097) {
-      // Only add these arguments if we running with Kudu 1097 enabled.
-      tool_command.insert(tool_command.end(), kudu_1097_args.begin(), kudu_1097_args.end());
-    }
     tool_command.insert(tool_command.end(), tool_args.begin(), tool_args.end());
 
     ASSERT_OK(RunKuduTool(tool_command));

http://git-wip-us.apache.org/repos/asf/kudu/blob/e37bd1cf/src/kudu/tools/tool_action_tablet.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool_action_tablet.cc b/src/kudu/tools/tool_action_tablet.cc
index c6c9209..f4f3d13 100644
--- a/src/kudu/tools/tool_action_tablet.cc
+++ b/src/kudu/tools/tool_action_tablet.cc
@@ -28,7 +28,6 @@
 
 #include <boost/optional/optional.hpp>
 #include <gflags/gflags.h>
-#include <gflags/gflags_declare.h>
 #include <glog/logging.h>
 
 #include "kudu/client/client.h"
@@ -39,6 +38,7 @@
 #include "kudu/consensus/metadata.pb.h"
 #include "kudu/consensus/opid.pb.h"
 #include "kudu/consensus/opid_util.h"
+#include "kudu/consensus/replica_management.pb.h"
 #include "kudu/gutil/basictypes.h"
 #include "kudu/gutil/map-util.h"
 #include "kudu/gutil/stl_util.h"
@@ -58,28 +58,25 @@ DEFINE_int64(move_copy_timeout_sec, 600,
              "Number of seconds to wait for tablet copy to complete when relocating a tablet");
 DEFINE_int64(move_leader_timeout_sec, 30,
              "Number of seconds to wait for a leader when relocating a leader tablet");
-DECLARE_bool(raft_prepare_replacement_before_eviction);
 
-namespace kudu {
-namespace tools {
-
-using client::KuduClient;
-using client::KuduClientBuilder;
-using client::KuduTablet;
-using client::KuduTabletServer;
-using consensus::ADD_PEER;
-using consensus::BulkChangeConfigRequestPB;
-using consensus::ChangeConfigType;
-using consensus::ConsensusServiceProxy;
-using consensus::ConsensusStatePB;
-using consensus::GetConsensusStateRequestPB;
-using consensus::GetConsensusStateResponsePB;
-using consensus::GetLastOpIdRequestPB;
-using consensus::GetLastOpIdResponsePB;
-using consensus::MODIFY_PEER;
-using consensus::OpId;
-using consensus::RaftPeerPB;
-using rpc::RpcController;
+using kudu::client::KuduClient;
+using kudu::client::KuduClientBuilder;
+using kudu::client::KuduTablet;
+using kudu::client::KuduTabletServer;
+using kudu::consensus::ADD_PEER;
+using kudu::consensus::BulkChangeConfigRequestPB;
+using kudu::consensus::ChangeConfigType;
+using kudu::consensus::ConsensusServiceProxy;
+using kudu::consensus::ConsensusStatePB;
+using kudu::consensus::GetConsensusStateRequestPB;
+using kudu::consensus::GetConsensusStateResponsePB;
+using kudu::consensus::GetLastOpIdRequestPB;
+using kudu::consensus::GetLastOpIdResponsePB;
+using kudu::consensus::MODIFY_PEER;
+using kudu::consensus::OpId;
+using kudu::consensus::RaftPeerPB;
+using kudu::consensus::ReplicaManagementInfoPB;
+using kudu::rpc::RpcController;
 using std::cout;
 using std::endl;
 using std::shared_ptr;
@@ -88,6 +85,9 @@ using std::unique_ptr;
 using std::vector;
 using strings::Substitute;
 
+namespace kudu {
+namespace tools {
+
 namespace {
 
 const char* const kReplicaTypeArg = "replica_type";
@@ -299,7 +299,8 @@ Status GetConsensusState(const unique_ptr<ConsensusServiceProxy>& proxy,
                          const string& tablet_id,
                          const string& replica_uuid,
                          const MonoDelta& timeout,
-                         ConsensusStatePB* consensus_state) {
+                         ConsensusStatePB* consensus_state,
+                         bool* is_3_4_3_replication = nullptr) {
   GetConsensusStateRequestPB req;
   GetConsensusStateResponsePB resp;
   RpcController controller;
@@ -314,7 +315,13 @@ Status GetConsensusState(const unique_ptr<ConsensusServiceProxy>& proxy,
     return Status::NotFound("tablet not found:", tablet_id);
   }
   DCHECK_EQ(1, resp.tablets_size());
-  *consensus_state = resp.tablets(0).cstate();
+  if (consensus_state) {
+    *consensus_state = resp.tablets(0).cstate();
+  }
+  if (is_3_4_3_replication) {
+    *is_3_4_3_replication = resp.replica_management_info().replacement_scheme() ==
+        ReplicaManagementInfoPB::PREPARE_REPLACEMENT_BEFORE_EVICTION;
+  }
   return Status::OK();
 }
 
@@ -395,9 +402,28 @@ Status MoveReplica(const RunnerContext &context) {
   RETURN_NOT_OK_PREPEND(DoKsckForTablet(master_addresses, tablet_id),
                         "ksck pre-move health check failed");
 
+  client::sp::shared_ptr<KuduClient> client;
+  RETURN_NOT_OK(KuduClientBuilder()
+                .master_server_addrs(master_addresses)
+                .Build(&client));
+
+  // Find this tablet's leader replica. We need its UUID and RPC address.
+  string leader_uuid;
+  HostPort leader_hp;
+  RETURN_NOT_OK(GetTabletLeader(client, tablet_id, &leader_uuid, &leader_hp));
+  unique_ptr<ConsensusServiceProxy> proxy;
+  RETURN_NOT_OK(BuildProxy(leader_hp.host(), leader_hp.port(), &proxy));
+
+  // Get information on current replication scheme: the move scenario depends
+  // on the replication scheme used.
+  bool is_3_4_3_replication;
+  RETURN_NOT_OK(GetConsensusState(proxy, tablet_id, leader_uuid,
+                                  client->default_admin_operation_timeout(),
+                                  nullptr, &is_3_4_3_replication));
+
   // The pre- KUDU-1097 way of moving a replica involves first adding a new
   // replica and then evicting the old one.
-  if (!FLAGS_raft_prepare_replacement_before_eviction) {
+  if (!is_3_4_3_replication) {
     RETURN_NOT_OK(DoChangeConfig(master_addresses, tablet_id, to_ts_uuid,
                                 RaftPeerPB::VOTER, consensus::ADD_PEER));
 
@@ -428,11 +454,6 @@ Status MoveReplica(const RunnerContext &context) {
   // adding the replacement as a non-voter with promote=true.
   // The following code implements tablet movement in that paradigm.
 
-  client::sp::shared_ptr<KuduClient> client;
-  RETURN_NOT_OK(KuduClientBuilder()
-                .master_server_addrs(master_addresses)
-                .Build(&client));
-
   BulkChangeConfigRequestPB bulk_req;
   {
     auto* change = bulk_req.add_config_changes();
@@ -451,13 +472,6 @@ Status MoveReplica(const RunnerContext &context) {
     RETURN_NOT_OK(HostPortToPB(hp, change->mutable_peer()->mutable_last_known_addr()));
   }
 
-  // Find this tablet's leader replica. We need its UUID and RPC address.
-  string leader_uuid;
-  HostPort leader_hp;
-  RETURN_NOT_OK(GetTabletLeader(client, tablet_id, &leader_uuid, &leader_hp));
-  unique_ptr<ConsensusServiceProxy> proxy;
-  RETURN_NOT_OK(BuildProxy(leader_hp.host(), leader_hp.port(), &proxy));
-
   BulkChangeConfigRequestPB req;
   consensus::ChangeConfigResponsePB resp;
   RpcController rpc;

http://git-wip-us.apache.org/repos/asf/kudu/blob/e37bd1cf/src/kudu/tserver/tablet_service.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tserver/tablet_service.cc b/src/kudu/tserver/tablet_service.cc
index cd264f6..1294491 100644
--- a/src/kudu/tserver/tablet_service.cc
+++ b/src/kudu/tserver/tablet_service.cc
@@ -53,6 +53,7 @@
 #include "kudu/consensus/metadata.pb.h"
 #include "kudu/consensus/opid.pb.h"
 #include "kudu/consensus/raft_consensus.h"
+#include "kudu/consensus/replica_management.pb.h"
 #include "kudu/consensus/time_manager.h"
 #include "kudu/gutil/casts.h"
 #include "kudu/gutil/macros.h"
@@ -133,6 +134,7 @@ DEFINE_int32(scanner_inject_latency_on_each_batch_ms, 0,
              "Used for tests.");
 TAG_FLAG(scanner_inject_latency_on_each_batch_ms, unsafe);
 
+DECLARE_bool(raft_prepare_replacement_before_eviction);
 DECLARE_int32(memory_limit_warn_threshold_percentage);
 DECLARE_int32(tablet_history_max_age_sec);
 
@@ -1217,6 +1219,10 @@ void ConsensusServiceImpl::GetConsensusState(const consensus::GetConsensusStateR
     tablet_info->set_tablet_id(replica->tablet_id());
     *tablet_info->mutable_cstate() = consensus->ConsensusState();
   }
+  const auto scheme = FLAGS_raft_prepare_replacement_before_eviction
+      ? consensus::ReplicaManagementInfoPB::PREPARE_REPLACEMENT_BEFORE_EVICTION
+      : consensus::ReplicaManagementInfoPB::EVICT_FIRST;
+  resp->mutable_replica_management_info()->set_replacement_scheme(scheme);
 
   context->RespondSuccess();
 }


Mime
View raw message