kudu-commits mailing list archives

From a...@apache.org
Subject [4/4] kudu git commit: KUDU-526: use on-disk cmeta when loading existing master state
Date Tue, 16 Aug 2016 00:26:35 GMT
KUDU-526: use on-disk cmeta when loading existing master state

The cmeta is populated whenever new master state is created, so we can trust it
when that master is restarted.

Note: this is a precondition for the migration from single-node to
multi-node master deployments.

Change-Id: I5b4c6d8b6adf696973445a6f9d1314ba9de27e70
Reviewed-on: http://gerrit.cloudera.org:8080/3786
Reviewed-by: Todd Lipcon <todd@apache.org>
Tested-by: Kudu Jenkins


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/2c3fc7c2
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/2c3fc7c2
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/2c3fc7c2

Branch: refs/heads/master
Commit: 2c3fc7c27f37ba3986f1d09eb79c7d4969c31f83
Parents: 5a70799
Author: Adar Dembo <adar@cloudera.com>
Authored: Tue Jul 26 18:05:15 2016 -0700
Committer: Adar Dembo <adar@cloudera.com>
Committed: Tue Aug 16 00:21:33 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/master_failover-itest.cc  | 42 ++++++++++++++
 .../master_replication-itest.cc                 | 13 +++++
 src/kudu/master/sys_catalog.cc                  | 60 +++++++++++++-------
 src/kudu/master/sys_catalog.h                   | 11 +---
 4 files changed, 95 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
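The core of the change is the consistency check that SysCatalogTable::Load() now performs on
restart: the set of masters passed in at startup must match the peer set recorded in the
on-disk consensus metadata. A minimal standalone sketch of that comparison follows (plain
std::string addresses and made-up host names, purely illustrative; the committed code operates
on Kudu's HostPort and ConsensusStatePB types, as shown in the diff below):

    #include <algorithm>
    #include <cstdio>
    #include <iterator>
    #include <set>
    #include <string>
    #include <vector>

    // Returns the addresses present in exactly one of the two lists (the symmetric
    // difference). An empty result means the configured masters and the peers
    // recorded on disk agree.
    std::vector<std::string> MasterListMismatch(const std::vector<std::string>& from_opts,
                                                const std::vector<std::string>& from_disk) {
      std::set<std::string> opts(from_opts.begin(), from_opts.end());
      std::set<std::string> disk(from_disk.begin(), from_disk.end());
      std::vector<std::string> symm_diff;
      std::set_symmetric_difference(opts.begin(), opts.end(),
                                    disk.begin(), disk.end(),
                                    std::back_inserter(symm_diff));
      return symm_diff;
    }

    int main() {
      // Hypothetical addresses: a master restarted with one extra entry in
      // --master_addresses relative to what its on-disk cmeta recorded.
      std::vector<std::string> from_opts = {"m1:7051", "m2:7051", "m3:7051", "m4:55555"};
      std::vector<std::string> from_disk = {"m1:7051", "m2:7051", "m3:7051"};
      for (const std::string& addr : MasterListMismatch(from_opts, from_disk)) {
        std::printf("mismatched master: %s\n", addr.c_str());  // prints "m4:55555"
      }
      return 0;
    }

In the real Load() path a non-empty symmetric difference becomes a Status::InvalidArgument,
which is exactly what the new TestMasterPeerSetsDontMatch test asserts.
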


http://git-wip-us.apache.org/repos/asf/kudu/blob/2c3fc7c2/src/kudu/integration-tests/master_failover-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_failover-itest.cc b/src/kudu/integration-tests/master_failover-itest.cc
index 1a9b1b0..b1c31f5 100644
--- a/src/kudu/integration-tests/master_failover-itest.cc
+++ b/src/kudu/integration-tests/master_failover-itest.cc
@@ -27,10 +27,14 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
 #include "kudu/integration-tests/external_mini_cluster.h"
+#include "kudu/util/metrics.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/stopwatch.h"
 #include "kudu/util/test_util.h"
 
+METRIC_DECLARE_entity(server);
+METRIC_DECLARE_histogram(handler_latency_kudu_consensus_ConsensusService_GetNodeInstance);
+
 namespace kudu {
 
 // Note: this test needs to be in the client namespace in order for
@@ -43,6 +47,7 @@ using sp::shared_ptr;
 using std::string;
 using std::vector;
 using std::unique_ptr;
+using strings::Substitute;
 
 class MasterFailoverTest : public KuduTest {
  public:
@@ -322,5 +327,42 @@ TEST_F(MasterFailoverTest, TestKUDU1374) {
     << "Deadline elapsed before alter table completed";
 }
 
+TEST_F(MasterFailoverTest, TestMasterUUIDResolution) {
+  // After a fresh start, the masters should have received RPCs asking for
+  // their UUIDs.
+  for (int i = 0; i < cluster_->num_masters(); i++) {
+    int64_t num_get_node_instances;
+    ASSERT_OK(cluster_->master(i)->GetInt64Metric(
+        &METRIC_ENTITY_server, "kudu.master",
+        &METRIC_handler_latency_kudu_consensus_ConsensusService_GetNodeInstance,
+        "total_count", &num_get_node_instances));
+
+    // Client-side timeouts may increase the number of calls beyond the raw
+    // number of masters.
+    ASSERT_GE(num_get_node_instances, cluster_->num_masters());
+  }
+
+  // Restart the masters. They should reuse one another's UUIDs from the cached
+  // consensus metadata instead of sending RPCs to discover them. See KUDU-526.
+  cluster_->Shutdown();
+  ASSERT_OK(cluster_->Restart());
+
+  // To determine whether the cached UUIDs were used, let's look at the number
+  // of GetNodeInstance() calls each master serviced. It should be zero.
+  for (int i = 0; i < cluster_->num_masters(); i++) {
+    ExternalMaster* master = cluster_->master(i);
+    int64_t num_get_node_instances;
+    ASSERT_OK(master->GetInt64Metric(
+        &METRIC_ENTITY_server, "kudu.master",
+        &METRIC_handler_latency_kudu_consensus_ConsensusService_GetNodeInstance,
+        "total_count", &num_get_node_instances));
+    EXPECT_EQ(0, num_get_node_instances) <<
+        Substitute(
+            "Following restart, master $0 has serviced $1 GetNodeInstance() calls",
+            master->bound_rpc_hostport().ToString(),
+            num_get_node_instances);
+  }
+}
+
 } // namespace client
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/2c3fc7c2/src/kudu/integration-tests/master_replication-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_replication-itest.cc b/src/kudu/integration-tests/master_replication-itest.cc
index 1307014..3356736 100644
--- a/src/kudu/integration-tests/master_replication-itest.cc
+++ b/src/kudu/integration-tests/master_replication-itest.cc
@@ -262,5 +262,18 @@ TEST_F(MasterReplicationTest, TestHeartbeatAcceptedByAnyMaster) {
       MiniCluster::MatchMode::DO_NOT_MATCH_TSERVERS, &descs));
 }
 
+TEST_F(MasterReplicationTest, TestMasterPeerSetsDontMatch) {
+  // Restart one master with an additional entry in --master_addresses. The
+  // discrepancy with the on-disk list of masters should trigger a failure.
+  cluster_->mini_master(0)->Shutdown();
+  vector<uint16_t> master_rpc_ports = opts_.master_rpc_ports;
+  master_rpc_ports.push_back(55555);
+  ASSERT_OK(cluster_->mini_master(0)->StartDistributedMaster(master_rpc_ports));
+  Status s = cluster_->mini_master(0)->WaitForCatalogManagerInit();
+  SCOPED_TRACE(s.ToString());
+  ASSERT_TRUE(s.IsInvalidArgument());
+  ASSERT_STR_CONTAINS(s.ToString(), "55555");
+}
+
 } // namespace master
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/2c3fc7c2/src/kudu/master/sys_catalog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/sys_catalog.cc b/src/kudu/master/sys_catalog.cc
index 35da4b6..0658cb3 100644
--- a/src/kudu/master/sys_catalog.cc
+++ b/src/kudu/master/sys_catalog.cc
@@ -17,9 +17,12 @@
 
 #include "kudu/master/sys_catalog.h"
 
+#include <algorithm>
 #include <gflags/gflags.h>
 #include <glog/logging.h>
+#include <iterator>
 #include <memory>
+#include <set>
 
 #include "kudu/common/partial_row.h"
 #include "kudu/common/partition.h"
@@ -32,6 +35,7 @@
 #include "kudu/consensus/opid_util.h"
 #include "kudu/consensus/quorum_util.h"
 #include "kudu/fs/fs_manager.h"
+#include "kudu/gutil/strings/join.h"
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/master/catalog_manager.h"
 #include "kudu/master/master.h"
@@ -54,6 +58,7 @@ TAG_FLAG(sys_catalog_fail_during_write, unsafe);
 
 using kudu::consensus::CONSENSUS_CONFIG_COMMITTED;
 using kudu::consensus::ConsensusMetadata;
+using kudu::consensus::ConsensusStatePB;
 using kudu::consensus::RaftConfigPB;
 using kudu::consensus::RaftPeerPB;
 using kudu::log::Log;
@@ -107,27 +112,41 @@ Status SysCatalogTable::Load(FsManager *fs_manager) {
     return(Status::Corruption("Unexpected schema", metadata->schema().ToString()));
   }
 
-  // Allow for statically and explicitly assigning the consensus configuration and roles through
-  // the master configuration on startup.
-  //
-  // TODO: The following assumptions need revisiting:
-  // 1. We always believe the local config options for who is in the consensus configuration.
-  // 2. We always want to look up all node's UUIDs on start (via RPC).
-  //    - TODO: Cache UUIDs. See KUDU-526.
   if (master_->opts().IsDistributed()) {
-    LOG(INFO) << "Configuring consensus for distributed operation...";
-
+    LOG(INFO) << "Verifying existing consensus state";
     string tablet_id = metadata->tablet_id();
     unique_ptr<ConsensusMetadata> cmeta;
     RETURN_NOT_OK_PREPEND(ConsensusMetadata::Load(fs_manager, tablet_id,
                                                   fs_manager->uuid(), &cmeta),
                           "Unable to load consensus metadata for tablet " + tablet_id);
-
-    RaftConfigPB config;
-    RETURN_NOT_OK(SetupDistributedConfig(master_->opts(), &config));
-    cmeta->set_committed_config(config);
-    RETURN_NOT_OK_PREPEND(cmeta->Flush(),
-                          "Unable to persist consensus metadata for tablet " + tablet_id);
+    ConsensusStatePB cstate = cmeta->ToConsensusStatePB(CONSENSUS_CONFIG_COMMITTED);
+    RETURN_NOT_OK(consensus::VerifyConsensusState(
+        cstate, consensus::COMMITTED_QUORUM));
+
+    // Make sure the set of masters passed in at start time matches the set in
+    // the on-disk cmeta.
+    set<string> peer_addrs_from_opts;
+    for (const auto& hp : master_->opts().master_addresses) {
+      peer_addrs_from_opts.insert(hp.ToString());
+    }
+    set<string> peer_addrs_from_disk;
+    for (const auto& p : cstate.config().peers()) {
+      HostPort hp;
+      RETURN_NOT_OK(HostPortFromPB(p.last_known_addr(), &hp));
+      peer_addrs_from_disk.insert(hp.ToString());
+    }
+    vector<string> symm_diff;
+    std::set_symmetric_difference(peer_addrs_from_opts.begin(),
+                                  peer_addrs_from_opts.end(),
+                                  peer_addrs_from_disk.begin(),
+                                  peer_addrs_from_disk.end(),
+                                  std::back_inserter(symm_diff));
+    if (!symm_diff.empty()) {
+      string msg = Substitute(
+          "on-disk and provided master lists are different: $0",
+          JoinStrings(symm_diff, " "));
+      return Status::InvalidArgument(msg);
+    }
   }
 
   RETURN_NOT_OK(SetupTablet(metadata));
@@ -157,8 +176,8 @@ Status SysCatalogTable::CreateNew(FsManager *fs_manager) {
 
   RaftConfigPB config;
   if (master_->opts().IsDistributed()) {
-    RETURN_NOT_OK_PREPEND(SetupDistributedConfig(master_->opts(), &config),
-                          "Failed to initialize distributed config");
+    RETURN_NOT_OK_PREPEND(CreateDistributedConfig(master_->opts(), &config),
+                          "Failed to create new distributed Raft config");
   } else {
     config.set_opid_index(consensus::kInvalidOpIdIndex);
     RaftPeerPB* peer = config.add_peers();
@@ -175,8 +194,8 @@ Status SysCatalogTable::CreateNew(FsManager *fs_manager) {
   return SetupTablet(metadata);
 }
 
-Status SysCatalogTable::SetupDistributedConfig(const MasterOptions& options,
-                                               RaftConfigPB* committed_config) {
+Status SysCatalogTable::CreateDistributedConfig(const MasterOptions& options,
+                                                RaftConfigPB* committed_config) {
   DCHECK(options.IsDistributed());
 
   RaftConfigPB new_config;
@@ -206,9 +225,6 @@ Status SysCatalogTable::SetupDistributedConfig(const MasterOptions& options,
       LOG(INFO) << peer.ShortDebugString()
                 << " has no permanent_uuid. Determining permanent_uuid...";
       RaftPeerPB new_peer = peer;
-      // TODO: Use ConsensusMetadata to cache the results of these lookups so
-      // we only require RPC access to the full consensus configuration on first startup.
-      // See KUDU-526.
       RETURN_NOT_OK_PREPEND(consensus::SetPermanentUuidForRemotePeer(master_->messenger(),
                                                                      &new_peer),
                             Substitute("Unable to resolve UUID for peer $0",

http://git-wip-us.apache.org/repos/asf/kudu/blob/2c3fc7c2/src/kudu/master/sys_catalog.h
----------------------------------------------------------------------
diff --git a/src/kudu/master/sys_catalog.h b/src/kudu/master/sys_catalog.h
index efb2125..9d22ef9 100644
--- a/src/kudu/master/sys_catalog.h
+++ b/src/kudu/master/sys_catalog.h
@@ -134,15 +134,8 @@ class SysCatalogTable {
 
   // Use the master options to generate a new consensus configuration.
   // In addition, resolve all UUIDs of this consensus configuration.
-  //
-  // Note: The current node adds itself to the peers whether leader or
-  // follower, depending on whether the Master options leader flag is
-  // set. Even if the local node should be a follower, it should not be listed
-  // in the Master options followers list, as it will add itself automatically.
-  //
-  // TODO: Revisit this whole thing when integrating leader election.
-  Status SetupDistributedConfig(const MasterOptions& options,
-                                consensus::RaftConfigPB* committed_config);
+  Status CreateDistributedConfig(const MasterOptions& options,
+                                 consensus::RaftConfigPB* committed_config);
 
   const scoped_refptr<tablet::TabletPeer>& tablet_peer() const {
     return tablet_peer_;

