kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject [2/5] kudu git commit: [tools] Add version info and check to ksck
Date Tue, 05 Jun 2018 23:29:03 GMT
[tools] Add version info and check to ksck

This adds a new version summary table, similar to the one on the
master web ui for tablet servers, that summarizes the versions of Kudu
running in the cluster (both masters and tablet servers). ksck will
issue a warning if there are multiple versions. The version info is
available as part of the server health summaries in the KsckResults, so
clients of the Ksck class can use this information. Version info is
plumbed into the JSON output as well.

An example table and warning:

Version Summary
    Version     |                                                  Servers
----------------+-----------------------------------------------------------------------------------------------------
 1.6.0          | master@master-0.foo.com, tserver@tserver-0.foo.com, tserver@tserver-1.foo.com, and 1 other server(s)
 1.7.0          | master@master-1.foo.com
 1.8.0-SNAPSHOT | master@master-2.foo.com

==================
Warnings:
==================
version check error: not all servers are running the same version: 3 different versions were seen

Change-Id: I4a2bc7138f074ab8d74f21ace2eb2b8018c53f50
Reviewed-on: http://gerrit.cloudera.org:8080/10579
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <aserbin@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/46dd4cd4
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/46dd4cd4
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/46dd4cd4

Branch: refs/heads/master
Commit: 46dd4cd4cb9a66b6b974460ef2565830481f886b
Parents: 91881e2
Author: Will Berkeley <wdberkeley@apache.org>
Authored: Fri Jun 1 14:53:28 2018 -0700
Committer: Will Berkeley <wdberkeley@gmail.com>
Committed: Tue Jun 5 01:08:52 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/ksck-test.cc    | 28 +++++++++++++++++++++++++++-
 src/kudu/tools/ksck.cc         | 37 +++++++++++++++++++++++++++++--------
 src/kudu/tools/ksck.h          | 20 ++++++++++++++++++++
 src/kudu/tools/ksck_remote.cc  |  3 +++
 src/kudu/tools/ksck_results.cc | 30 ++++++++++++++++++++++++++++++
 src/kudu/tools/ksck_results.h  |  8 ++++++++
 src/kudu/tools/tool.proto      |  1 +
 7 files changed, 118 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/46dd4cd4/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index 8a2e137..22d737f 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -73,6 +73,7 @@ class MockKsckMaster : public KsckMaster {
       : KsckMaster(address),
         fetch_info_status_(Status::OK()) {
     uuid_ = uuid;
+    version_ = "mock-version";
     flags_ = GetFlagsResponsePB{};
   }
 
@@ -104,6 +105,7 @@ class MockKsckMaster : public KsckMaster {
   using KsckMaster::uuid_;
   using KsckMaster::cstate_;
   using KsckMaster::flags_;
+  using KsckMaster::version_;
 };
 
 class MockKsckTabletServer : public KsckTabletServer {
@@ -113,6 +115,7 @@ class MockKsckTabletServer : public KsckTabletServer {
         fetch_info_status_(Status::OK()),
         fetch_info_health_(KsckServerHealth::HEALTHY),
         address_("<mock>") {
+    version_ = "mock-version";
     flags_ = GetFlagsResponsePB{};
   }
 
@@ -154,6 +157,7 @@ class MockKsckTabletServer : public KsckTabletServer {
   Status fetch_info_status_;
   KsckServerHealth fetch_info_health_;
   using KsckTabletServer::flags_;
+  using KsckTabletServer::version_;
 
  private:
   const string address_;
@@ -1454,7 +1458,7 @@ TEST_F(KsckTest, TestTabletFilters) {
   CreateOneSmallReplicatedTable();
 
   ksck_->set_tablet_id_filters({ "tablet-id-0", "tablet-id-1" });
-  ASSERT_OK(ksck_->RunAndPrintResults());
+  ASSERT_OK(RunKsck());
   ASSERT_STR_CONTAINS(err_stream_.str(),
       "                | Total Count\n"
       "----------------+-------------\n"
@@ -1467,5 +1471,27 @@ TEST_F(KsckTest, TestTabletFilters) {
   CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
 }
 
+TEST_F(KsckTest, TestVersionCheck) {
+  for (int i : {1, 2}) {
+    shared_ptr<MockKsckMaster> master =
+        static_pointer_cast<MockKsckMaster>(cluster_->masters_[i]);
+    master->version_ = Substitute("v$0", i);
+  }
+
+  ASSERT_OK(RunKsck());
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+      "Version Summary\n"
+      "   Version    |                                Servers\n"
+      "--------------+------------------------------------------------------------------------\n"
+      " mock-version | master@master-0, tserver@<mock>, tserver@<mock>, and 1 other server(s)\n"
+      " v1           | master@master-1\n"
+      " v2           | master@master-2");
+  ASSERT_STR_CONTAINS(err_stream_.str(), "version check error: not all servers "
+                                         "are running the same version: "
+                                         "3 different versions were seen");
+
+  CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
+}
+
 } // namespace tools
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/46dd4cd4/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index 8357999..7f5538b 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -19,7 +19,6 @@
 
 #include <algorithm>
 #include <cstdint>
-#include <iomanip>
 #include <iostream>
 #include <iterator>
 #include <map>
@@ -41,9 +40,9 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
 #include "kudu/tablet/tablet.pb.h"
+#include "kudu/tools/color.h"
 #include "kudu/util/atomic.h"
 #include "kudu/util/blocking_queue.h"
-#include "kudu/tools/color.h"
 #include "kudu/util/countdown_latch.h"
 #include "kudu/util/locks.h"
 #include "kudu/util/monotime.h"
@@ -86,14 +85,11 @@ DEFINE_bool(consensus, true,
 
 using std::cout;
 using std::endl;
-using std::left;
-using std::map;
 using std::ostream;
 using std::ostringstream;
-using std::setw;
+using std::set;
 using std::shared_ptr;
 using std::string;
-using std::to_string;
 using std::unordered_map;
 using std::vector;
 using strings::Substitute;
@@ -208,6 +204,7 @@ Status Ksck::CheckMasterHealth() {
         });
     sh.uuid = master->uuid();
     sh.address = master->address();
+    sh.version = master->version();
     sh.status = s;
     if (!s.ok()) {
       if (IsNotAuthorizedMethodAccess(s)) {
@@ -372,8 +369,9 @@ Status Ksck::FetchInfoFromTabletServers() {
                 return Status::OK();
               });
           KsckServerHealthSummary summary;
-          summary.uuid = entry.second->uuid();
-          summary.address = entry.second->address();
+          summary.uuid = ts->uuid();
+          summary.address = ts->address();
+          summary.version = ts->version();
           summary.status = s;
           if (!s.ok()) {
             if (IsNotAuthorizedMethodAccess(s)) {
@@ -447,6 +445,8 @@ Status Ksck::Run() {
                       "error fetching info from tablet servers");
   PUSH_PREPEND_NOT_OK(CheckTabletServerUnusualFlags(), results_.warning_messages,
                       "tserver flag check error");
+  PUSH_PREPEND_NOT_OK(CheckServerVersions(), results_.warning_messages,
+                      "version check error");
 
   PUSH_PREPEND_NOT_OK(CheckTablesConsistency(), results_.error_messages,
                       "table consistency check error");
@@ -497,6 +497,27 @@ Status Ksck::CheckTabletServerUnusualFlags() {
   return Status::OK();
 }
 
+Status Ksck::CheckServerVersions() {
+  set<string> versions;
+  for (const auto& s : results_.master_summaries) {
+    if (!s.version) continue;
+    InsertIfNotPresent(&versions, *s.version);
+  }
+  for (const auto& s : results_.tserver_summaries) {
+    if (!s.version) continue;
+    InsertIfNotPresent(&versions, *s.version);
+  }
+  if (versions.size() > 1) {
+    // This status seemed to fit best even though a version mismatch isn't an
+    // error. In any case, ksck only prints the message for warnings.
+    return Status::ConfigurationError(
+        Substitute("not all servers are running the same version: "
+                   "$0 different versions were seen",
+                   versions.size()));
+  }
+  return Status::OK();
+}
+
 Status Ksck::PrintResults() {
   PrintMode mode;
   if (FLAGS_ksck_format == "plain_concise") {

http://git-wip-us.apache.org/repos/asf/kudu/blob/46dd4cd4/src/kudu/tools/ksck.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.h b/src/kudu/tools/ksck.h
index 5599ee4..3857b4d 100644
--- a/src/kudu/tools/ksck.h
+++ b/src/kudu/tools/ksck.h
@@ -243,6 +243,12 @@ class KsckMaster {
     return address_;
   }
 
+  virtual const boost::optional<std::string>& version() const {
+    CHECK_NE(KsckFetchState::UNINITIALIZED, state_);
+    return version_;
+  }
+
+
   virtual const boost::optional<consensus::ConsensusStatePB> cstate() const {
     CHECK_NE(KsckFetchState::UNINITIALIZED, state_);
     return cstate_;
@@ -280,6 +286,9 @@ class KsckMaster {
   // been done, and if it succeeded or failed.
   KsckFetchState flags_state_ = KsckFetchState::UNINITIALIZED;
 
+  // May be none if fetching info from the master fails.
+  boost::optional<std::string> version_;
+
   // May be none if consensus state fetch fails.
   boost::optional<consensus::ConsensusStatePB> cstate_;
 
@@ -359,6 +368,11 @@ class KsckTabletServer {
 
   tablet::TabletStatePB ReplicaState(const std::string& tablet_id) const;
 
+  virtual const boost::optional<std::string>& version() const {
+    CHECK_NE(KsckFetchState::UNINITIALIZED, state_);
+    return version_;
+  }
+
   virtual const boost::optional<server::GetFlagsResponsePB>& flags() const {
     CHECK_NE(KsckFetchState::UNINITIALIZED, flags_state_);
     return flags_;
@@ -389,6 +403,9 @@ class KsckTabletServer {
   TabletStatusMap tablet_status_map_;
   TabletConsensusStateMap tablet_consensus_state_map_;
 
+  // May be none if fetching info from the tablet server fails.
+  boost::optional<std::string> version_;
+
   // May be none if flag fetch fails.
   boost::optional<server::GetFlagsResponsePB> flags_;
   uint64_t timestamp_;
@@ -521,6 +538,9 @@ class Ksck {
   // Must first call FetchInfoFromTabletServers().
   Status CheckTabletServerUnusualFlags();
 
+  // Check for version inconsistencies among all servers.
+  Status CheckServerVersions();
+
   // Verifies that all the tablets in all tables matching the filters have
   // enough replicas, and that each tablet's view of the tablet's consensus
   // matches every other tablet's and the master's.

http://git-wip-us.apache.org/repos/asf/kudu/blob/46dd4cd4/src/kudu/tools/ksck_remote.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_remote.cc b/src/kudu/tools/ksck_remote.cc
index f908136..98b2b0a 100644
--- a/src/kudu/tools/ksck_remote.cc
+++ b/src/kudu/tools/ksck_remote.cc
@@ -60,6 +60,7 @@
 #include "kudu/util/monotime.h"
 #include "kudu/util/net/net_util.h"
 #include "kudu/util/net/sockaddr.h"
+#include "kudu/util/version_info.pb.h"
 
 DECLARE_int64(timeout_ms); // defined in tool_action_common
 DEFINE_bool(checksum_cache_blocks, false, "Should the checksum scanners cache the read blocks");
@@ -125,6 +126,7 @@ Status RemoteKsckMaster::FetchInfo() {
   rpc.set_timeout(GetDefaultTimeout());
   RETURN_NOT_OK(generic_proxy_->GetStatus(req, &resp, &rpc));
   uuid_ = resp.status().node_instance().permanent_uuid();
+  version_ = resp.status().version_info().version_string();
   state_ = KsckFetchState::FETCHED;
   return Status::OK();
 }
@@ -188,6 +190,7 @@ Status RemoteKsckTabletServer::FetchInfo(KsckServerHealth* health) {
     rpc.set_timeout(GetDefaultTimeout());
     RETURN_NOT_OK_PREPEND(generic_proxy_->GetStatus(req, &resp, &rpc),
                           "could not get status from server");
+    version_ = resp.status().version_info().version_string();
     string response_uuid = resp.status().node_instance().permanent_uuid();
     if (response_uuid != uuid()) {
       *health = KsckServerHealth::WRONG_SERVER_UUID;

http://git-wip-us.apache.org/repos/asf/kudu/blob/46dd4cd4/src/kudu/tools/ksck_results.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_results.cc b/src/kudu/tools/ksck_results.cc
index 008719d..528928f 100644
--- a/src/kudu/tools/ksck_results.cc
+++ b/src/kudu/tools/ksck_results.cc
@@ -255,6 +255,10 @@ Status KsckResults::PrintTo(PrintMode mode, ostream& out) {
     out << endl;
   }
 
+  // Finally, in the "server section", print the version summary.
+  RETURN_NOT_OK(PrintVersionTable(master_summaries, tserver_summaries, out));
+  out << endl;
+
   // Then, on each tablet.
   RETURN_NOT_OK(PrintTabletSummaries(tablet_summaries, mode, out));
 
@@ -385,6 +389,29 @@ Status PrintFlagTable(KsckServerType type,
   return flags_table.PrintTo(out);
 }
 
+Status PrintVersionTable(const vector<KsckServerHealthSummary>& masters,
+                         const vector<KsckServerHealthSummary>& tservers,
+                         ostream& out) {
+  map<string, vector<string>> version_map;
+  for (const auto& s : masters) {
+    if (!s.version) continue;
+    auto& servers = LookupOrInsert(&version_map, *s.version, {});
+    servers.push_back(Substitute("master@$0", s.address));
+  }
+  for (const auto& s : tservers) {
+    if (!s.version) continue;
+    auto& servers = LookupOrInsert(&version_map, *s.version, {});
+    servers.push_back(Substitute("tserver@$0", s.address));
+  }
+  out << "Version Summary" << endl;
+  DataTable table({"Version", "Servers"});
+  int num_servers = masters.size() + tservers.size();
+  for (const auto& entry : version_map) {
+    table.AddRow({entry.first, ServerCsv(num_servers, entry.second)});
+  }
+  return table.PrintTo(out);
+}
+
 Status PrintTableSummaries(const vector<KsckTableSummary>& table_summaries,
                            ostream& out) {
   if (table_summaries.empty()) {
@@ -545,6 +572,9 @@ void KsckServerHealthSummaryToPb(const KsckServerHealthSummary& summary,
   }
   pb->set_uuid(summary.uuid);
   pb->set_address(summary.address);
+  if (summary.version) {
+    pb->set_version(*summary.version);
+  }
   pb->set_status(summary.status.ToString());
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/46dd4cd4/src/kudu/tools/ksck_results.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_results.h b/src/kudu/tools/ksck_results.h
index b42d5aa..b9cad6c 100644
--- a/src/kudu/tools/ksck_results.h
+++ b/src/kudu/tools/ksck_results.h
@@ -142,6 +142,7 @@ int ServerHealthScore(KsckServerHealth sh);
 struct KsckServerHealthSummary {
   std::string uuid;
   std::string address;
+  boost::optional<std::string> version;
   KsckServerHealth health = KsckServerHealth::HEALTHY;
   Status status = Status::OK();
 };
@@ -326,6 +327,13 @@ Status PrintFlagTable(KsckServerType type,
                       const KsckFlagTagsMap& flag_tags_map,
                       std::ostream& out);
 
+// Print a summary of the Kudu versions running across all servers from which
+// information could be fetched. Servers are grouped by version to make the
+// table compact.
+Status PrintVersionTable(const std::vector<KsckServerHealthSummary>& masters,
+                         const std::vector<KsckServerHealthSummary>& tservers,
+                         std::ostream& out);
+
 // Print a formatted summary of the tables in 'table_summaries' to 'out'.
 Status PrintTableSummaries(const std::vector<KsckTableSummary>& table_summaries,
                            std::ostream& out);

http://git-wip-us.apache.org/repos/asf/kudu/blob/46dd4cd4/src/kudu/tools/tool.proto
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool.proto b/src/kudu/tools/tool.proto
index f293549..7d49cf1 100644
--- a/src/kudu/tools/tool.proto
+++ b/src/kudu/tools/tool.proto
@@ -227,6 +227,7 @@ message KsckServerHealthSummaryPB {
   optional string address = 2;
   optional ServerHealth health = 3;
   optional string status = 4;
+  optional string version = 5;
 }
 
 message KsckConsensusStatePB {


Mime
View raw message