kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mpe...@apache.org
Subject [1/5] kudu git commit: [tools] ksck imp. [8/n]: Easy-to-see errors when running w/o admin
Date Tue, 29 May 2018 23:44:26 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 2f61b72ca -> e840ad7e9


[tools] ksck imp. [8/n]: Easy-to-see errors when running w/o admin

If ksck is not run as an admin user, then it cannot gather all of the
information it uses to do cluster checks. "Not authorized" errors are
easy to mistake for other errors. This patch handles these errors
specially to make it obvious to administrators when they have forgotten
to run as an administrative user.

The new errors at the end of the ksck output look like this:

==================
Errors:
==================
Not authorized: error fetching info from masters: failed to gather info from 1 of 1 masters
due to lack of admin privileges
Not authorized: error fetching info from tablet servers: failed to gather info from 11 of
12 tablet servers due to lack of admin privileges

FAILED
Not authorized: re-run ksck with administrator privileges

Change-Id: I644957103efefceeba4b82f4557376a603507423
Reviewed-on: http://gerrit.cloudera.org:8080/10300
Reviewed-by: Attila Bukor <abukor@cloudera.com>
Tested-by: Kudu Jenkins
Reviewed-by: Alexey Serbin <aserbin@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/e04fd4e4
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/e04fd4e4
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/e04fd4e4

Branch: refs/heads/master
Commit: e04fd4e4d935a062235d6f7451db0ce56773206a
Parents: 2f61b72
Author: Will Berkeley <wdberkeley@apache.org>
Authored: Thu May 24 16:10:39 2018 -0700
Committer: Will Berkeley <wdberkeley@gmail.com>
Committed: Fri May 25 21:03:59 2018 +0000

----------------------------------------------------------------------
 src/kudu/tools/ksck-test.cc    | 22 +++++++++++++++++++
 src/kudu/tools/ksck.cc         | 43 ++++++++++++++++++++++++++++++++++++-
 src/kudu/tools/ksck_results.cc | 11 ++++++++--
 src/kudu/tools/ksck_results.h  |  3 +++
 src/kudu/tools/tool.proto      |  1 +
 5 files changed, 77 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/e04fd4e4/src/kudu/tools/ksck-test.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck-test.cc b/src/kudu/tools/ksck-test.cc
index eff2609..64bca59 100644
--- a/src/kudu/tools/ksck-test.cc
+++ b/src/kudu/tools/ksck-test.cc
@@ -756,6 +756,28 @@ TEST_F(KsckTest, TestMasterUnavailable) {
   CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
 }
 
+TEST_F(KsckTest, TestUnauthorized) {
+  Status noauth = Status::RemoteError("Not authorized: unauthorized access to method");
+  shared_ptr<MockKsckMaster> master =
+      std::static_pointer_cast<MockKsckMaster>(cluster_->masters_.at(1));
+  master->fetch_info_status_ = noauth;
+  shared_ptr<MockKsckTabletServer> tserver =
+      std::static_pointer_cast<MockKsckTabletServer>(
+          cluster_->tablet_servers().begin()->second);
+  tserver->fetch_info_status_ = noauth;
+  Status s = RunKsck();
+  ASSERT_TRUE(s.IsNotAuthorized()) << s.ToString();
+  ASSERT_STR_CONTAINS(s.ToString(), "re-run ksck with administrator privileges");
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+                      "failed to gather info from 1 of 3 "
+                      "masters due to lack of admin privileges");
+  ASSERT_STR_CONTAINS(err_stream_.str(),
+                      "failed to gather info from 1 of 3 "
+                      "tablet servers due to lack of admin privileges");
+
+  CheckJsonStringVsKsckResults(KsckResultsToJsonString(), ksck_->results());
+}
+
 // A wrong-master-uuid situation can happen if a master that is part of, e.g.,
 // a 3-peer config fails permanently and is wiped and reborn on the same address
 // with a new uuid.

http://git-wip-us.apache.org/repos/asf/kudu/blob/e04fd4e4/src/kudu/tools/ksck.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck.cc b/src/kudu/tools/ksck.cc
index 5e36f79..6b886d0 100644
--- a/src/kudu/tools/ksck.cc
+++ b/src/kudu/tools/ksck.cc
@@ -142,6 +142,11 @@ void BuildKsckConsensusStateForConfigMember(const consensus::ConsensusStatePB&
c
   }
 }
 
+bool IsNotAuthorizedMethodAccess(const Status& s) {
+  return s.IsRemoteError() &&
+         s.ToString().find("Not authorized: unauthorized access to method") != string::npos;
+}
+
 } // anonymous namespace
 
 ChecksumOptions::ChecksumOptions()
@@ -192,6 +197,7 @@ Ksck::Ksck(shared_ptr<KsckCluster> cluster, ostream* out)
 
 Status Ksck::CheckMasterHealth() {
   int bad_masters = 0;
+  int unauthorized_masters = 0;
   vector<KsckServerHealthSummary> master_summaries;
   // There shouldn't be more than 5 masters, so we'll keep it simple and gather
   // info in sequence instead of spreading it across a threadpool.
@@ -204,14 +210,28 @@ Status Ksck::CheckMasterHealth() {
     sh.address = master->address();
     sh.status = s;
     if (!s.ok()) {
+      if (IsNotAuthorizedMethodAccess(s)) {
+        sh.health = KsckServerHealth::UNAUTHORIZED;
+        unauthorized_masters++;
+      } else {
+        sh.health = KsckServerHealth::UNAVAILABLE;
+      }
       bad_masters++;
-      sh.health = KsckServerHealth::UNAVAILABLE;
     }
     master_summaries.push_back(std::move(sh));
   }
   results_.master_summaries.swap(master_summaries);
 
   int num_masters = cluster_->masters().size();
+
+  // Return a NotAuthorized status if any master has auth errors, since this
+  // indicates ksck may not be able to gather full and accurate info.
+  if (unauthorized_masters > 0) {
+    return Status::NotAuthorized(
+        Substitute("failed to gather info from $0 of $1 "
+                   "masters due to lack of admin privileges",
+                   unauthorized_masters, num_masters));
+  }
   if (bad_masters > 0) {
     return Status::NetworkError(
         Substitute("failed to gather info from all masters: $0 of $1 had errors",
@@ -283,6 +303,7 @@ Status Ksck::FetchInfoFromTabletServers() {
                 .Build(&pool));
 
   AtomicInt<int32_t> bad_servers(0);
+  AtomicInt<int32_t> unauthorized_servers(0);
   VLOG(1) << "Fetching info from all " << servers_count << " tablet servers";
 
   vector<KsckServerHealthSummary> tablet_server_summaries;
@@ -304,6 +325,10 @@ Status Ksck::FetchInfoFromTabletServers() {
           summary.address = entry.second->address();
           summary.status = s;
           if (!s.ok()) {
+            if (IsNotAuthorizedMethodAccess(s)) {
+              health = KsckServerHealth::UNAUTHORIZED;
+              unauthorized_servers.Increment();
+            }
             bad_servers.Increment();
           }
           summary.health = health;
@@ -319,6 +344,14 @@ Status Ksck::FetchInfoFromTabletServers() {
   if (bad_servers.Load() == 0) {
     return Status::OK();
   }
+  // Return a NotAuthorized status if any tablet server has auth errors, since
+  // this indicates ksck may not be able to gather full and accurate info.
+  if (unauthorized_servers.Load() > 0) {
+    return Status::NotAuthorized(
+        Substitute("failed to gather info from $0 of $1 tablet servers due "
+                   "to lack of admin privileges",
+                   unauthorized_servers.Load(), servers_count));
+  }
   return Status::NetworkError(
       Substitute("failed to gather info for all tablet servers: $0 of $1 had errors",
                  bad_servers.Load(), servers_count));
@@ -357,6 +390,14 @@ Status Ksck::Run() {
                         results_.error_messages, "checksum scan error");
   }
 
+  // Use a special-case error if there are auth errors. This makes it harder
+  // for admins to miss that ksck isn't working right because they forgot to
+  // (e.g.) sudo -u kudu when running ksck!
+  if (std::any_of(std::begin(results_.error_messages),
+                  std::end(results_.error_messages),
+                  [](const Status& s) { return s.IsNotAuthorized(); })) {
+    return Status::NotAuthorized("re-run ksck with administrator privileges");
+  }
   if (!results_.error_messages.empty()) {
     return Status::RuntimeError("ksck discovered errors");
   }

http://git-wip-us.apache.org/repos/asf/kudu/blob/e04fd4e4/src/kudu/tools/ksck_results.cc
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_results.cc b/src/kudu/tools/ksck_results.cc
index a3b01af..476db0e 100644
--- a/src/kudu/tools/ksck_results.cc
+++ b/src/kudu/tools/ksck_results.cc
@@ -153,6 +153,8 @@ const char* const ServerHealthToString(KsckServerHealth sh) {
   switch (sh) {
     case KsckServerHealth::HEALTHY:
       return "HEALTHY";
+    case KsckServerHealth::UNAUTHORIZED:
+      return "UNAUTHORIZED";
     case KsckServerHealth::UNAVAILABLE:
       return "UNAVAILABLE";
     case KsckServerHealth::WRONG_SERVER_UUID:
@@ -166,10 +168,12 @@ int ServerHealthScore(KsckServerHealth sh) {
   switch (sh) {
     case KsckServerHealth::HEALTHY:
       return 0;
-    case KsckServerHealth::UNAVAILABLE:
+    case KsckServerHealth::UNAUTHORIZED:
       return 1;
-    case KsckServerHealth::WRONG_SERVER_UUID:
+    case KsckServerHealth::UNAVAILABLE:
       return 2;
+    case KsckServerHealth::WRONG_SERVER_UUID:
+      return 3;
     default:
       LOG(FATAL) << "Unknown KsckServerHealth";
   }
@@ -447,6 +451,9 @@ void KsckServerHealthSummaryToPb(const KsckServerHealthSummary& summary,
     case KsckServerHealth::HEALTHY:
       pb->set_health(KsckServerHealthSummaryPB_ServerHealth_HEALTHY);
       break;
+    case KsckServerHealth::UNAUTHORIZED:
+      pb->set_health(KsckServerHealthSummaryPB_ServerHealth_UNAUTHORIZED);
+      break;
     case KsckServerHealth::UNAVAILABLE:
       pb->set_health(KsckServerHealthSummaryPB_ServerHealth_UNAVAILABLE);
       break;

http://git-wip-us.apache.org/repos/asf/kudu/blob/e04fd4e4/src/kudu/tools/ksck_results.h
----------------------------------------------------------------------
diff --git a/src/kudu/tools/ksck_results.h b/src/kudu/tools/ksck_results.h
index c9fba92..a7b9c02 100644
--- a/src/kudu/tools/ksck_results.h
+++ b/src/kudu/tools/ksck_results.h
@@ -119,6 +119,9 @@ enum class KsckServerHealth {
   // The server is healthy.
   HEALTHY,
 
+  // The server rejected attempts to communicate as unauthorized.
+  UNAUTHORIZED,
+
   // The server can't be contacted.
   UNAVAILABLE,
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/e04fd4e4/src/kudu/tools/tool.proto
----------------------------------------------------------------------
diff --git a/src/kudu/tools/tool.proto b/src/kudu/tools/tool.proto
index 63e3961..f293549 100644
--- a/src/kudu/tools/tool.proto
+++ b/src/kudu/tools/tool.proto
@@ -221,6 +221,7 @@ message KsckServerHealthSummaryPB {
     HEALTHY = 0;
     UNAVAILABLE = 1;
     WRONG_SERVER_UUID = 2;
+    UNAUTHORIZED = 3;
   }
   optional string uuid = 1;
   optional string address = 2;


Mime
View raw message