kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject [1/2] kudu git commit: master: do not crash on /dump-entities during startup
Date Sat, 10 Sep 2016 18:56:55 GMT
Repository: kudu
Updated Branches:
  refs/heads/master 2ecf034c8 -> 89b54c9f9


master: do not crash on /dump-entities during startup

A user reported a master crash when the following conditions were met:
1. The master tablet was enormous and took a while to bootstrap.
2. An external monitoring agent was periodically accessing the master's
   /dump-entities URL.

While fixing this, I also double checked that the rest of the master URLs do
the right thing before making calls into the master.

The majority of the patch repurposes the PeriodicWebUIChecker (from
linked_list-test) for the new integration test. I modified its URL polling
behavior so that all URLs are polled before sleeping.

Change-Id: I7d8e9caf7236d2c487fb9322cb3e1993cc631cdf
Reviewed-on: http://gerrit.cloudera.org:8080/4366
Tested-by: Kudu Jenkins
Reviewed-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/c06cdced
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/c06cdced
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/c06cdced

Branch: refs/heads/master
Commit: c06cdcedb16c8ba0b8a8c7465bdf786cb2e68245
Parents: 2ecf034
Author: Adar Dembo <adar@cloudera.com>
Authored: Fri Sep 9 17:41:36 2016 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Sat Sep 10 18:53:47 2016 +0000

----------------------------------------------------------------------
 .../integration-tests/external_mini_cluster.cc    | 11 ++++++++++-
 .../integration-tests/linked_list-test-util.h     | 15 +++++++--------
 .../integration-tests/master_failover-itest.cc    | 18 ++++++++++++++++++
 src/kudu/master/master-path-handlers.cc           |  7 ++++++-
 4 files changed, 41 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/c06cdced/src/kudu/integration-tests/external_mini_cluster.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/external_mini_cluster.cc b/src/kudu/integration-tests/external_mini_cluster.cc
index 7a46db9..ae54b5e 100644
--- a/src/kudu/integration-tests/external_mini_cluster.cc
+++ b/src/kudu/integration-tests/external_mini_cluster.cc
@@ -71,6 +71,7 @@ static const char* const kMasterBinaryName = "kudu-master";
 static const char* const kTabletServerBinaryName = "kudu-tserver";
 static double kProcessStartTimeoutSeconds = 30.0;
 static double kTabletServerRegistrationTimeoutSeconds = 15.0;
+static double kMasterCatalogManagerTimeoutSeconds = 30.0;
 
 #if defined(__APPLE__)
 static bool kBindToUniqueLoopbackAddress = false;
@@ -869,7 +870,9 @@ Status ExternalMaster::Restart() {
 Status ExternalMaster::WaitForCatalogManager() {
   unique_ptr<MasterServiceProxy> proxy(
       new MasterServiceProxy(messenger_, bound_rpc_addr()));
-  while (true) {
+  Stopwatch sw;
+  sw.start();
+  while (sw.elapsed().wall_seconds() < kMasterCatalogManagerTimeoutSeconds) {
     ListTablesRequestPB req;
     ListTablesResponsePB resp;
     RpcController rpc;
@@ -897,6 +900,12 @@ Status ExternalMaster::WaitForCatalogManager() {
     // ready. Sleep and retry.
     SleepFor(MonoDelta::FromMilliseconds(50));
   }
+  if (sw.elapsed().wall_seconds() > kMasterCatalogManagerTimeoutSeconds) {
+    return Status::TimedOut(
+        Substitute("Timed out after $0s waiting for master ($1) startup",
+                   kMasterCatalogManagerTimeoutSeconds,
+                   bound_rpc_addr().ToString()));
+  }
   return Status::OK();
 }
 

http://git-wip-us.apache.org/repos/asf/kudu/blob/c06cdced/src/kudu/integration-tests/linked_list-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/linked_list-test-util.h b/src/kudu/integration-tests/linked_list-test-util.h
index cccc990..9b2175c 100644
--- a/src/kudu/integration-tests/linked_list-test-util.h
+++ b/src/kudu/integration-tests/linked_list-test-util.h
@@ -334,17 +334,16 @@ class PeriodicWebUIChecker {
     for (std::string url : urls_) {
       LOG(INFO) << url;
     }
-    for (int count = 0; is_running_.Load(); count++) {
-      const std::string &url = urls_[count % urls_.size()];
-      LOG(INFO) << "Curling URL " << url;
+    while (is_running_.Load()) {
+      // Poll all of the URLs.
       const MonoTime start = MonoTime::Now();
-      Status status = curl.FetchURL(url, &dst);
-      if (status.ok()) {
-        CHECK_GT(dst.length(), 0);
+      for (const auto& url : urls_) {
+        if (curl.FetchURL(url, &dst).ok()) {
+          CHECK_GT(dst.length(), 0);
+        }
       }
       // Sleep until the next period
-      const MonoTime end = MonoTime::Now();
-      const MonoDelta elapsed = end - start;
+      const MonoDelta elapsed = MonoTime::Now() - start;
       const int64_t sleep_ns = period_.ToNanoseconds() - elapsed.ToNanoseconds();
       if (sleep_ns > 0) {
         SleepFor(MonoDelta::FromNanoseconds(sleep_ns));

http://git-wip-us.apache.org/repos/asf/kudu/blob/c06cdced/src/kudu/integration-tests/master_failover-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/master_failover-itest.cc b/src/kudu/integration-tests/master_failover-itest.cc
index 5063350..104d1b4 100644
--- a/src/kudu/integration-tests/master_failover-itest.cc
+++ b/src/kudu/integration-tests/master_failover-itest.cc
@@ -29,6 +29,7 @@
 #include "kudu/gutil/strings/substitute.h"
 #include "kudu/gutil/strings/util.h"
 #include "kudu/integration-tests/external_mini_cluster.h"
+#include "kudu/integration-tests/linked_list-test-util.h"
 #include "kudu/master/sys_catalog.h"
 #include "kudu/util/metrics.h"
 #include "kudu/util/random.h"
@@ -466,5 +467,22 @@ TEST_F(MasterFailoverTest, TestMasterPermanentFailure) {
   }
 }
 
+// Tests that accessing the web UI while the master is starting doesn't cause
+// any crashes.
+TEST_F(MasterFailoverTest, TestWebUIDuringStartup) {
+  PeriodicWebUIChecker checker(
+      *cluster_.get(),
+      "doesn't matter", // it'll ping a non-existent page
+      MonoDelta::FromMilliseconds(50));
+
+  for (int i = 0; i < 5; i++) {
+    for (int j = 0; j < cluster_->num_masters(); j++) {
+      cluster_->master(j)->Shutdown();
+      ASSERT_OK(cluster_->master(j)->Restart());
+      ASSERT_OK(cluster_->master(j)->WaitForCatalogManager());
+    }
+  }
+}
+
 } // namespace client
 } // namespace kudu

http://git-wip-us.apache.org/repos/asf/kudu/blob/c06cdced/src/kudu/master/master-path-handlers.cc
----------------------------------------------------------------------
diff --git a/src/kudu/master/master-path-handlers.cc b/src/kudu/master/master-path-handlers.cc
index 87f854e..4a2eff1 100644
--- a/src/kudu/master/master-path-handlers.cc
+++ b/src/kudu/master/master-path-handlers.cc
@@ -471,6 +471,11 @@ void JsonError(const Status& s, ostringstream* out) {
 
 void MasterPathHandlers::HandleDumpEntities(const Webserver::WebRequest& req,
                                             ostringstream* output) {
+  Status s = master_->catalog_manager()->CheckOnline();
+  if (!s.ok()) {
+    JsonError(s, output);
+    return;
+  }
   JsonWriter jw(output, JsonWriter::COMPACT);
   JsonDumper d(&jw);
 
@@ -478,7 +483,7 @@ void MasterPathHandlers::HandleDumpEntities(const Webserver::WebRequest&
req,
 
   jw.String("tables");
   jw.StartArray();
-  Status s = master_->catalog_manager()->sys_catalog()->VisitTables(&d);
+  s = master_->catalog_manager()->sys_catalog()->VisitTables(&d);
   if (!s.ok()) {
     JsonError(s, output);
     return;


Mime
View raw message