kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From a...@apache.org
Subject kudu git commit: webserver-stress-itest: fix flakiness
Date Mon, 26 Feb 2018 20:00:43 GMT
Repository: kudu
Updated Branches:
  refs/heads/master f3382a9b8 -> c04981d81


webserver-stress-itest: fix flakiness

This fixes a source of flakiness I found on the flaky dashboard. In some runs
of this test, we'd hit the following interleaving:

- we start the master with webserver_port=0 and it picks some port (e.g. 35000)
- we stop the master
- the curl threads are still running, and one of them picks port 35000 as the
  local side of its TCP connection. It then tries to connect to 35000 and hits
  the dreaded "tcp loop connect" phenomenon[1] in which it actually connects
  to _itself_. Thus it just hangs there, occupying the port.
- we try to start the master again, and it fails to bind
- we now time out trying to Join() on the curl thread, which is waiting forever
  for itself to respond to an HTTP request.

The fix is to use non-ephemeral ports for the webserver as we already do
for the RPC server. I additionally added timeouts to the curl calls.

[1] http://www.rampa.sk/static/tcpLoopConnect.html

Change-Id: If754d7f47a4c9c04bae3e9ef31acad801dd4db9b
Reviewed-on: http://gerrit.cloudera.org:8080/9414
Tested-by: Kudu Jenkins
Reviewed-by: Adar Dembo <adar@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/c04981d8
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/c04981d8
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/c04981d8

Branch: refs/heads/master
Commit: c04981d81d547819416155f2295e1210d9d7c39a
Parents: f3382a9
Author: Todd Lipcon <todd@apache.org>
Authored: Thu Feb 22 19:28:53 2018 -0800
Committer: Adar Dembo <adar@cloudera.com>
Committed: Mon Feb 26 20:00:20 2018 +0000

----------------------------------------------------------------------
 src/kudu/integration-tests/linked_list-test-util.h   |  6 +++++-
 src/kudu/integration-tests/webserver-stress-itest.cc | 15 +++++++++++++++
 src/kudu/util/curl_util.cc                           |  6 +++++-
 src/kudu/util/curl_util.h                            |  7 +++++++
 4 files changed, 32 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/c04981d8/src/kudu/integration-tests/linked_list-test-util.h
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/linked_list-test-util.h b/src/kudu/integration-tests/linked_list-test-util.h
index 3f873ed..ba04ea6 100644
--- a/src/kudu/integration-tests/linked_list-test-util.h
+++ b/src/kudu/integration-tests/linked_list-test-util.h
@@ -363,6 +363,8 @@ class PeriodicWebUIChecker {
  private:
   void CheckThread() {
     EasyCurl curl;
+    // Set some timeout so that if the page deadlocks, we fail the test.
+    curl.set_timeout(MonoDelta::FromSeconds(120));
     faststring dst;
     LOG(INFO) << "Curl thread will poll the following URLs every " << period_.ToMilliseconds()
         << " ms: ";
@@ -373,9 +375,11 @@ class PeriodicWebUIChecker {
       // Poll all of the URLs.
       const MonoTime start = MonoTime::Now();
       for (const auto& url : urls_) {
-        if (curl.FetchURL(url, &dst).ok()) {
+        Status s = curl.FetchURL(url, &dst);
+        if (s.ok()) {
           CHECK_GT(dst.length(), 0);
         }
+        CHECK(!s.IsTimedOut()) << "timed out fetching url " << url;
       }
       // Sleep until the next period
       const MonoDelta elapsed = MonoTime::Now() - start;

http://git-wip-us.apache.org/repos/asf/kudu/blob/c04981d8/src/kudu/integration-tests/webserver-stress-itest.cc
----------------------------------------------------------------------
diff --git a/src/kudu/integration-tests/webserver-stress-itest.cc b/src/kudu/integration-tests/webserver-stress-itest.cc
index 08ed172..51e4083 100644
--- a/src/kudu/integration-tests/webserver-stress-itest.cc
+++ b/src/kudu/integration-tests/webserver-stress-itest.cc
@@ -47,7 +47,22 @@ TEST_F(KuduTest, TestWebUIDoesNotCrashCluster) {
   const int kNumTablets = 50;
 
   ExternalMiniClusterOptions opts;
+  // Force specific ports so that we can restart and guarantee we
+  // can bind the same port. If we use ephemeral ports, it's possible
+  // for one of the 'curl' threads to grab one of the ports as the local
+  // side of a client TCP connection while the server is down, preventing
+  // it from restarting. Choosing ports from the non-ephemeral range
+  // prevents this.
   opts.master_rpc_ports = { 11010, 11011, 11012 };
+#ifdef __linux__
+  // We can only do explicit webserver ports on Linux, where we use
+  // IPs like 127.x.y.z to bind the minicluster servers to different
+  // hosts. This might make the test marginally flaky on OSX, but
+  // it's easier than adding the ability to pipe separate webserver
+  // ports to each server.
+  opts.extra_master_flags.emplace_back("-webserver_port=11013");
+  opts.extra_tserver_flags.emplace_back("-webserver_port=11014");
+#endif
   opts.num_masters = opts.master_rpc_ports.size();
 
   ExternalMiniCluster cluster(opts);

http://git-wip-us.apache.org/repos/asf/kudu/blob/c04981d8/src/kudu/util/curl_util.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/curl_util.cc b/src/kudu/util/curl_util.cc
index 44ef089..f061dc5 100644
--- a/src/kudu/util/curl_util.cc
+++ b/src/kudu/util/curl_util.cc
@@ -94,7 +94,11 @@ Status EasyCurl::DoRequest(const std::string& url,
   }
 
   RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_HTTPAUTH, CURLAUTH_ANY)));
-
+  if (timeout_.Initialized()) {
+    RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1)));
+    RETURN_NOT_OK(TranslateError(curl_easy_setopt(curl_, CURLOPT_TIMEOUT_MS,
+        timeout_.ToMilliseconds())));
+  }
   RETURN_NOT_OK(TranslateError(curl_easy_perform(curl_)));
   long rc; // NOLINT(runtime/int) curl wants a long
   RETURN_NOT_OK(TranslateError(curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, &rc)));

http://git-wip-us.apache.org/repos/asf/kudu/blob/c04981d8/src/kudu/util/curl_util.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/curl_util.h b/src/kudu/util/curl_util.h
index 797c8a6..49ba2d4 100644
--- a/src/kudu/util/curl_util.h
+++ b/src/kudu/util/curl_util.h
@@ -20,6 +20,7 @@
 #include <string>
 
 #include "kudu/gutil/macros.h"
+#include "kudu/util/monotime.h"
 #include "kudu/util/status.h"
 
 typedef void CURL;
@@ -58,6 +59,10 @@ class EasyCurl {
     return_headers_ = v;
   }
 
+  void set_timeout(MonoDelta t) {
+    timeout_ = t;
+  }
+
  private:
   // Do a request. If 'post_data' is non-NULL, does a POST.
   // Otherwise, does a GET.
@@ -72,6 +77,8 @@ class EasyCurl {
   // Whether to return the HTTP headers with the response.
   bool return_headers_ = false;
 
+  MonoDelta timeout_;
+
   DISALLOW_COPY_AND_ASSIGN(EasyCurl);
 };
 


Mime
View raw message