Mailing-List: contact commits-help@kudu.apache.org; run by ezmlm
Precedence: bulk
Reply-To: dev@kudu.apache.org
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: todd@apache.org
To: commits@kudu.apache.org
Date: Wed, 05 Oct 2016 21:29:44 -0000
Message-Id: <16d2131cc82e47f0ab4638829cca1d54@git.apache.org>
In-Reply-To: <10e25c29f9744cb98afd91fb2eab6b52@git.apache.org>
References: <10e25c29f9744cb98afd91fb2eab6b52@git.apache.org>
Subject: [3/4] kudu git commit: kernel_stack_watchdog: avoid blocking threads
 starting
archived-at: Wed, 05 Oct 2016 21:29:44 -0000

kernel_stack_watchdog: avoid blocking threads starting

I've noticed recently that threads start particularly slowly in TSAN.
One culprit which seems to exacerbate this issue is the following:

- TSAN defers signal-handling in many cases, which causes the stack
  watchdog to be slow at collecting stacks.
- The stack watchdog was holding a lock while collecting stacks from
  stuck threads.
- This lock blocked other threads from starting, since every new thread
  needs to register itself with the watchdog.

The fix here is to make the synchronization more fine-grained: we only
hold this lock long enough to make a copy of the current map of
registered threads. However, it's still important to prevent these
threads from _exiting_ while we are looking at their TLS. So, this patch
adds a new 'unregister_lock_' which is used to prevent such exits.

Since 'lock_' is now held for only short periods of time, I switched it
out for a spinlock instead of a mutex.

Additionally, the lock protecting the log collector was also separated
out.

No new tests are included, but the watchdog is already covered and runs
as part of nearly every test.

Change-Id: I7af85ade6ec9050843ec5b146d26c2549c503d8f
Reviewed-on: http://gerrit.cloudera.org:8080/4626
Reviewed-by: Adar Dembo <adar@cloudera.com>
Tested-by: Todd Lipcon <todd@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/0558aaa4
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/0558aaa4
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/0558aaa4

Branch: refs/heads/master
Commit: 0558aaa48ed9bbfe1e680b2be9f9f3f60f7dd6c0
Parents: 98f42cd
Author: Todd Lipcon <todd@apache.org>
Authored: Tue Oct 4 19:09:11 2016 -0700
Committer: Todd Lipcon <todd@apache.org>
Committed: Wed Oct 5 20:58:33 2016 +0000

----------------------------------------------------------------------
 src/kudu/util/kernel_stack_watchdog.cc | 77 +++++++++++++++++------------
 src/kudu/util/kernel_stack_watchdog.h  | 23 +++++++--
 2 files changed, 64 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/0558aaa4/src/kudu/util/kernel_stack_watchdog.cc
----------------------------------------------------------------------
diff --git a/src/kudu/util/kernel_stack_watchdog.cc b/src/kudu/util/kernel_stack_watchdog.cc
index cbab345..2f7517a 100644
--- a/src/kudu/util/kernel_stack_watchdog.cc
+++ b/src/kudu/util/kernel_stack_watchdog.cc
@@ -34,6 +34,7 @@ DEFINE_int32(hung_task_check_interval_ms, 200,
              "Number of milliseconds in between checks for hung threads");
 TAG_FLAG(hung_task_check_interval_ms, hidden);
 
+using std::lock_guard;
 using strings::Substitute;
 
 namespace kudu {
@@ -61,7 +62,7 @@ KernelStackWatchdog::~KernelStackWatchdog() {
 }
 
 void KernelStackWatchdog::SaveLogsForTests(bool save_logs) {
-  MutexLock l(lock_);
+  lock_guard<simple_spinlock> l(log_lock_);
   if (save_logs) {
     log_collector_.reset(new vector<string>());
   } else {
@@ -70,20 +71,21 @@ void KernelStackWatchdog::SaveLogsForTests(bool save_logs) {
 }
 
 vector<string> KernelStackWatchdog::LoggedMessagesForTests() const {
-  MutexLock l(lock_);
+  lock_guard<simple_spinlock> l(log_lock_);
   CHECK(log_collector_) << "Must call SaveLogsForTests(true) first";
   return *log_collector_;
 }
 
 void KernelStackWatchdog::Register(TLS* tls) {
   int64_t tid = Thread::CurrentThreadId();
-  MutexLock l(lock_);
+  lock_guard<simple_spinlock> l(tls_lock_);
   InsertOrDie(&tls_by_tid_, tid, tls);
 }
 
-void KernelStackWatchdog::Unregister(TLS* tls) {
+void KernelStackWatchdog::Unregister() {
   int64_t tid = Thread::CurrentThreadId();
-  MutexLock l(lock_);
+  MutexLock l(unregister_lock_);
+  lock_guard<simple_spinlock> l2(tls_lock_);
   CHECK(tls_by_tid_.erase(tid));
 }
 
@@ -102,37 +104,48 @@ void KernelStackWatchdog::RunThread() {
       break;
     }
 
+    // Prevent threads from unregistering between the snapshot loop and the sending of
+    // signals. This makes it safe for us to access their TLS. We might delay the thread
+    // exit a bit, but it would be unusual for any code to block on a thread exit, whereas
+    // it's relatively important for threads to _start_ quickly.
+    MutexLock l(unregister_lock_);
+
+    // Take the snapshot of the thread information under a short lock.
+    //
+    // 'lock_' prevents new threads from starting, so we don't want to do any lengthy work
+    // (such as gathering stack traces) under this lock.
+    TLSMap tls_map_copy;
     {
-      MutexLock l(lock_);
-      MicrosecondsInt64 now = GetMonoTimeMicros();
-
-      for (const TLSMap::value_type& map_entry : tls_by_tid_) {
-        pid_t p = map_entry.first;
-        const TLS::Data* tls = &map_entry.second->data_;
-
-        TLS::Data tls_copy;
-        tls->SnapshotCopy(&tls_copy);
-
-        for (int i = 0; i < tls_copy.depth_; i++) {
-          TLS::Frame* frame = &tls_copy.frames_[i];
-
-          int paused_ms = (now - frame->start_time_) / 1000;
-          if (paused_ms > frame->threshold_ms_) {
-            string kernel_stack;
-            Status s = GetKernelStack(p, &kernel_stack);
-            if (!s.ok()) {
-              // Can't read the kernel stack of the pid -- it's possible that the thread exited
-              // while we were iterating, so just ignore it.
-              kernel_stack = "(could not read kernel stack)";
-            }
-
-            string user_stack = DumpThreadStack(p);
-            LOG_STRING(WARNING, log_collector_.get())
+      lock_guard<simple_spinlock> l(tls_lock_);
+      tls_map_copy = tls_by_tid_;
+    }
+
+    MicrosecondsInt64 now = GetMonoTimeMicros();
+    for (const auto& entry : tls_map_copy) {
+      pid_t p = entry.first;
+      TLS::Data* tls = &entry.second->data_;
+      TLS::Data tls_copy;
+      tls->SnapshotCopy(&tls_copy);
+      for (int i = 0; i < tls_copy.depth_; i++) {
+        const TLS::Frame* frame = &tls_copy.frames_[i];
+
+        int paused_ms = (now - frame->start_time_) / 1000;
+        if (paused_ms > frame->threshold_ms_) {
+          string kernel_stack;
+          Status s = GetKernelStack(p, &kernel_stack);
+          if (!s.ok()) {
+            // Can't read the kernel stack of the pid, just ignore it.
+            kernel_stack = "(could not read kernel stack)";
+          }
+
+          string user_stack = DumpThreadStack(p);
+
+          lock_guard<simple_spinlock> l(log_lock_);
+          LOG_STRING(WARNING, log_collector_.get())
               << "Thread " << p << " stuck at " << frame->status_
               << " for " << paused_ms << "ms" << ":\n"
               << "Kernel stack:\n" << kernel_stack << "\n"
               << "User stack:\n" << user_stack;
-          }
         }
       }
     }
@@ -150,7 +163,7 @@ KernelStackWatchdog::TLS::TLS() {
 }
 
 KernelStackWatchdog::TLS::~TLS() {
-  KernelStackWatchdog::GetInstance()->Unregister(this);
+  KernelStackWatchdog::GetInstance()->Unregister();
 }
 
 // Optimistic concurrency control approach to snapshot the value of another

http://git-wip-us.apache.org/repos/asf/kudu/blob/0558aaa4/src/kudu/util/kernel_stack_watchdog.h
----------------------------------------------------------------------
diff --git a/src/kudu/util/kernel_stack_watchdog.h b/src/kudu/util/kernel_stack_watchdog.h
index 914479f..79b6087 100644
--- a/src/kudu/util/kernel_stack_watchdog.h
+++ b/src/kudu/util/kernel_stack_watchdog.h
@@ -63,8 +63,9 @@
 #include "kudu/gutil/singleton.h"
 #include "kudu/gutil/walltime.h"
 #include "kudu/util/countdown_latch.h"
-#include "kudu/util/mutex.h"
+#include "kudu/util/locks.h"
 #include "kudu/util/monotime.h"
+#include "kudu/util/mutex.h"
 #include "kudu/util/threadlocal.h"
 
 #define SCOPED_WATCH_STACK(threshold_ms) \
@@ -167,7 +168,7 @@ class KernelStackWatchdog {
   void Register(TLS* tls);
 
   // Called when a thread's TLS is destructed (i.e. when the thread exits).
-  void Unregister(TLS* tls);
+  void Unregister();
 
   // The actual watchdog loop that the watchdog thread runs.
   void RunThread();
@@ -181,8 +182,22 @@ class KernelStackWatchdog {
   // Used by tests.
   gscoped_ptr<std::vector<std::string> > log_collector_;
 
-  // Lock protecting tls_by_tid_ and log_collector_.
-  mutable Mutex lock_;
+  // Lock protecting log_collector_.
+  mutable simple_spinlock log_lock_;
+
+  // Lock protecting tls_by_tid_.
+  mutable simple_spinlock tls_lock_;
+
+  // Lock which prevents threads from unregistering while the watchdog
+  // sends signals.
+  //
+  // This is used to prevent the watchdog from sending a signal to a pid just
+  // after the pid has actually exited and been reused. Sending a signal to
+  // a non-Kudu thread could have unintended consequences.
+  //
+  // When this lock is held concurrently with 'tls_lock_' or 'log_lock_',
+  // this lock must be acquired first.
+  Mutex unregister_lock_;
 
   // The watchdog thread itself.
   scoped_refptr<Thread> thread_;