kudu-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aw...@apache.org
Subject [kudu] branch master updated: KUDU-2576: TlsSocketTest.TestRecvFailure is flaky
Date Fri, 15 Mar 2019 00:28:11 GMT
This is an automated email from the ASF dual-hosted git repository.

awong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/kudu.git


The following commit(s) were added to refs/heads/master by this push:
     new df4fd91  KUDU-2576: TlsSocketTest.TestRecvFailure is flaky
df4fd91 is described below

commit df4fd91fdf19a72f881eff2ec33d2c1417109292
Author: Will Berkeley <wdberkeley@gmail.com>
AuthorDate: Thu Mar 14 15:53:19 2019 -0700

    KUDU-2576: TlsSocketTest.TestRecvFailure is flaky
    
    TestRecvFailure wanted an interleaving of client thread and server thread
    similar to the following:
    
    Server: enters echo (recv -> write) loop
    Server: blocking recv
    Client: stop the server
    Client: blocking write to match server's blocking recv
    Server: blocking write
    Client: blocking recv to match server's blocking write
    Client: blocking recv
    Server: exits echo loop and closes connection because it was stopped
    Client: blocking recv fails because connection is closed
    
    A sleep was used in the client thread to try to ensure that the server
    reached the blocking recv call before the client shut the server down.
    However, under TSAN, occasionally the client was able to stop the server
    before the server entered the echo loop, so the server closed the
    connection before any data could be sent, failing the test.
    
    This patch moves the client call to stop the server to after the first
    recv-write succeeds, guaranteeing the server is in the echo loop.
    
    Without this patch, I saw 10/2000 runs fail in TSAN with 8 stress
    threads. 8 were due to KUDU-2576. With this patch, I saw 4/2000, all of
    which were due to a different issue that will be addressed in a
    follow-up.
    
    Change-Id: If95576ddc9e1e23f2db904d5b22bc3b9c1522ea4
    Reviewed-on: http://gerrit.cloudera.org:8080/12758
    Reviewed-by: Adar Dembo <adar@cloudera.com>
    Reviewed-by: Alexey Serbin <aserbin@cloudera.com>
    Tested-by: Will Berkeley <wdberkeley@gmail.com>
---
 src/kudu/security/tls_socket-test.cc | 69 +++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 29 deletions(-)

diff --git a/src/kudu/security/tls_socket-test.cc b/src/kudu/security/tls_socket-test.cc
index 001a206..f609ce3 100644
--- a/src/kudu/security/tls_socket-test.cc
+++ b/src/kudu/security/tls_socket-test.cc
@@ -155,18 +155,18 @@ class EchoServer {
         CHECK_OK(sock->SetRecvTimeout(kTimeout));
         unique_ptr<uint8_t[]> buf(new uint8_t[kEchoChunkSize]);
         // An "echo" loop for kEchoChunkSize byte buffers.
-        while (!stop_) {
+        while (!is_stopped_) {
           size_t n;
           Status s = sock->BlockingRecv(buf.get(), kEchoChunkSize, &n, MonoTime::Now()
+ kTimeout);
           if (!s.ok()) {
-            CHECK(stop_) << "unexpected error reading: " << s.ToString();
+            CHECK(is_stopped_) << "unexpected error reading: " << s.ToString();
           }
 
           LOG(INFO) << "server echoing " << n << " bytes";
           size_t written;
           s = sock->BlockingWrite(buf.get(), n, &written, MonoTime::Now() + kTimeout);
           if (!s.ok()) {
-            CHECK(stop_) << "unexpected error writing: " << s.ToString();
+            CHECK(is_stopped_) << "unexpected error writing: " << s.ToString();
           }
           if (slow_read_) {
             SleepFor(MonoDelta::FromMilliseconds(10));
@@ -185,12 +185,12 @@ class EchoServer {
     return listen_addr_;
   }
 
-  bool stopped() const {
-    return stop_;
+  bool is_stopped() const {
+    return is_stopped_;
   }
 
   void Stop() {
-    stop_ = true;
+    is_stopped_ = true;
   }
   void Join() {
     thread_.join();
@@ -208,38 +208,49 @@ class EchoServer {
   thread thread_;
   pthread_t pthread_;
   CountDownLatch pthread_sync_;
-  std::atomic<bool> stop_ { false };
+  std::atomic<bool> is_stopped_ { false };
 
   bool slow_read_ = false;
 };
 
 void handler(int /* signal */) {}
 
+// Test that errors returned when reading from a TLS socket are reasonable and
+// contain the address of the remote.
 TEST_F(TlsSocketTest, TestRecvFailure) {
-    EchoServer server;
-    server.Start();
-    unique_ptr<Socket> client_sock;
-    NO_FATALS(ConnectClient(server.listen_addr(), &client_sock));
-    unique_ptr<uint8_t[]> buf(new uint8_t[kEchoChunkSize]);
-
-    SleepFor(MonoDelta::FromMilliseconds(100));
-    server.Stop();
-
-    size_t nwritten;
-    ASSERT_OK(client_sock->BlockingWrite(buf.get(), kEchoChunkSize, &nwritten,
-        MonoTime::Now() + kTimeout));
-    size_t nread;
+  EchoServer server;
+  server.Start();
+  unique_ptr<Socket> client_sock;
+  NO_FATALS(ConnectClient(server.listen_addr(), &client_sock));
+  unique_ptr<uint8_t[]> buf(new uint8_t[kEchoChunkSize]);
 
-    ASSERT_OK(client_sock->BlockingRecv(buf.get(), kEchoChunkSize, &nread,
-        MonoTime::Now() + kTimeout));
+  // The client writes to the server. This blocks on the server receiving,
+  // so once this completes the server is in its "echo loop".
+  size_t nwritten_unused;
+  ASSERT_OK(client_sock->BlockingWrite(buf.get(),
+                                       kEchoChunkSize,
+                                       &nwritten_unused,
+                                       MonoTime::Now() + kTimeout));
 
-    Status s = client_sock->BlockingRecv(buf.get(), kEchoChunkSize, &nread,
-        MonoTime::Now() + kTimeout);
+  // Stop the server. The client can still Recv once from the server because
+  // the server is trying to echo what was sent by the client.
+  server.Stop();
 
-    ASSERT_TRUE(!s.ok());
-    ASSERT_TRUE(s.IsNetworkError());
-    ASSERT_STR_MATCHES(s.message().ToString(), "BlockingRecv error: failed to read from "
-                                               "TLS socket \\(remote: 127.0.0.1:[0-9]+\\):
");
+  // The first Recv succeeds. This unblocks the server and causes it to exit.
+  size_t nread_unused;
+  ASSERT_OK(client_sock->BlockingRecv(buf.get(),
+                                      kEchoChunkSize,
+                                      &nread_unused,
+                                      MonoTime::Now() + kTimeout));
+
+  // The server has closed the connection, so the second Recv will fail.
+  Status s = client_sock->BlockingRecv(buf.get(),
+                                       kEchoChunkSize,
+                                       &nread_unused,
+                                       MonoTime::Now() + kTimeout);
+  ASSERT_TRUE(s.IsNetworkError()) << s.ToString();
+  ASSERT_STR_MATCHES(s.message().ToString(), "BlockingRecv error: failed to read from "
+                                             "TLS socket \\(remote: 127.0.0.1:[0-9]+\\):
");
 }
 
 // Test for failures to handle EINTR during TLS connection
@@ -257,7 +268,7 @@ TEST_F(TlsSocketTest, TestTlsSocketInterrupted) {
 
   // Start a thread to send signals to the server thread.
   thread killer([&]() {
-      while (!server.stopped()) {
+      while (!server.is_stopped()) {
         PCHECK(pthread_kill(server.pthread(), SIGUSR2) == 0);
         SleepFor(MonoDelta::FromMicroseconds(rand() % 10));
       }


Mime
View raw message