From commits-return-8060-apmail-zookeeper-commits-archive=zookeeper.apache.org@zookeeper.apache.org Mon Dec 16 21:16:44 2019 Return-Path: X-Original-To: apmail-zookeeper-commits-archive@www.apache.org Delivered-To: apmail-zookeeper-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by minotaur.apache.org (Postfix) with SMTP id 4209A190BB for ; Mon, 16 Dec 2019 21:16:44 +0000 (UTC) Received: (qmail 55698 invoked by uid 500); 16 Dec 2019 21:10:02 -0000 Delivered-To: apmail-zookeeper-commits-archive@zookeeper.apache.org Received: (qmail 55673 invoked by uid 500); 16 Dec 2019 21:10:02 -0000 Mailing-List: contact commits-help@zookeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@zookeeper.apache.org Delivered-To: mailing list commits@zookeeper.apache.org Received: (qmail 55661 invoked by uid 99); 16 Dec 2019 21:10:02 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 16 Dec 2019 21:10:02 +0000 Received: by gitbox.apache.org (ASF Mail Server at gitbox.apache.org, from userid 33) id 9B92C8D80D; Mon, 16 Dec 2019 21:10:02 +0000 (UTC) Date: Mon, 16 Dec 2019 21:10:02 +0000 To: "commits@zookeeper.apache.org" Subject: [zookeeper] branch branch-3.6 updated: ZOOKEEPER-3651: try to fix flaky NettyServerCnxnFactoryTest MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Message-ID: <157653060243.15405.10514341605481120883@gitbox.apache.org> From: eolivelli@apache.org X-Git-Host: gitbox.apache.org X-Git-Repo: zookeeper X-Git-Refname: refs/heads/branch-3.6 X-Git-Reftype: branch X-Git-Oldrev: b04758e1903b0574c6b8ae82f8ff4e735cfad6b5 X-Git-Newrev: 66e9eda68539c7eda5c67af4f12003382aba8529 X-Git-Rev: 66e9eda68539c7eda5c67af4f12003382aba8529 X-Git-NotificationType: ref_changed_plus_diff X-Git-Multimail-Version: 1.5.dev Auto-Submitted: auto-generated This is an automated email from the ASF dual-hosted git repository. eolivelli pushed a commit to branch branch-3.6 in repository https://gitbox.apache.org/repos/asf/zookeeper.git The following commit(s) were added to refs/heads/branch-3.6 by this push: new 66e9eda ZOOKEEPER-3651: try to fix flaky NettyServerCnxnFactoryTest 66e9eda is described below commit 66e9eda68539c7eda5c67af4f12003382aba8529 Author: Mate Szalay-Beko AuthorDate: Mon Dec 16 22:09:36 2019 +0100 ZOOKEEPER-3651: try to fix flaky NettyServerCnxnFactoryTest The testOutstandingHandshakeLimit is flaky, I tried to fix it in this commit. - I added extra comments and did some restructuring in the code. - Avoiding to start unnecessary ZooKeeper servers for tests don't require it - Decreasing the number of client connections the test tries to initiate - Increasing the timeout to make sure the connections get established - Filtering the 'SyncConnected' events in the client watcher to make sure the given connection is really established before counting it I think the last two points above should fix the flakiness. I tried to run the test in docker, and before the fix it failed for me once in every 4-5 execution. After applying these changes I re-executed it 100 times without failure. If these fixes are not enough, then we can introduce some only-visible-by-test method to add sleep in the SSLHandshake process in the production code to force to have handshakes in parallel. However, it would be nice to avoid that. Let's hope that these fixes will be enough. Author: Mate Szalay-Beko Reviewers: Enrico Olivelli , Norbert Kalmar Closes #1184 from symat/ZOOKEEPER-3651 (cherry picked from commit 20daae7d5fa934629e7825ed72e66ad76a94d6aa) Signed-off-by: Enrico Olivelli --- .../server/NettyServerCnxnFactoryTest.java | 150 ++++++++++++++++----- 1 file changed, 114 insertions(+), 36 deletions(-) diff --git a/zookeeper-server/src/test/java/org/apache/zookeeper/server/NettyServerCnxnFactoryTest.java b/zookeeper-server/src/test/java/org/apache/zookeeper/server/NettyServerCnxnFactoryTest.java index afb97b1..76136c4 100644 --- a/zookeeper-server/src/test/java/org/apache/zookeeper/server/NettyServerCnxnFactoryTest.java +++ b/zookeeper-server/src/test/java/org/apache/zookeeper/server/NettyServerCnxnFactoryTest.java @@ -43,24 +43,33 @@ public class NettyServerCnxnFactoryTest extends ClientBase { private static final Logger LOG = LoggerFactory .getLogger(NettyServerCnxnFactoryTest.class); - final LinkedBlockingQueue zks = new LinkedBlockingQueue(); + ClientX509Util x509Util; + final LinkedBlockingQueue zooKeeperClients = new LinkedBlockingQueue<>(); + @Override public void setUp() throws Exception { System.setProperty(ServerCnxnFactory.ZOOKEEPER_SERVER_CNXN_FACTORY, "org.apache.zookeeper.server.NettyServerCnxnFactory"); - super.setUp(); + + // by default, we don't start any ZooKeeper server, as not all the tests are needing it. } @Override public void tearDown() throws Exception { - System.clearProperty(ServerCnxnFactory.ZOOKEEPER_SERVER_CNXN_FACTORY); - // clean up - for (ZooKeeper zk : zks) { + System.clearProperty(ServerCnxnFactory.ZOOKEEPER_SERVER_CNXN_FACTORY); + if (x509Util != null) { + SSLAuthTest.clearSecureSetting(x509Util); + } + for (ZooKeeper zk : zooKeeperClients) { zk.close(); } - super.tearDown(); + + //stopping the server only if it was started + if (serverFactory != null) { + super.tearDown(); + } } @Test @@ -96,63 +105,132 @@ public class NettyServerCnxnFactoryTest extends ClientBase { Assert.assertTrue(factory.getParentChannel().isActive()); } + /* + * In this test we are flooding the server with SSL connections, and expecting that not + * all the connection will succeed at once. Some of the connections should be closed, + * as there is a maximum number of parallel SSL handshake the server is willing to do + * for security reasons. + */ @Test public void testOutstandingHandshakeLimit() throws Exception { + // setting up SSL params, but disable some debug logs + x509Util = SSLAuthTest.setUpSecure(); + System.clearProperty("javax.net.debug"); + + // starting a single server (it will be closed in the tearDown) + setUpWithServerId(1); + + // initializing the statistics SimpleCounter tlsHandshakeExceeded = (SimpleCounter) ServerMetrics.getMetrics().TLS_HANDSHAKE_EXCEEDED; tlsHandshakeExceeded.reset(); Assert.assertEquals(tlsHandshakeExceeded.get(), 0); - ClientX509Util x509Util = SSLAuthTest.setUpSecure(); + // setting the HandshakeLimit to 3, so only 3 SSL handshakes can happen in parallel NettyServerCnxnFactory factory = (NettyServerCnxnFactory) serverFactory; factory.setSecure(true); - factory.setOutstandingHandshakeLimit(10); + factory.setOutstandingHandshakeLimit(3); + // starting the threads that will try to connect to the server + // we will have 3 threads, each of them establishing 3 connections int threadNum = 3; - int cnxnPerThread = 10; - Thread[] cnxnWorker = new Thread[threadNum]; - + int cnxnPerThread = 3; + int cnxnLimit = threadNum * cnxnPerThread; AtomicInteger cnxnCreated = new AtomicInteger(0); CountDownLatch latch = new CountDownLatch(1); - + Thread[] cnxnWorker = new Thread[threadNum]; for (int i = 0; i < cnxnWorker.length; i++) { - cnxnWorker[i] = new Thread() { - @Override - public void run() { - for (int i = 0; i < cnxnPerThread; i++) { - try { - zks.add(new ZooKeeper(hostPort, 3000, new Watcher() { - @Override - public void process(WatchedEvent event) { - int created = cnxnCreated.addAndGet(1); - if (created == threadNum * cnxnPerThread) { - latch.countDown(); - } - } - })); - } catch (Exception e) { - LOG.info("Error while creating zk client", e); - } - } - } - }; + cnxnWorker[i] = new ClientConnectionGenerator(i, cnxnPerThread, cnxnCreated, cnxnLimit, latch, zooKeeperClients); cnxnWorker[i].start(); } - Assert.assertThat(latch.await(3, TimeUnit.SECONDS), Matchers.is(true)); - LOG.info("created {} connections", threadNum * cnxnPerThread); + // we might need to wait potentially for a longer time for all the connection to get established, + // as the ZooKeeper Server will close some of the connections and the clients will have to re-try + boolean allConnectionsCreatedInTime = latch.await(30, TimeUnit.SECONDS); + int actualConnections = cnxnCreated.get(); + LOG.info("created {} connections", actualConnections); + if (!allConnectionsCreatedInTime) { + Assert.fail(String.format("Only %d out of %d connections created!", actualConnections, cnxnLimit)); + } - // Assert throttling not 0 + // Assert the server refused some of the connections because the handshake limit was reached + // (throttling should be greater than 0) long handshakeThrottledNum = tlsHandshakeExceeded.get(); LOG.info("TLS_HANDSHAKE_EXCEEDED: {}", handshakeThrottledNum); Assert.assertThat("The number of handshake throttled should be " + "greater than 0", handshakeThrottledNum, Matchers.greaterThan(0L)); - // Assert there is no outstanding handshake anymore + // Assert there is no outstanding handshake anymore, all the clients connected in the end int outstandingHandshakeNum = factory.getOutstandingHandshakeNum(); LOG.info("outstanding handshake is {}", outstandingHandshakeNum); Assert.assertThat("The outstanding handshake number should be 0 " + "after all cnxns established", outstandingHandshakeNum, Matchers.is(0)); + } + + private final class ClientConnectionWatcher implements Watcher { + + private final AtomicInteger cnxnCreated; + private final int cnxnLimit; + private final int cnxnThreadId; + private final int cnxnId; + private final CountDownLatch latch; + + public ClientConnectionWatcher(AtomicInteger cnxnCreated, int cnxnLimit, int cnxnThreadId, + int cnxnId, CountDownLatch latch) { + this.cnxnCreated = cnxnCreated; + this.cnxnLimit = cnxnLimit; + this.cnxnThreadId = cnxnThreadId; + this.cnxnId = cnxnId; + this.latch = latch; + } + + @Override + public void process(WatchedEvent event) { + LOG.info(String.format("WATCHER [thread: %d, cnx:%d] - new event: %s", cnxnThreadId, cnxnId, event.toString())); + if (event.getState() == Event.KeeperState.SyncConnected) { + int created = cnxnCreated.addAndGet(1); + if (created == cnxnLimit) { + latch.countDown(); + } + } + } } + + + private final class ClientConnectionGenerator extends Thread { + + private final int cnxnThreadId; + private final int cnxnPerThread; + private final AtomicInteger cnxnCreated; + private final int cnxnLimit; + private final CountDownLatch latch; + private final LinkedBlockingQueue zks; + + private ClientConnectionGenerator(int cnxnThreadId, int cnxnPerThread, + AtomicInteger cnxnCreated, int cnxnLimit, + CountDownLatch latch, + LinkedBlockingQueue zks) { + this.cnxnThreadId = cnxnThreadId; + this.cnxnPerThread = cnxnPerThread; + this.cnxnCreated = cnxnCreated; + this.cnxnLimit = cnxnLimit; + this.latch = latch; + this.zks = zks; + } + + @Override + public void run() { + + for (int j = 0; j < cnxnPerThread; j++) { + try { + zks.add(new ZooKeeper(hostPort, 30000, + new ClientConnectionWatcher(cnxnCreated, cnxnLimit, cnxnThreadId, j, latch))); + } catch (Exception e) { + LOG.info("Error while creating zk client", e); + } + } + } + } + }