From notifications-return-2047-archive-asf-public=cust-asf.ponee.io@zookeeper.apache.org Wed Oct 2 10:45:24 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id D259418064F for ; Wed, 2 Oct 2019 12:45:23 +0200 (CEST) Received: (qmail 67846 invoked by uid 500); 2 Oct 2019 10:45:23 -0000 Mailing-List: contact notifications-help@zookeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@zookeeper.apache.org Delivered-To: mailing list notifications@zookeeper.apache.org Received: (qmail 67820 invoked by uid 99); 2 Oct 2019 10:45:23 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 Oct 2019 10:45:23 +0000 From: GitBox To: notifications@zookeeper.apache.org Subject: [GitHub] [zookeeper] eolivelli commented on a change in pull request #1048: ZOOKEEPER-3188: Improve resilience to network Message-ID: <157001312307.31960.16610896350534164947.gitbox@gitbox.apache.org> Date: Wed, 02 Oct 2019 10:45:23 -0000 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit eolivelli commented on a change in pull request #1048: ZOOKEEPER-3188: Improve resilience to network URL: https://github.com/apache/zookeeper/pull/1048#discussion_r330476179 ########## File path: zookeeper-server/src/main/java/org/apache/zookeeper/server/quorum/Leader.java ########## @@ -418,66 +426,108 @@ public boolean isQuorumSynced(QuorumVerifier qv) { class LearnerCnxAcceptor extends ZooKeeperCriticalThread { - private volatile boolean stop = false; + private final AtomicBoolean stop = new AtomicBoolean(false); + private final AtomicBoolean fail = new AtomicBoolean(false); - public LearnerCnxAcceptor() { - super("LearnerCnxAcceptor-" + ss.getLocalSocketAddress(), 
zk.getZooKeeperServerListener()); + LearnerCnxAcceptor() { + super("LearnerCnxAcceptor-" + serverSockets.stream() + .map(ServerSocket::getLocalSocketAddress) + .map(Objects::toString) + .collect(Collectors.joining(",")), + zk.getZooKeeperServerListener()); } @Override public void run() { - try { - while (!stop) { - Socket s = null; - boolean error = false; - try { - s = ss.accept(); - - // start with the initLimit, once the ack is processed - // in LearnerHandler switch to the syncLimit - s.setSoTimeout(self.tickTime * self.initLimit); - s.setTcpNoDelay(nodelay); - - BufferedInputStream is = new BufferedInputStream(s.getInputStream()); - LearnerHandler fh = new LearnerHandler(s, is, Leader.this); - fh.start(); - } catch (SocketException e) { - error = true; - if (stop) { - LOG.info("exception while shutting down acceptor: " + e); - - // When Leader.shutdown() calls ss.close(), - // the call to accept throws an exception. - // We catch and set stop to true. - stop = true; - } else { - throw e; - } - } catch (SaslException e) { - LOG.error("Exception while connecting to quorum learner", e); - error = true; - } catch (Exception e) { - error = true; + if (!stop.get() && !serverSockets.isEmpty()) { + ExecutorService executor = Executors.newFixedThreadPool(serverSockets.size()); + CountDownLatch latch = new CountDownLatch(serverSockets.size()); + + serverSockets.forEach(serverSocket -> + executor.submit(new LearnerCnxAcceptorHandler(serverSocket, latch))); + + try { + latch.await(); + } catch (InterruptedException ie) { + LOG.error("Interrupted while sleeping. Ignoring exception", ie); + } finally { + closeSockets(); Review comment: We should shut down the Executor. I am thinking mostly about test case runs. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services