Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 5B464200B50 for ; Sat, 13 Aug 2016 15:57:58 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 4F4C4160AA6; Sat, 13 Aug 2016 13:57:58 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id C0F08160A73 for ; Sat, 13 Aug 2016 15:57:56 +0200 (CEST) Received: (qmail 27375 invoked by uid 500); 13 Aug 2016 13:57:55 -0000 Mailing-List: contact commits-help@zookeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ Delivered-To: mailing list commits@zookeeper.apache.org Received: (qmail 27364 invoked by uid 99); 13 Aug 2016 13:57:55 -0000 Received: from pnap-us-west-generic-nat.apache.org (HELO spamd1-us-west.apache.org) (209.188.14.142) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 13 Aug 2016 13:57:55 +0000 Received: from localhost (localhost [127.0.0.1]) by spamd1-us-west.apache.org (ASF Mail Server at spamd1-us-west.apache.org) with ESMTP id 51509C72FF for ; Sat, 13 Aug 2016 13:57:55 +0000 (UTC) X-Virus-Scanned: Debian amavisd-new at spamd1-us-west.apache.org X-Spam-Flag: NO X-Spam-Score: 1.291 X-Spam-Level: * X-Spam-Status: No, score=1.291 tagged_above=-999 required=6.31 tests=[KAM_ASCII_DIVIDERS=0.8, KAM_LAZY_DOMAIN_SECURITY=1, RP_MATCHES_RCVD=-0.519, T_FILL_THIS_FORM_SHORT=0.01] autolearn=disabled Received: from mx2-lw-us.apache.org ([10.40.0.8]) by localhost (spamd1-us-west.apache.org [10.40.0.7]) (amavisd-new, port 10024) with ESMTP id 2ULubKgw78wP for ; Sat, 13 Aug 2016 13:57:52 +0000 (UTC) Received: from mailrelay1-us-west.apache.org (mailrelay1-us-west.apache.org [209.188.14.139]) by mx2-lw-us.apache.org (ASF Mail Server at 
mx2-lw-us.apache.org) with ESMTP id 5FBED5F230 for ; Sat, 13 Aug 2016 13:57:52 +0000 (UTC) Received: from svn01-us-west.apache.org (svn.apache.org [10.41.0.6]) by mailrelay1-us-west.apache.org (ASF Mail Server at mailrelay1-us-west.apache.org) with ESMTP id 9B594E0177 for ; Sat, 13 Aug 2016 13:57:51 +0000 (UTC) Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id BC4F13A02FD for ; Sat, 13 Aug 2016 13:57:50 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1756270 - in /zookeeper/branches/branch-3.5: ./ src/java/main/org/apache/zookeeper/server/ src/java/main/org/apache/zookeeper/server/quorum/ src/java/test/org/apache/zookeeper/server/ src/java/test/org/apache/zookeeper/test/ Date: Sat, 13 Aug 2016 13:57:50 -0000 To: commits@zookeeper.apache.org From: fpj@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20160813135750.BC4F13A02FD@svn01-us-west.apache.org> archived-at: Sat, 13 Aug 2016 13:57:58 -0000 Author: fpj Date: Sat Aug 13 13:57:49 2016 New Revision: 1756270 URL: http://svn.apache.org/viewvc?rev=1756270&view=rev Log: ZOOKEEPER-2247: Zookeeper service becomes unavailable when leader fails to write transaction log (Rakesh via fpj) Added: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerListenerImpl.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerShutdownHandler.java zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/test/NonRecoverableErrorTest.java Modified: zookeeper/branches/branch-3.5/CHANGES.txt zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerMain.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Follower.java 
zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Leader.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Learner.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/LearnerZooKeeperServer.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Observer.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/QuorumZooKeeperServer.java zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ReadOnlyZooKeeperServer.java zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/ZooKeeperServerMainTest.java Modified: zookeeper/branches/branch-3.5/CHANGES.txt URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/CHANGES.txt?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/CHANGES.txt (original) +++ zookeeper/branches/branch-3.5/CHANGES.txt Sat Aug 13 13:57:49 2016 @@ -21,6 +21,9 @@ BUGFIXES: (Abraham Fine via phunt) Fix command handling in the C client shell (phunt via fpj) + + ZOOKEEPER-2247: Zookeeper service becomes unavailable when leader + fails to write transaction log (Rakesh via fpj) IMPROVEMENTS: Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java Sat Aug 13 
13:57:49 2016 @@ -102,8 +102,8 @@ public class ZooKeeperServer implements protected RequestProcessor firstProcessor; protected volatile State state = State.INITIAL; - enum State { - INITIAL, RUNNING, SHUTDOWN; + protected enum State { + INITIAL, RUNNING, SHUTDOWN, ERROR; } /** @@ -122,7 +122,9 @@ public class ZooKeeperServer implements protected ServerCnxnFactory secureServerCnxnFactory; private final ServerStats serverStats; - private final ZooKeeperServerListener listener = new ZooKeeperServerListenerImpl(); + private final ZooKeeperServerListener listener; + private ZooKeeperServerShutdownHandler zkShutdownHandler; + void removeCnxn(ServerCnxn cnxn) { zkDb.removeCnxn(cnxn); } @@ -136,6 +138,7 @@ public class ZooKeeperServer implements */ public ZooKeeperServer() { serverStats = new ServerStats(this); + listener = new ZooKeeperServerListenerImpl(this); } /** @@ -152,7 +155,7 @@ public class ZooKeeperServer implements this.tickTime = tickTime; setMinSessionTimeout(minSessionTimeout); setMaxSessionTimeout(maxSessionTimeout); - + listener = new ZooKeeperServerListenerImpl(this); LOG.info("Created server with tickTime " + tickTime + " minSessionTimeout " + getMinSessionTimeout() + " maxSessionTimeout " + getMaxSessionTimeout() @@ -446,7 +449,7 @@ public class ZooKeeperServer implements registerJMX(); - state = State.RUNNING; + setState(State.RUNNING); notifyAll(); } @@ -463,20 +466,6 @@ public class ZooKeeperServer implements return listener; } - /** - * Default listener implementation, which will do a graceful shutdown on - * notification - */ - private class ZooKeeperServerListenerImpl implements - ZooKeeperServerListener { - - @Override - public void notifyStopping(String threadName, int exitCode) { - LOG.info("Thread {} exits, error code {}", threadName, exitCode); - shutdown(); - } - } - protected void createSessionTracker() { sessionTracker = new SessionTrackerImpl(this, zkDb.getSessionWithTimeOuts(), tickTime, 1, getZooKeeperServerListener()); @@ -486,19 
+475,61 @@ public class ZooKeeperServer implements ((SessionTrackerImpl)sessionTracker).start(); } + /** + * Sets the state of ZooKeeper server. After changing the state, it notifies + * the server state change to a registered shutdown handler, if any. + *

<p> + * The following are the server state transitions: + *
<ul>
<li>During startup the server will be in the INITIAL state.</li>
+ *
<li>After successfully starting, the server sets the state to RUNNING. + *
</li> + *
<li>The server transitions to the ERROR state if it hits an internal + * error. {@link ZooKeeperServerListenerImpl} notifies any critical resource + * error events, e.g., SyncRequestProcessor not being able to write a txn to + * disk.</li>
+ *
<li>During shutdown the server sets the state to SHUTDOWN, which + * corresponds to the server not running.</li>
</ul> + * + * @param state new server state. + */ + protected void setState(State state) { + this.state = state; + // Notify server state changes to the registered shutdown handler, if any. + if (zkShutdownHandler != null) { + zkShutdownHandler.handle(state); + } else { + LOG.error("ZKShutdownHandler is not registered, so ZooKeeper server " + + "won't take any action on ERROR or SHUTDOWN server state changes"); + } + } + + /** + * This can be used while shutting down the server to see whether the server + * is already shutdown or not. + * + * @return true if the server is running or server hits an error, false + * otherwise. + */ + protected boolean canShutdown() { + return state == State.RUNNING || state == State.ERROR; + } + + /** + * @return true if the server is running, false otherwise. + */ public boolean isRunning() { return state == State.RUNNING; } public synchronized void shutdown() { - if (!isRunning()) { + if (!canShutdown()) { LOG.debug("ZooKeeper server is not running, so not proceeding to shutdown!"); return; } LOG.info("shutting down"); // new RuntimeException("Calling shutdown").printStackTrace(); - state = State.SHUTDOWN; + setState(State.SHUTDOWN); // Since sessionTracker and syncThreads poll we just have to // set running to false and they will detect it during the poll // interval. @@ -1142,4 +1173,15 @@ public class ZooKeeperServer implements return sessionTracker.getSessionExpiryMap(); } + /** + * This method is used to register the ZooKeeperServerShutdownHandler to get + * server's error or shutdown state change notifications. + * {@link ZooKeeperServerShutdownHandler#handle(State)} will be called for + * every server state changes {@link #setState(State)}. 
+ * + * @param zkShutdownHandler shutdown handler + */ + void registerServerShutdownHandler(ZooKeeperServerShutdownHandler zkShutdownHandler) { + this.zkShutdownHandler = zkShutdownHandler; + } } Added: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerListenerImpl.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerListenerImpl.java?rev=1756270&view=auto ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerListenerImpl.java (added) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerListenerImpl.java Sat Aug 13 13:57:49 2016 @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.zookeeper.server; + +import org.apache.zookeeper.server.ZooKeeperServer.State; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Default listener implementation, which will be used to notify internal + * errors. 
For example, if some critical thread has stopped due to fatal errors, + * then it will get notifications and will change the state of ZooKeeper server + * to ERROR representing an error status. + */ +class ZooKeeperServerListenerImpl implements ZooKeeperServerListener { + private static final Logger LOG = LoggerFactory + .getLogger(ZooKeeperServerListenerImpl.class); + + private final ZooKeeperServer zkServer; + + ZooKeeperServerListenerImpl(ZooKeeperServer zkServer) { + this.zkServer = zkServer; + } + + @Override + public void notifyStopping(String threadName, int exitCode) { + LOG.info("Thread {} exits, error code {}", threadName, exitCode); + zkServer.setState(State.ERROR); + } +} Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerMain.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerMain.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerMain.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerMain.java Sat Aug 13 13:57:49 2016 @@ -19,6 +19,7 @@ package org.apache.zookeeper.server; import java.io.IOException; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import javax.management.JMException; @@ -118,9 +119,15 @@ public class ZooKeeperServerMain { // run() in this thread. // create a file logger url from the command line args txnLog = new FileTxnSnapLog(config.dataLogDir, config.dataDir); - ZooKeeperServer zkServer = new ZooKeeperServer( txnLog, + final ZooKeeperServer zkServer = new ZooKeeperServer(txnLog, config.tickTime, config.minSessionTimeout, config.maxSessionTimeout, null); + // Registers shutdown handler which will be used to know the + // server error or shutdown state changes. 
+ final CountDownLatch shutdownLatch = new CountDownLatch(1); + zkServer.registerServerShutdownHandler( + new ZooKeeperServerShutdownHandler(shutdownLatch)); + // Start Admin server adminServer = AdminServerFactory.createAdminServer(); adminServer.setZooKeeperServer(zkServer); @@ -146,14 +153,19 @@ public class ZooKeeperServerMain { ); containerManager.start(); + // Watch status of ZooKeeper server. It will do a graceful shutdown + // if the server is not running or hits an internal error. + shutdownLatch.await(); + + shutdown(); + if (cnxnFactory != null) { cnxnFactory.join(); } if (secureCnxnFactory != null) { secureCnxnFactory.join(); } - - if (zkServer.isRunning()) { + if (zkServer.canShutdown()) { zkServer.shutdown(); } } catch (InterruptedException e) { @@ -180,9 +192,16 @@ public class ZooKeeperServerMain { secureCnxnFactory.shutdown(); } try { - adminServer.shutdown(); + if (adminServer != null) { + adminServer.shutdown(); + } } catch (AdminServerException e) { LOG.warn("Problem stopping AdminServer", e); } } + + // VisibleForTesting + ServerCnxnFactory getCnxnFactory() { + return cnxnFactory; + } } Added: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerShutdownHandler.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerShutdownHandler.java?rev=1756270&view=auto ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerShutdownHandler.java (added) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/ZooKeeperServerShutdownHandler.java Sat Aug 13 13:57:49 2016 @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.zookeeper.server; + +import java.util.concurrent.CountDownLatch; + +import org.apache.zookeeper.server.ZooKeeperServer.State; + +/** + * ZooKeeper server shutdown handler which will be used to handle ERROR or + * SHUTDOWN server state transitions, which in turn releases the associated + * shutdown latch. + */ +class ZooKeeperServerShutdownHandler { + private final CountDownLatch shutdownLatch; + + ZooKeeperServerShutdownHandler(CountDownLatch shutdownLatch) { + this.shutdownLatch = shutdownLatch; + } + + /** + * This will be invoked when the server transition to a new server state. 
+ * + * @param state new server state + */ + void handle(State state) { + if (state == State.ERROR || state == State.SHUTDOWN) { + shutdownLatch.countDown(); + } + } +} Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Follower.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Follower.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Follower.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Follower.java Sat Aug 13 13:57:49 2016 @@ -85,7 +85,7 @@ public class Follower extends Learner{ } syncWithLeader(newEpochZxid); QuorumPacket qp = new QuorumPacket(); - while (self.isRunning()) { + while (this.isRunning()) { readPacket(qp); processPacket(qp); } Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Leader.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Leader.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Leader.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Leader.java Sat Aug 13 13:57:49 2016 @@ -586,6 +586,12 @@ public class Leader { } } + // check leader running status + if (!this.isRunning()) { + shutdown("Unexpected internal error"); + return; + } + if (!tickSkip && !syncedAckSet.hasAllQuorums()) { // Lost quorum of last committed and/or last proposed // config, set shutdown flag @@ -1380,4 +1386,8 @@ public class Leader { return "UNKNOWN"; } } + + private boolean isRunning() { + return self.isRunning() && zk.isRunning(); + } } 
Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Learner.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Learner.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Learner.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Learner.java Sat Aug 13 13:57:49 2016 @@ -622,4 +622,8 @@ public class Learner { zk.shutdown(); } } + + boolean isRunning() { + return self.isRunning() && zk.isRunning(); + } } Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/LearnerZooKeeperServer.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/LearnerZooKeeperServer.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/LearnerZooKeeperServer.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/LearnerZooKeeperServer.java Sat Aug 13 13:57:49 2016 @@ -158,7 +158,7 @@ public abstract class LearnerZooKeeperSe @Override public synchronized void shutdown() { - if (!isRunning()) { + if (!canShutdown()) { LOG.debug("ZooKeeper server is not running, so not proceeding to shutdown!"); return; } Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Observer.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Observer.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- 
zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Observer.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/Observer.java Sat Aug 13 13:57:49 2016 @@ -23,13 +23,8 @@ import java.net.InetSocketAddress; import java.nio.ByteBuffer; import org.apache.jute.Record; -import org.apache.zookeeper.ZooDefs.OpCode; import org.apache.zookeeper.server.ObserverBean; import org.apache.zookeeper.server.Request; -import org.apache.zookeeper.server.quorum.QuorumPeer.LearnerType; -import org.apache.zookeeper.server.quorum.QuorumPeer.QuorumServer; -import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState; -import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException; import org.apache.zookeeper.server.quorum.flexible.QuorumVerifier; import org.apache.zookeeper.server.util.SerializeUtils; import org.apache.zookeeper.txn.SetDataTxn; @@ -78,7 +73,7 @@ public class Observer extends Learner{ syncWithLeader(newLeaderZxid); QuorumPacket qp = new QuorumPacket(); - while (self.isRunning()) { + while (this.isRunning()) { readPacket(qp); processPacket(qp); } Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ObserverZooKeeperServer.java Sat Aug 13 13:57:49 2016 @@ -132,7 +132,7 @@ public class ObserverZooKeeperServer ext @Override public synchronized void shutdown() { - if (!isRunning()) { + if (!canShutdown()) { LOG.debug("ZooKeeper server is not running, so not 
proceeding to shutdown!"); return; } Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/QuorumZooKeeperServer.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/QuorumZooKeeperServer.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/QuorumZooKeeperServer.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/QuorumZooKeeperServer.java Sat Aug 13 13:57:49 2016 @@ -156,4 +156,9 @@ public abstract class QuorumZooKeeperSer pwriter.println("membership: "); pwriter.print(new String(self.getQuorumVerifier().toString().getBytes())); } + + @Override + protected void setState(State state) { + this.state = state; + } } Modified: zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ReadOnlyZooKeeperServer.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ReadOnlyZooKeeperServer.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ReadOnlyZooKeeperServer.java (original) +++ zookeeper/branches/branch-3.5/src/java/main/org/apache/zookeeper/server/quorum/ReadOnlyZooKeeperServer.java Sat Aug 13 13:57:49 2016 @@ -137,7 +137,7 @@ public class ReadOnlyZooKeeperServer ext @Override public synchronized void shutdown() { - if (!isRunning()) { + if (!canShutdown()) { LOG.debug("ZooKeeper server is not running, so not proceeding to shutdown!"); return; } @@ -172,4 +172,9 @@ public class ReadOnlyZooKeeperServer ext pwriter.print("peerType="); pwriter.println(self.getLearnerType().ordinal()); } + + @Override + protected void setState(State 
state) { + this.state = state; + } } Modified: zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/ZooKeeperServerMainTest.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/ZooKeeperServerMainTest.java?rev=1756270&r1=1756269&r2=1756270&view=diff ============================================================================== --- zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/ZooKeeperServerMainTest.java (original) +++ zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/server/ZooKeeperServerMainTest.java Sat Aug 13 13:57:49 2016 @@ -19,6 +19,7 @@ package org.apache.zookeeper.server; import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT; +import static org.junit.Assert.fail; import java.io.File; import java.io.FileWriter; @@ -124,6 +125,10 @@ public class ZooKeeperServerMainTest ext throw new IOException("Failed to delete file: " + f); } } + + ServerCnxnFactory getCnxnFactory() { + return main.getCnxnFactory(); + } } public static class TestZKSMain extends ZooKeeperServerMain { @@ -133,6 +138,63 @@ public class ZooKeeperServerMainTest ext } /** + * Test case for https://issues.apache.org/jira/browse/ZOOKEEPER-2247. + * Test to verify that even after non recoverable error (error while + * writing transaction log), ZooKeeper is still available. 
+ */ + @Test(timeout = 30000) + public void testNonRecoverableError() throws Exception { + ClientBase.setupTestEnv(); + + final int CLIENT_PORT = PortAssignment.unique(); + + MainThread main = new MainThread(CLIENT_PORT, true, null); + main.start(); + + Assert.assertTrue("waiting for server being up", + ClientBase.waitForServerUp("127.0.0.1:" + CLIENT_PORT, + CONNECTION_TIMEOUT)); + + + ZooKeeper zk = new ZooKeeper("127.0.0.1:" + CLIENT_PORT, + ClientBase.CONNECTION_TIMEOUT, this); + + zk.create("/foo1", "foobar".getBytes(), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + Assert.assertEquals(new String(zk.getData("/foo1", null, null)), "foobar"); + + // inject problem in server + ZooKeeperServer zooKeeperServer = main.getCnxnFactory() + .getZooKeeperServer(); + FileTxnSnapLog snapLog = zooKeeperServer.getTxnLogFactory(); + FileTxnSnapLog fileTxnSnapLogWithError = new FileTxnSnapLog( + snapLog.getDataDir(), snapLog.getSnapDir()) { + @Override + public void commit() throws IOException { + throw new IOException("Input/output error"); + } + }; + ZKDatabase newDB = new ZKDatabase(fileTxnSnapLogWithError); + zooKeeperServer.setZKDatabase(newDB); + + try { + // do create operation, so that injected IOException is thrown + zk.create("/foo2", "foobar".getBytes(), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + fail("IOException is expected as error is injected in transaction log commit funtionality"); + } catch (Exception e) { + // do nothing + } + zk.close(); + Assert.assertTrue("waiting for server down", + ClientBase.waitForServerDown("127.0.0.1:" + CLIENT_PORT, + ClientBase.CONNECTION_TIMEOUT)); + fileTxnSnapLogWithError.close(); + main.shutdown(); + main.deleteDirs(); + } + + /** * Verify the ability to start a standalone server instance. 
*/ @Test Added: zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/test/NonRecoverableErrorTest.java URL: http://svn.apache.org/viewvc/zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/test/NonRecoverableErrorTest.java?rev=1756270&view=auto ============================================================================== --- zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/test/NonRecoverableErrorTest.java (added) +++ zookeeper/branches/branch-3.5/src/java/test/org/apache/zookeeper/test/NonRecoverableErrorTest.java Sat Aug 13 13:57:49 2016 @@ -0,0 +1,185 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.zookeeper.test; + +import static org.apache.zookeeper.test.ClientBase.CONNECTION_TIMEOUT; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.UUID; + +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.PortAssignment; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.server.ZKDatabase; +import org.apache.zookeeper.server.persistence.FileTxnSnapLog; +import org.apache.zookeeper.server.quorum.QuorumPeer; +import org.apache.zookeeper.server.quorum.QuorumPeer.ServerState; +import org.apache.zookeeper.server.quorum.QuorumPeerTestBase; +import org.apache.zookeeper.test.ClientBase.CountdownWatcher; +import org.junit.Assert; +import org.junit.Test; + +/** + * This class tests the non-recoverable error behavior of quorum server. + */ +public class NonRecoverableErrorTest extends QuorumPeerTestBase { + private static final String NODE_PATH = "/noLeaderIssue"; + + /** + * Test case for https://issues.apache.org/jira/browse/ZOOKEEPER-2247. + * Test to verify that even after non recoverable error (error while + * writing transaction log), ZooKeeper is still available. + */ + @Test(timeout = 30000) + public void testZooKeeperServiceAvailableOnLeader() throws Exception { + int SERVER_COUNT = 3; + final int clientPorts[] = new int[SERVER_COUNT]; + StringBuilder sb = new StringBuilder(); + String server; + + for (int i = 0; i < SERVER_COUNT; i++) { + clientPorts[i] = PortAssignment.unique(); + server = "server." 
+ i + "=127.0.0.1:" + PortAssignment.unique() + + ":" + PortAssignment.unique() + ":participant;127.0.0.1:" + + clientPorts[i]; + sb.append(server + "\n"); + } + String currentQuorumCfgSection = sb.toString(); + MainThread mt[] = new MainThread[SERVER_COUNT]; + + for (int i = 0; i < SERVER_COUNT; i++) { + mt[i] = new MainThread(i, clientPorts[i], currentQuorumCfgSection, + false); + mt[i].start(); + } + + // ensure server started + for (int i = 0; i < SERVER_COUNT; i++) { + Assert.assertTrue("waiting for server " + i + " being up", + ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], + CONNECTION_TIMEOUT)); + } + + CountdownWatcher watcher = new CountdownWatcher(); + ZooKeeper zk = new ZooKeeper("127.0.0.1:" + clientPorts[0], + ClientBase.CONNECTION_TIMEOUT, watcher); + watcher.waitForConnected(ClientBase.CONNECTION_TIMEOUT); + + String data = "originalData"; + zk.create(NODE_PATH, data.getBytes(), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + + // get information of current leader + QuorumPeer leader = getLeaderQuorumPeer(mt); + assertNotNull("Leader must have been elected by now", leader); + + // inject problem in leader + FileTxnSnapLog snapLog = leader.getActiveServer().getTxnLogFactory(); + FileTxnSnapLog fileTxnSnapLogWithError = new FileTxnSnapLog( + snapLog.getDataDir(), snapLog.getSnapDir()) { + @Override + public void commit() throws IOException { + throw new IOException("Input/output error"); + } + }; + ZKDatabase originalZKDatabase = leader.getActiveServer() + .getZKDatabase(); + long leaderCurrentEpoch = leader.getCurrentEpoch(); + + ZKDatabase newDB = new ZKDatabase(fileTxnSnapLogWithError); + leader.getActiveServer().setZKDatabase(newDB); + + try { + // do create operation, so that injected IOException is thrown + zk.create(uniqueZnode(), data.getBytes(), Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + fail("IOException is expected due to error injected to transaction log commit"); + } catch (Exception e) { + // do nothing + } + + // 
resetting watcher so that this watcher can be again used to ensure + // that the zkClient is able to re-establish connection with the + // newly elected zookeeper quorum. + watcher.reset(); + waitForNewLeaderElection(leader, leaderCurrentEpoch); + + // ensure server started, give enough time, so that new leader election + // takes place + for (int i = 0; i < SERVER_COUNT; i++) { + Assert.assertTrue("waiting for server " + i + " being up", + ClientBase.waitForServerUp("127.0.0.1:" + clientPorts[i], + CONNECTION_TIMEOUT)); + } + + // revert back the error + leader.getActiveServer().setZKDatabase(originalZKDatabase); + + // verify that now ZooKeeper service is up and running + leader = getLeaderQuorumPeer(mt); + assertNotNull("New leader must have been elected by now", leader); + + String uniqueNode = uniqueZnode(); + watcher.waitForConnected(ClientBase.CONNECTION_TIMEOUT); + String createNode = zk.create(uniqueNode, data.getBytes(), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + // if node is created successfully then it means that ZooKeeper service + // is available + assertEquals("Failed to create znode", uniqueNode, createNode); + zk.close(); + // stop all severs + for (int i = 0; i < SERVER_COUNT; i++) { + mt[i].shutdown(); + } + } + + private void waitForNewLeaderElection(QuorumPeer peer, + long leaderCurrentEpoch) throws IOException, InterruptedException { + LOG.info("Waiting for new LE cycle.."); + int count = 100; // giving a grace period of 10seconds + while (count > 0) { + if (leaderCurrentEpoch == peer.getCurrentEpoch()) { + Thread.sleep(100); + } + count--; + } + Assert.assertNotEquals("New LE cycle must have triggered", + leaderCurrentEpoch, peer.getCurrentEpoch()); + } + + private QuorumPeer getLeaderQuorumPeer(MainThread[] mt) { + for (int i = mt.length - 1; i >= 0; i--) { + QuorumPeer quorumPeer = mt[i].getQuorumPeer(); + if (null != quorumPeer + && ServerState.LEADING == quorumPeer.getPeerState()) { + return quorumPeer; + } + } + return null; + } + 
+ private String uniqueZnode() { + UUID randomUUID = UUID.randomUUID(); + String node = NODE_PATH + "/" + randomUUID.toString(); + return node; + } +}