Return-Path: X-Original-To: apmail-bookkeeper-commits-archive@www.apache.org Delivered-To: apmail-bookkeeper-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DB7A21989C for ; Wed, 16 Mar 2016 03:51:14 +0000 (UTC) Received: (qmail 59302 invoked by uid 500); 16 Mar 2016 03:51:14 -0000 Delivered-To: apmail-bookkeeper-commits-archive@bookkeeper.apache.org Received: (qmail 59268 invoked by uid 500); 16 Mar 2016 03:51:14 -0000 Mailing-List: contact commits-help@bookkeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: bookkeeper-dev@bookkeeper.apache.org Delivered-To: mailing list commits@bookkeeper.apache.org Received: (qmail 59259 invoked by uid 99); 16 Mar 2016 03:51:14 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 16 Mar 2016 03:51:14 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A21B9DFA42; Wed, 16 Mar 2016 03:51:14 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sijie@apache.org To: commits@bookkeeper.apache.org Message-Id: <92e2b3b7a2d04804beae6f8d20573887@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: bookkeeper git commit: BOOKKEEPER-594: AutoRecovery shutting down on SyncDisconnected Date: Wed, 16 Mar 2016 03:51:14 +0000 (UTC) Repository: bookkeeper Updated Branches: refs/heads/master 9a8d62b1d -> 1a98088e3 BOOKKEEPER-594: AutoRecovery shutting down on SyncDisconnected Author: Matteo Merli Reviewers: Sijie Guo Closes #26 from merlimat/bk-594 Project: http://git-wip-us.apache.org/repos/asf/bookkeeper/repo Commit: http://git-wip-us.apache.org/repos/asf/bookkeeper/commit/1a98088e Tree: http://git-wip-us.apache.org/repos/asf/bookkeeper/tree/1a98088e Diff: http://git-wip-us.apache.org/repos/asf/bookkeeper/diff/1a98088e Branch: refs/heads/master Commit: 1a98088e38ca0a43c26d9a4847619b0a27bb90e8 Parents: 9a8d62b Author: Matteo Merli Authored: Tue Mar 15 20:51:00 2016 -0700 Committer: Sijie Guo Committed: Tue Mar 15 20:51:00 2016 -0700 ---------------------------------------------------------------------- .../replication/AutoRecoveryMain.java | 10 ++++++ .../replication/ReplicationWorker.java | 33 +++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/bookkeeper/blob/1a98088e/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java ---------------------------------------------------------------------- diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java index 4a84b3c..3039470 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java @@ -109,6 +109,15 @@ public class AutoRecoveryMain { deathWatcher = new AutoRecoveryDeathWatcher(this); } + public AutoRecoveryMain(ServerConfiguration conf, ZooKeeper zk) throws IOException, InterruptedException, KeeperException, + UnavailableException, CompatibilityException { + this.conf = conf; + this.zk = zk; + auditorElector = new AuditorElector(Bookie.getBookieAddress(conf).toString(), conf, zk); + replicationWorker = new ReplicationWorker(zk, conf, Bookie.getBookieAddress(conf)); + deathWatcher = new AutoRecoveryDeathWatcher(this); + } + /* * Start daemons */ @@ -134,6 +143,7 @@ public class AutoRecoveryMain { } private void shutdown(int exitCode) { + LOG.info("Shutting down auto recovery: {}", exitCode); if (shuttingDown) { return; } http://git-wip-us.apache.org/repos/asf/bookkeeper/blob/1a98088e/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java ---------------------------------------------------------------------- diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java index 1a10667..e26d09a 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java @@ -156,25 +156,42 @@ public class ReplicationWorker implements Runnable { try { rereplicate(); } catch (InterruptedException e) { - shutdown(); - Thread.currentThread().interrupt(); LOG.info("InterruptedException " + "while replicating fragments", e); + shutdown(); + Thread.currentThread().interrupt(); return; } catch (BKException e) { - shutdown(); LOG.error("BKException while replicating fragments", e); - return; + if (e instanceof BKException.BKWriteOnReadOnlyBookieException) { + waitTillTargetBookieIsWritable(); + } else { + waitBackOffTime(); + } } catch (UnavailableException e) { - shutdown(); LOG.error("UnavailableException " + "while replicating fragments", e); - return; + waitBackOffTime(); } } LOG.info("ReplicationWorker exited loop!"); } + private static void waitBackOffTime() { + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + } + } + + private void waitTillTargetBookieIsWritable() { + LOG.info("Waiting for target bookie {} to be back in read/write mode", targetBookie); + while (admin.getReadOnlyBookies().contains(targetBookie)) { + waitBackOffTime(); + } + LOG.info("Target bookie {} is back in read/write mode", targetBookie); + } + /** * Replicates the under replicated fragments from failed bookie ledger to * targetBookie @@ -378,9 +395,9 @@ public class ReplicationWorker implements Runnable { underreplicationManager .releaseUnderreplicatedLedger(ledgerId); } catch (UnavailableException e) { - shutdown(); LOG.error("UnavailableException " + "while replicating fragments", e); + shutdown(); } } } @@ -393,6 +410,8 @@ public class ReplicationWorker implements Runnable { * Stop the replication worker service */ public void shutdown() { + LOG.info("Shutting down replication worker"); + synchronized (this) { if (!workerRunning) { return;