zookeeper-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From iv...@apache.org
Subject svn commit: r1589636 - in /zookeeper/bookkeeper/trunk: ./ bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ bookkeeper-server/src/test/java/org/apache/bookkeeper/replication...
Date Thu, 24 Apr 2014 08:43:56 GMT
Author: ivank
Date: Thu Apr 24 08:43:56 2014
New Revision: 1589636

URL: http://svn.apache.org/r1589636
Log:
BOOKKEEPER-742: Fix for empty ledgers losing quorum. (ivank)

Modified:
    zookeeper/bookkeeper/trunk/CHANGES.txt
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java
    zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java

Modified: zookeeper/bookkeeper/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/CHANGES.txt?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/CHANGES.txt (original)
+++ zookeeper/bookkeeper/trunk/CHANGES.txt Thu Apr 24 08:43:56 2014
@@ -178,6 +178,8 @@ Trunk (unreleased changes)
 
         BOOKKEEPER-432: Improve performance of entry log range read per ledger entries (yixue,
sijie via ivank)
 
+        BOOKKEEPER-742: Fix for empty ledgers losing quorum. (ivank)
+
       hedwig-server:
 
         BOOKKEEPER-601: readahead cache size isn't updated correctly (sijie via fpj)

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/BookKeeperAdmin.java
Thu Apr 24 08:43:56 2014
@@ -879,4 +879,11 @@ public class BookKeeperAdmin {
             }
         };
     }
+
+    /**
+     * @return the metadata for the passed ledger handle
+     */
+    public LedgerMetadata getLedgerMetadata(LedgerHandle lh) {
+        return lh.getLedgerMetadata();
+    }
 }

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/Auditor.java
Thu Apr 24 08:43:56 2014
@@ -43,6 +43,8 @@ import org.apache.bookkeeper.util.ZkUtil
 import org.apache.bookkeeper.zookeeper.ZooKeeperWatcherBase;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.zookeeper.AsyncCallback;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.util.concurrent.SettableFuture;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.ZooKeeper;
 import org.slf4j.Logger;
@@ -54,6 +56,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.concurrent.Future;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ScheduledExecutorService;
@@ -137,48 +140,49 @@ public class Auditor implements BookiesL
         }
     }
 
-    private synchronized void submitAuditTask() {
-        synchronized (this) {
-            if (executor.isShutdown()) {
-                return;
-            }
-            executor.submit(new Runnable() {
-                    @SuppressWarnings("unchecked")
-                    public void run() {
-                        try {
-                            waitIfLedgerReplicationDisabled();
+    @VisibleForTesting
+    synchronized Future<?> submitAuditTask() {
+        if (executor.isShutdown()) {
+            SettableFuture<Void> f = SettableFuture.<Void>create();
+            f.setException(new BKAuditException("Auditor shutting down"));
+            return f;
+        }
+        return executor.submit(new Runnable() {
+                @SuppressWarnings("unchecked")
+                public void run() {
+                    try {
+                        waitIfLedgerReplicationDisabled();
 
-                            List<String> availableBookies = getAvailableBookies();
+                        List<String> availableBookies = getAvailableBookies();
 
-                            // casting to String, as knownBookies and availableBookies
-                            // contains only String values
-                            // find new bookies(if any) and update the known bookie list
-                            Collection<String> newBookies = CollectionUtils.subtract(
-                                    availableBookies, knownBookies);
-                            knownBookies.addAll(newBookies);
-
-                            // find lost bookies(if any)
-                            Collection<String> lostBookies = CollectionUtils.subtract(
-                                    knownBookies, availableBookies);
-
-                            if (lostBookies.size() > 0) {
-                                knownBookies.removeAll(lostBookies);
-                                Map<String, Set<Long>> ledgerDetails = generateBookie2LedgersIndex();
-                                handleLostBookies(lostBookies, ledgerDetails);
-                            }
-                        } catch (BKException bke) {
-                            LOG.error("Exception getting bookie list", bke);
-                        } catch (InterruptedException ie) {
-                            Thread.currentThread().interrupt();
-                            LOG.error("Interrupted while watching available bookies ", ie);
-                        } catch (BKAuditException bke) {
-                            LOG.error("Exception while watching available bookies", bke);
-                        } catch (UnavailableException ue) {
-                            LOG.error("Exception while watching available bookies", ue);
+                        // casting to String, as knownBookies and availableBookies
+                        // contains only String values
+                        // find new bookies(if any) and update the known bookie list
+                        Collection<String> newBookies = CollectionUtils.subtract(
+                                availableBookies, knownBookies);
+                        knownBookies.addAll(newBookies);
+
+                        // find lost bookies(if any)
+                        Collection<String> lostBookies = CollectionUtils.subtract(
+                                knownBookies, availableBookies);
+
+                        if (lostBookies.size() > 0) {
+                            knownBookies.removeAll(lostBookies);
+                            Map<String, Set<Long>> ledgerDetails = generateBookie2LedgersIndex();
+                            handleLostBookies(lostBookies, ledgerDetails);
                         }
+                    } catch (BKException bke) {
+                        LOG.error("Exception getting bookie list", bke);
+                    } catch (InterruptedException ie) {
+                        Thread.currentThread().interrupt();
+                        LOG.error("Interrupted while watching available bookies ", ie);
+                    } catch (BKAuditException bke) {
+                        LOG.error("Exception while watching available bookies", bke);
+                    } catch (UnavailableException ue) {
+                        LOG.error("Exception while watching available bookies", ue);
                     }
-                });
-        }
+                }
+            });
     }
 
     public void start() {

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AuditorElector.java
Thu Apr 24 08:43:56 2014
@@ -269,6 +269,11 @@ public class AuditorElector {
         executor.submit(r);
     }
 
+    @VisibleForTesting
+    Auditor getAuditor() {
+        return auditor;
+    }
+
     /**
      * Shutting down AuditorElector
      */

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/AutoRecoveryMain.java
Thu Apr 24 08:43:56 2014
@@ -24,6 +24,8 @@ import java.io.File;
 import java.io.IOException;
 import java.net.MalformedURLException;
 
+import com.google.common.annotations.VisibleForTesting;
+
 import org.apache.bookkeeper.bookie.Bookie;
 import org.apache.bookkeeper.bookie.BookieCriticalThread;
 import org.apache.bookkeeper.bookie.ExitCode;
@@ -144,6 +146,11 @@ public class AutoRecoveryMain {
         return exitCode;
     }
 
+    @VisibleForTesting
+    public Auditor getAuditor() {
+        return auditorElector.getAuditor();
+    }
+
     /** Is auto-recovery service running? */
     public boolean isAutoRecoveryRunning() {
         return running;

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationException.java
Thu Apr 24 08:43:56 2014
@@ -70,5 +70,9 @@ public abstract class ReplicationExcepti
         BKAuditException(String message, Throwable cause) {
             super(message, cause);
         }
+
+        BKAuditException(String message) {
+            super(message);
+        }
     }
 }

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/main/java/org/apache/bookkeeper/replication/ReplicationWorker.java
Thu Apr 24 08:43:56 2014
@@ -24,6 +24,9 @@ import java.util.List;
 import java.util.Set;
 import java.util.Timer;
 import java.util.TimerTask;
+import java.util.SortedMap;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.concurrent.CountDownLatch;
 
 import org.apache.bookkeeper.bookie.BookieThread;
@@ -36,6 +39,7 @@ import org.apache.bookkeeper.client.Book
 import org.apache.bookkeeper.client.LedgerChecker;
 import org.apache.bookkeeper.client.LedgerFragment;
 import org.apache.bookkeeper.client.LedgerHandle;
+import org.apache.bookkeeper.client.LedgerMetadata;
 import org.apache.bookkeeper.conf.ClientConfiguration;
 import org.apache.bookkeeper.conf.ServerConfiguration;
 import org.apache.bookkeeper.meta.LedgerManagerFactory;
@@ -197,7 +201,7 @@ public class ReplicationWorker implement
             }
         }
 
-        if (foundOpenFragments) {
+        if (foundOpenFragments || isLastSegmentOpenAndMissingBookies(lh)) {
             deferLedgerLockRelease(ledgerIdToReplicate);
             return;
         }
@@ -215,6 +219,45 @@ public class ReplicationWorker implement
         }
     }
 
+    /**
+     * When checking the fragments of a ledger, there is a corner case
+     * where if the last segment/ensemble is open, but nothing has been written to
+     * some of the quorums in the ensemble, bookies can fail without any action being
+     * taken. This is fine, until enough bookies fail to cause a quorum to become
+     * unavailable, by which time the ledger is unrecoverable.
+     *
+     * For example, if in a E3Q2, only 1 entry is written and the last bookie
+     * in the ensemble fails, nothing has been written to it, so nothing needs to be
+     * recovered. But if the second to last bookie fails, we've now lost quorum for
+     * the second entry, so it's impossible to see if the second has been written or
+     * not.
+     *
+     * To avoid this situation, we need to check if bookies in the final open ensemble
+     * are unavailable, and take action if so. The action to take is to close the ledger,
+     * after a grace period as the writting client may replace the faulty bookie on its
+     * own.
+     *
+     * Missing bookies in closed ledgers are fine, as we know the last confirmed add, so
+     * we can tell which entries are supposed to exist and rereplicate them if necessary.
+     */
+    private boolean isLastSegmentOpenAndMissingBookies(LedgerHandle lh) throws BKException
{
+        LedgerMetadata md = admin.getLedgerMetadata(lh);
+        if (md.isClosed()) {
+            return false;
+        }
+
+        SortedMap<Long, ArrayList<BookieSocketAddress>> ensembles
+            = admin.getLedgerMetadata(lh).getEnsembles();
+        ArrayList<BookieSocketAddress> finalEnsemble = ensembles.get(ensembles.lastKey());
+        Collection<BookieSocketAddress> available = admin.getAvailableBookies();
+        for (BookieSocketAddress b : finalEnsemble) {
+            if (!available.contains(b)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     /** Gets the under replicated fragments */
     private Set<LedgerFragment> getUnderreplicatedFragments(LedgerHandle lh)
             throws InterruptedException {
@@ -237,6 +280,10 @@ public class ReplicationWorker implement
                 LedgerHandle lh = null;
                 try {
                     lh = admin.openLedgerNoRecovery(ledgerId);
+                    if (isLastSegmentOpenAndMissingBookies(lh)) {
+                        lh = admin.openLedger(ledgerId);
+                    }
+
                     Set<LedgerFragment> fragments = getUnderreplicatedFragments(lh);
                     for (LedgerFragment fragment : fragments) {
                         if (!fragment.isClosed()) {

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/replication/BookieAutoRecoveryTest.java
Thu Apr 24 08:43:56 2014
@@ -24,6 +24,7 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.SortedMap;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.bookkeeper.client.BKException;
 import org.apache.bookkeeper.client.BookKeeper.DigestType;
@@ -37,7 +38,7 @@ import org.apache.bookkeeper.net.BookieS
 import org.apache.bookkeeper.proto.BookieServer;
 import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException;
 import org.apache.bookkeeper.replication.ReplicationException.UnavailableException;
-import org.apache.bookkeeper.test.MultiLedgerManagerTestCase;
+import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
@@ -52,8 +53,7 @@ import org.slf4j.LoggerFactory;
  * Auditor-rereplication process: Auditor will publish the bookie failures,
  * consequently ReplicationWorker will get the notifications and act on it.
  */
-public class BookieAutoRecoveryTest extends
-        MultiLedgerManagerTestCase {
+public class BookieAutoRecoveryTest extends BookKeeperClusterTestCase {
     private static final Logger LOG = LoggerFactory
             .getLogger(BookieAutoRecoveryTest.class);
     private static final byte[] PASSWD = "admin".getBytes();
@@ -68,15 +68,15 @@ public class BookieAutoRecoveryTest exte
     private final String UNDERREPLICATED_PATH = baseClientConf
             .getZkLedgersRootPath() + "/underreplication/ledgers";
 
-    public BookieAutoRecoveryTest(String ledgerManagerFactory) throws IOException, KeeperException,
+    public BookieAutoRecoveryTest() throws IOException, KeeperException,
             InterruptedException, UnavailableException, CompatibilityException {
         super(3);
-        LOG.info("Running test case using ledger manager : "
-                + ledgerManagerFactory);
-        // set ledger manager name
-        baseConf.setLedgerManagerFactoryClassName(ledgerManagerFactory);
+
+        baseConf.setLedgerManagerFactoryClassName(
+                "org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory");
         baseConf.setOpenLedgerRereplicationGracePeriod(openLedgerRereplicationGracePeriod);
-        baseClientConf.setLedgerManagerFactoryClassName(ledgerManagerFactory);
+        baseClientConf.setLedgerManagerFactoryClassName(
+                "org.apache.bookkeeper.meta.HierarchicalLedgerManagerFactory");
         this.digestType = DigestType.MAC;
         setAutoRecoveryEnabled(true);
     }
@@ -342,6 +342,51 @@ public class BookieAutoRecoveryTest exte
         }
     }
 
+    /**
+     * Test that if a empty ledger loses the bookie not in the quorum for entry 0, it will
+     * still be openable when it loses enough bookies to lose a whole quorum.
+     */
+    @Test(timeout=10000)
+    public void testEmptyLedgerLosesQuorumEventually() throws Exception {
+        LedgerHandle lh = bkc.createLedger(3, 2, 2, DigestType.CRC32, PASSWD);
+        CountDownLatch latch = new CountDownLatch(1);
+        String urZNode = getUrLedgerZNode(lh);
+        watchUrLedgerNode(urZNode, latch);
+
+        BookieSocketAddress replicaToKill = LedgerHandleAdapter
+            .getLedgerMetadata(lh).getEnsembles().get(0L).get(2);
+        LOG.info("Killing last bookie, {}, in ensemble {}", replicaToKill,
+                 LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles().get(0L));
+        killBookie(replicaToKill);
+
+        getAuditor().submitAuditTask().get(); // ensure auditor runs
+
+        assertTrue("Should be marked as underreplicated", latch.await(5, TimeUnit.SECONDS));
+        latch = new CountDownLatch(1);
+        Stat s = watchUrLedgerNode(urZNode, latch); // should be marked as replicated
+        if (s != null) {
+            assertTrue("Should be marked as replicated", latch.await(10, TimeUnit.SECONDS));
+        }
+
+        replicaToKill = LedgerHandleAdapter
+            .getLedgerMetadata(lh).getEnsembles().get(0L).get(1);
+        LOG.info("Killing second bookie, {}, in ensemble {}", replicaToKill,
+                 LedgerHandleAdapter.getLedgerMetadata(lh).getEnsembles().get(0L));
+        killBookie(replicaToKill);
+
+        getAuditor().submitAuditTask().get(); // ensure auditor runs
+
+        assertTrue("Should be marked as underreplicated", latch.await(5, TimeUnit.SECONDS));
+        latch = new CountDownLatch(1);
+        s = watchUrLedgerNode(urZNode, latch); // should be marked as replicated
+        if (s != null) {
+            assertTrue("Should be marked as replicated", latch.await(5, TimeUnit.SECONDS));
+        }
+
+        // should be able to open ledger without issue
+        bkc.openLedger(lh.getId(), DigestType.CRC32, PASSWD);
+    }
+
     private int getReplicaIndexInLedger(LedgerHandle lh,
  BookieSocketAddress replicaToKill) {
         SortedMap<Long, ArrayList<BookieSocketAddress>> ensembles = LedgerHandleAdapter

Modified: zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java?rev=1589636&r1=1589635&r2=1589636&view=diff
==============================================================================
--- zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java
(original)
+++ zookeeper/bookkeeper/trunk/bookkeeper-server/src/test/java/org/apache/bookkeeper/test/BookKeeperClusterTestCase.java
Thu Apr 24 08:43:56 2014
@@ -44,6 +44,7 @@ import org.apache.bookkeeper.metastore.I
 import org.apache.bookkeeper.net.BookieSocketAddress;
 import org.apache.bookkeeper.proto.BookieServer;
 import org.apache.bookkeeper.replication.AutoRecoveryMain;
+import org.apache.bookkeeper.replication.Auditor;
 import org.apache.bookkeeper.replication.ReplicationException.CompatibilityException;
 import org.apache.bookkeeper.replication.ReplicationException.UnavailableException;
 import org.apache.commons.io.FileUtils;
@@ -528,4 +529,14 @@ public abstract class BookKeeperClusterT
                     + autoRecoveryProcess.getKey().getLocalAddress());
         }
     }
+
+    public Auditor getAuditor() throws Exception {
+        for (AutoRecoveryMain p : autoRecoveryProcesses.values()) {
+            Auditor a = p.getAuditor();
+            if (a != null) {
+                return a;
+            }
+        }
+        throw new Exception("No auditor found");
+    }
 }



Mime
View raw message