Return-Path: X-Original-To: apmail-zookeeper-commits-archive@www.apache.org Delivered-To: apmail-zookeeper-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 6275110446 for ; Fri, 7 Jun 2013 19:43:17 +0000 (UTC) Received: (qmail 8456 invoked by uid 500); 7 Jun 2013 19:43:17 -0000 Delivered-To: apmail-zookeeper-commits-archive@zookeeper.apache.org Received: (qmail 8427 invoked by uid 500); 7 Jun 2013 19:43:17 -0000 Mailing-List: contact commits-help@zookeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@ Delivered-To: mailing list commits@zookeeper.apache.org Received: (qmail 8419 invoked by uid 99); 7 Jun 2013 19:43:16 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Jun 2013 19:43:16 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Jun 2013 19:43:12 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 187B72388A64; Fri, 7 Jun 2013 19:42:52 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1490797 - in /zookeeper/bookkeeper/branches/branch-4.2: ./ bookkeeper-server/src/main/java/org/apache/bookkeeper/client/ bookkeeper-server/src/test/java/org/apache/bookkeeper/client/ Date: Fri, 07 Jun 2013 19:42:51 -0000 To: commits@zookeeper.apache.org From: ivank@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130607194252.187B72388A64@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: ivank Date: Fri Jun 7 19:42:49 2013 New Revision: 1490797 URL: http://svn.apache.org/r1490797 Log: BOOKKEEPER-584: Data loss when ledger metadata is overwritten (sijie via ivank) Modified: zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java Modified: zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt?rev=1490797&r1=1490796&r2=1490797&view=diff ============================================================================== --- zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt (original) +++ zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt Fri Jun 7 19:42:49 2013 @@ -20,6 +20,8 @@ Release 4.2.2 - Unreleased BOOKKEEPER-596: Ledgers are gc'ed by mistake in MSLedgerManagerFactory. (sijie via ivank) + BOOKKEEPER-584: Data loss when ledger metadata is overwritten (sijie via ivank) + hedwig-server: BOOKKEEPER-579: TestSubAfterCloseSub was put in a wrong package (sijie via ivank) Modified: zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java?rev=1490797&r1=1490796&r2=1490797&view=diff ============================================================================== --- zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java (original) +++ zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java Fri Jun 7 19:42:49 2013 @@ -1,5 +1,3 @@ -package org.apache.bookkeeper.client; - /* * * Licensed to the Apache Software Foundation (ASF) under one @@ -20,6 +18,7 @@ package org.apache.bookkeeper.client; * under the License. * */ +package org.apache.bookkeeper.client; import java.net.InetSocketAddress; import java.security.GeneralSecurityException; @@ -297,7 +296,12 @@ public class LedgerHandle { } metadata.setLength(prevLength); - if (metadata.resolveConflict(newMeta)) { + if (!metadata.isNewerThan(newMeta) + && !metadata.isConflictWith(newMeta)) { + // use the new metadata's ensemble, in case re-replication already + // replaced some bookies in the ensemble. + metadata.setEnsembles(newMeta.getEnsembles()); + metadata.setVersion(newMeta.version); metadata.setLength(length); metadata.close(lastAddConfirmed); writeLedgerConfig(new CloseCb()); @@ -485,7 +489,7 @@ public class LedgerHandle { final long currentLength; synchronized(this) { // synchronized on this to ensure that - // the ledger isn't closed between checking and + // the ledger isn't closed between checking and // updating lastAddPushed if (metadata.isClosed()) { LOG.warn("Attempt to add to closed ledger: " + ledgerId); @@ -532,6 +536,7 @@ public class LedgerHandle { public void asyncReadLastConfirmed(final ReadLastConfirmedCallback cb, final Object ctx) { ReadLastConfirmedOp.LastConfirmedDataCallback innercb = new ReadLastConfirmedOp.LastConfirmedDataCallback() { + @Override public void readLastConfirmedDataComplete(int rc, DigestManager.RecoveryData data) { if (rc == BKException.Code.OK) { lastAddConfirmed = Math.max(lastAddConfirmed, data.lastAddConfirmed); @@ -781,6 +786,8 @@ public class LedgerHandle { } /** + * Specific resolve conflicts happened when multiple bookies failures in same ensemble. + *

* Resolving the version conflicts between local ledgerMetadata and zk * ledgerMetadata. This will do the following: *

    @@ -790,23 +797,35 @@ public class LedgerHandle { * if the zk ledgerMetadata still contains the failed bookie, then * update zookeeper with the newBookie otherwise send write request *
+ *

*/ private boolean resolveConflict(LedgerMetadata newMeta) { - // close have changed, another client has opened - // the ledger, can't resolve this conflict. + // make sure the ledger isn't closed by other ones. if (metadata.getState() != newMeta.getState()) { return false; } - // update znode version - metadata.setVersion(newMeta.getVersion()); - // Resolve the conflicts if zk metadata still contains failed - // bookie. + + // If the failed the bookie is still existed in the metadata (in zookeeper), it means that + // the ensemble change of the failed bookie is failed due to metadata conflicts. so try to + // update the ensemble change metadata again. Otherwise, it means that the ensemble change + // is already succeed, unset the success and re-adding entries. if (newMeta.currentEnsemble.get(ensembleInfo.bookieIndex).equals( ensembleInfo.addr)) { - // Update ledger metadata in zk, if in-memory metadata doesn't - // contains the failed bookie. + // If the in-memory data doesn't contains the failed bookie, it means the ensemble change + // didn't finish, so try to resolve conflicts with the metadata read from zookeeper and + // update ensemble changed metadata again. if (!metadata.currentEnsemble.get(ensembleInfo.bookieIndex) .equals(ensembleInfo.addr)) { + // if the local metadata is newer than zookeeper metadata, it means that metadata is updated + // again when it was trying re-reading the metatada, re-kick the reread again + if (metadata.isNewerThan(newMeta)) { + rereadMetadata(this); + return true; + } + // make sure the metadata doesn't changed by other ones. + if (metadata.isConflictWith(newMeta)) { + return false; + } LOG.info("Resolve ledger metadata conflict " + "while changing ensemble to: " + ensembleInfo.newEnsemble @@ -814,6 +833,8 @@ public class LedgerHandle { + new String(metadata.serialize()) + "\n, new meta data is \n" + new String(newMeta.serialize())); + // update znode version + metadata.setVersion(newMeta.getVersion()); writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo)); } } else { @@ -906,6 +927,7 @@ public class LedgerHandle { * @param ctx * control object */ + @Override public void readComplete(int rc, LedgerHandle lh, Enumeration seq, Object ctx) { @@ -934,6 +956,7 @@ public class LedgerHandle { * @param ctx * control object */ + @Override public void addComplete(int rc, LedgerHandle lh, long entry, Object ctx) { SyncCounter counter = (SyncCounter) ctx; @@ -947,6 +970,7 @@ public class LedgerHandle { /** * Implementation of callback interface for synchronous read last confirmed method. */ + @Override public void readLastConfirmedComplete(int rc, long lastConfirmed, Object ctx) { LastConfirmedCtx lcCtx = (LastConfirmedCtx) ctx; @@ -966,6 +990,7 @@ public class LedgerHandle { * @param lh * @param ctx */ + @Override public void closeComplete(int rc, LedgerHandle lh, Object ctx) { SyncCounter counter = (SyncCounter) ctx; counter.setrc(rc); Modified: zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java?rev=1490797&r1=1490796&r2=1490797&view=diff ============================================================================== --- zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java (original) +++ zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java Fri Jun 7 19:42:49 2013 @@ -139,6 +139,10 @@ public class LedgerMetadata { return ensembles; } + void setEnsembles(SortedMap> ensembles) { + this.ensembles = ensembles; + } + public int getEnsembleSize() { return ensembleSize; } @@ -433,13 +437,26 @@ public class LedgerMetadata { } /** - * Resolve conflict with new updated metadata. + * Is the metadata newer that given newMeta. + * + * @param newMeta + * @return + */ + boolean isNewerThan(LedgerMetadata newMeta) { + if (null == version) { + return false; + } + return Version.Occurred.AFTER == version.compare(newMeta.version); + } + + /** + * Is the metadata conflict with new updated metadata. * * @param newMeta * Re-read metadata - * @return true if the conflict has been resolved, otherwise false. + * @return true if the metadata is conflict. */ - boolean resolveConflict(LedgerMetadata newMeta) { + boolean isConflictWith(LedgerMetadata newMeta) { /* * if length & close have changed, then another client has * opened the ledger, can't resolve this conflict. @@ -453,22 +470,17 @@ public class LedgerMetadata { state != newMeta.state || !digestType.equals(newMeta.digestType) || !Arrays.equals(password, newMeta.password)) { - return false; + return true; } if (state == LedgerMetadataFormat.State.CLOSED && lastEntryId != newMeta.lastEntryId) { - return false; - } - // new meta znode version should be larger than old one - if (null != version && - Version.Occurred.AFTER == version.compare(newMeta.version)) { - return false; + return true; } // if ledger is closed, we can just take the new ensembles if (newMeta.state != LedgerMetadataFormat.State.CLOSED) { // ensemble size should be same if (ensembles.size() != newMeta.ensembles.size()) { - return false; + return true; } // ensemble distribution should be same // we don't check the detail ensemble, since new bookie will be set @@ -479,16 +491,10 @@ public class LedgerMetadata { Long curKey = keyIter.next(); Long newMetaKey = newMetaKeyIter.next(); if (!curKey.equals(newMetaKey)) { - return false; + return true; } } } - /* - * if the conflict has been resolved, then update - * ensemble and znode version - */ - ensembles = newMeta.ensembles; - version = newMeta.version; - return true; + return false; } } Modified: zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java?rev=1490797&r1=1490796&r2=1490797&view=diff ============================================================================== --- zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java (original) +++ zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/PendingAddOp.java Fri Jun 7 19:42:49 2013 @@ -21,11 +21,11 @@ import java.util.HashSet; import java.util.Set; import java.net.InetSocketAddress; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; -import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.proto.BookieProtocol; +import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; +import org.jboss.netty.buffer.ChannelBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.jboss.netty.buffer.ChannelBuffer; /** * This represents a pending add operation. When it has got success from all @@ -132,6 +132,11 @@ class PendingAddOp implements WriteCallb public void writeComplete(int rc, long ledgerId, long entryId, InetSocketAddress addr, Object ctx) { int bookieIndex = (Integer) ctx; + if (completed) { + // I am already finished, ignore incoming responses. + // otherwise, we might hit the following error handling logic, which might cause bad things. + return; + } switch (rc) { case BKException.Code.OK: Modified: zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java?rev=1490797&r1=1490796&r2=1490797&view=diff ============================================================================== --- zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java (original) +++ zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieWriteLedgerTest.java Fri Jun 7 19:42:49 2013 @@ -58,6 +58,7 @@ public class BookieWriteLedgerTest exten private static class SyncObj { volatile int counter; + volatile int rc; public SyncObj() { counter = 0; @@ -182,6 +183,7 @@ public class BookieWriteLedgerTest exten LOG.debug("Entries counter = " + syncObj1.counter); syncObj1.wait(); } + assertEquals(BKException.Code.OK, syncObj1.rc); } // wait for all entries to be acknowledged for the second ledger synchronized (syncObj2) { @@ -189,6 +191,7 @@ public class BookieWriteLedgerTest exten LOG.debug("Entries counter = " + syncObj2.counter); syncObj2.wait(); } + assertEquals(BKException.Code.OK, syncObj2.rc); } // reading ledger till the last entry @@ -217,12 +220,9 @@ public class BookieWriteLedgerTest exten @Override public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { - if (rc != BKException.Code.OK) - fail("Return code is not OK: " + rc); - SyncObj x = (SyncObj) ctx; - synchronized (x) { + x.rc = rc; x.counter++; x.notify(); }