zookeeper-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From h...@apache.org
Subject zookeeper git commit: ZOOKEEPER-2872: Interrupted snapshot sync causes data loss
Date Fri, 18 Aug 2017 04:32:47 GMT
Repository: zookeeper
Updated Branches:
  refs/heads/master ab182d456 -> 0706b40af


ZOOKEEPER-2872: Interrupted snapshot sync causes data loss

This requires the fix in ZOOKEEPER-2870: Improve the efficiency of AtomicFileOutputStream

Author: Brian Nixon <nixon@fb.com>

Reviewers: Michael Han <hanm@apache.org>

Closes #333 from enixon/snap-sync


Project: http://git-wip-us.apache.org/repos/asf/zookeeper/repo
Commit: http://git-wip-us.apache.org/repos/asf/zookeeper/commit/0706b40a
Tree: http://git-wip-us.apache.org/repos/asf/zookeeper/tree/0706b40a
Diff: http://git-wip-us.apache.org/repos/asf/zookeeper/diff/0706b40a

Branch: refs/heads/master
Commit: 0706b40afad079f19fe9f76c99bbb7ec69780dbd
Parents: ab182d4
Author: Brian Nixon <nixon@fb.com>
Authored: Thu Aug 17 21:32:43 2017 -0700
Committer: Michael Han <hanm@apache.org>
Committed: Thu Aug 17 21:32:43 2017 -0700

----------------------------------------------------------------------
 .../org/apache/zookeeper/server/ZooKeeperServer.java    |  8 ++++++--
 .../apache/zookeeper/server/persistence/FileSnap.java   | 12 ++++++++----
 .../zookeeper/server/persistence/FileTxnSnapLog.java    |  8 +++++---
 .../apache/zookeeper/server/persistence/SnapShot.java   |  8 +++++---
 .../org/apache/zookeeper/server/quorum/Learner.java     |  8 ++++++--
 .../org/apache/zookeeper/server/quorum/Zab1_0Test.java  |  6 +++---
 .../test/org/apache/zookeeper/test/TruncateTest.java    |  2 +-
 7 files changed, 34 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/zookeeper/blob/0706b40a/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java
----------------------------------------------------------------------
diff --git a/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java b/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java
index f45ac09..1704826 100644
--- a/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java
+++ b/src/java/main/org/apache/zookeeper/server/ZooKeeperServer.java
@@ -300,9 +300,13 @@ public class ZooKeeperServer implements SessionExpirer, ServerStats.Provider
{
         takeSnapshot();
     }
 
-    public void takeSnapshot(){
+    public void takeSnapshot() {
+        takeSnapshot(false);
+    }
+
+    public void takeSnapshot(boolean syncSnap){
         try {
-            txnLogFactory.save(zkDb.getDataTree(), zkDb.getSessionWithTimeOuts());
+            txnLogFactory.save(zkDb.getDataTree(), zkDb.getSessionWithTimeOuts(), syncSnap);
         } catch (IOException e) {
             LOG.error("Severe unrecoverable error, exiting", e);
             // This is a severe error that we cannot recover from,

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/0706b40a/src/java/main/org/apache/zookeeper/server/persistence/FileSnap.java
----------------------------------------------------------------------
diff --git a/src/java/main/org/apache/zookeeper/server/persistence/FileSnap.java b/src/java/main/org/apache/zookeeper/server/persistence/FileSnap.java
index 30ce7c7..2ea714e 100644
--- a/src/java/main/org/apache/zookeeper/server/persistence/FileSnap.java
+++ b/src/java/main/org/apache/zookeeper/server/persistence/FileSnap.java
@@ -37,6 +37,7 @@ import org.apache.jute.BinaryInputArchive;
 import org.apache.jute.BinaryOutputArchive;
 import org.apache.jute.InputArchive;
 import org.apache.jute.OutputArchive;
+import org.apache.zookeeper.common.AtomicFileOutputStream;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.zookeeper.server.DataTree;
@@ -213,12 +214,15 @@ public class FileSnap implements SnapShot {
      * @param dt the datatree to be serialized
      * @param sessions the sessions to be serialized
      * @param snapShot the file to store snapshot into
+     * @param fsync sync the file immediately after write
      */
-    public synchronized void serialize(DataTree dt, Map<Long, Integer> sessions, File
snapShot)
+    public synchronized void serialize(DataTree dt, Map<Long, Integer> sessions, File
snapShot, boolean fsync)
             throws IOException {
         if (!close) {
-            try (OutputStream sessOS = new BufferedOutputStream(new FileOutputStream(snapShot));
-                 CheckedOutputStream crcOut = new CheckedOutputStream(sessOS, new Adler32()))
{
+            try (CheckedOutputStream crcOut =
+                         new CheckedOutputStream(new BufferedOutputStream(fsync ? new AtomicFileOutputStream(snapShot)
:
+                                                                                  new FileOutputStream(snapShot)),
+                                                 new Adler32())) {
                 //CheckedOutputStream cout = new CheckedOutputStream()
                 OutputArchive oa = BinaryOutputArchive.getArchive(crcOut);
                 FileHeader header = new FileHeader(SNAP_MAGIC, VERSION, dbId);
@@ -226,7 +230,7 @@ public class FileSnap implements SnapShot {
                 long val = crcOut.getChecksum().getValue();
                 oa.writeLong(val, "val");
                 oa.writeString("/", "path");
-                sessOS.flush();
+                crcOut.flush();
             }
         }
     }

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/0706b40a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
----------------------------------------------------------------------
diff --git a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
index 8c2919f..3a03c81 100644
--- a/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
+++ b/src/java/main/org/apache/zookeeper/server/persistence/FileTxnSnapLog.java
@@ -196,7 +196,7 @@ public class FileTxnSnapLog {
             if (trustEmptyDB) {
                 /* TODO: (br33d) we should either put a ConcurrentHashMap on restore()
                  *       or use Map on save() */
-                save(dt, (ConcurrentHashMap<Long, Integer>)sessions);
+                save(dt, (ConcurrentHashMap<Long, Integer>)sessions, false);
 
                 /* return a zxid of 0, since we know the database is empty */
                 return 0L;
@@ -335,16 +335,18 @@ public class FileTxnSnapLog {
      * @param dataTree the datatree to be serialized onto disk
      * @param sessionsWithTimeouts the session timeouts to be
      * serialized onto disk
+     * @param syncSnap sync the snapshot immediately after write
      * @throws IOException
      */
     public void save(DataTree dataTree,
-            ConcurrentHashMap<Long, Integer> sessionsWithTimeouts)
+                     ConcurrentHashMap<Long, Integer> sessionsWithTimeouts,
+                     boolean syncSnap)
         throws IOException {
         long lastZxid = dataTree.lastProcessedZxid;
         File snapshotFile = new File(snapDir, Util.makeSnapshotName(lastZxid));
         LOG.info("Snapshotting: 0x{} to {}", Long.toHexString(lastZxid),
                 snapshotFile);
-        snapLog.serialize(dataTree, sessionsWithTimeouts, snapshotFile);
+        snapLog.serialize(dataTree, sessionsWithTimeouts, snapshotFile, syncSnap);
 
     }
 

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/0706b40a/src/java/main/org/apache/zookeeper/server/persistence/SnapShot.java
----------------------------------------------------------------------
diff --git a/src/java/main/org/apache/zookeeper/server/persistence/SnapShot.java b/src/java/main/org/apache/zookeeper/server/persistence/SnapShot.java
index c964afc..257c12d 100644
--- a/src/java/main/org/apache/zookeeper/server/persistence/SnapShot.java
+++ b/src/java/main/org/apache/zookeeper/server/persistence/SnapShot.java
@@ -44,11 +44,13 @@ public interface SnapShot {
     /**
      * persist the datatree and the sessions into a persistence storage
      * @param dt the datatree to be serialized
-     * @param sessions 
+     * @param sessions the session timeouts to be serialized
+     * @param name the object name to store snapshot into
+     * @param fsync sync the snapshot immediately after write
      * @throws IOException
      */
-    void serialize(DataTree dt, Map<Long, Integer> sessions, 
-            File name) 
+    void serialize(DataTree dt, Map<Long, Integer> sessions,
+                   File name, boolean fsync)
         throws IOException;
     
     /**

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/0706b40a/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
----------------------------------------------------------------------
diff --git a/src/java/main/org/apache/zookeeper/server/quorum/Learner.java b/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
index f2cccd2..726e688 100644
--- a/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
+++ b/src/java/main/org/apache/zookeeper/server/quorum/Learner.java
@@ -361,6 +361,7 @@ public class Learner {
         // In the DIFF case we don't need to do a snapshot because the transactions will
sync on top of any existing snapshot
         // For SNAP and TRUNC the snapshot is needed to save that history
         boolean snapshotNeeded = true;
+        boolean syncSnapshot = false;
         readPacket(qp);
         LinkedList<Long> packetsCommitted = new LinkedList<Long>();
         LinkedList<PacketInFlight> packetsNotCommitted = new LinkedList<PacketInFlight>();
@@ -387,6 +388,9 @@ public class Learner {
                     throw new IOException("Missing signature");                   
                 }
                 zk.getZKDatabase().setlastProcessedZxid(qp.getZxid());
+
+                // immediately persist the latest snapshot when there is txn log gap
+                syncSnapshot = true;
             } else if (qp.getType() == Leader.TRUNC) {
                 //we need to truncate the log to the lastzxid of the leader
                 LOG.warn("Truncating log to get in sync with the leader 0x"
@@ -513,7 +517,7 @@ public class Learner {
                        }
                     }
                     if (isPreZAB1_0) {
-                        zk.takeSnapshot();
+                        zk.takeSnapshot(syncSnapshot);
                         self.setCurrentEpoch(newEpoch);
                     }
                     self.setZooKeeperServer(zk);
@@ -533,7 +537,7 @@ public class Learner {
                    }
 
                    if (snapshotNeeded) {
-                       zk.takeSnapshot();
+                       zk.takeSnapshot(syncSnapshot);
                    }
                    
                     self.setCurrentEpoch(newEpoch);

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/0706b40a/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java b/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java
index e7dc90b..acac87e 100644
--- a/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java
+++ b/src/java/test/org/apache/zookeeper/server/quorum/Zab1_0Test.java
@@ -435,7 +435,7 @@ public class Zab1_0Test extends ZKTestCase {
             Assert.assertTrue(zxid > ZxidUtils.makeZxid(1, 0));
             
             // Generate snapshot and close files.
-            snapLog.save(zkDb.getDataTree(), zkDb.getSessionWithTimeOuts());
+            snapLog.save(zkDb.getDataTree(), zkDb.getSessionWithTimeOuts(), false);
             snapLog.close();
             
             QuorumPeer peer = createQuorumPeer(tmpDir);
@@ -714,7 +714,7 @@ public class Zab1_0Test extends ZKTestCase {
                     Assert.assertEquals(1, f.self.getAcceptedEpoch());
                     Assert.assertEquals(1, f.self.getCurrentEpoch());
                     //Make sure that we did take the snapshot now
-                    verify(f.zk).takeSnapshot();
+                    verify(f.zk).takeSnapshot(true);
                     Assert.assertEquals(firstZxid, f.fzk.getLastProcessedZxid());
                     
                     // Make sure the data was recorded in the filesystem ok
@@ -1367,7 +1367,7 @@ public class Zab1_0Test extends ZKTestCase {
             FileTxnSnapLog logFactory = new FileTxnSnapLog(tmpDir, tmpDir);
             File version2 = new File(tmpDir, "version-2");
             version2.mkdir();
-            logFactory.save(new DataTree(), new ConcurrentHashMap<Long, Integer>());
+            logFactory.save(new DataTree(), new ConcurrentHashMap<Long, Integer>(),
false);
             long zxid = ZxidUtils.makeZxid(3, 3);
             logFactory.append(new Request(1, 1, ZooDefs.OpCode.error,
                     new TxnHeader(1, 1, zxid, 1, ZooDefs.OpCode.error),

http://git-wip-us.apache.org/repos/asf/zookeeper/blob/0706b40a/src/java/test/org/apache/zookeeper/test/TruncateTest.java
----------------------------------------------------------------------
diff --git a/src/java/test/org/apache/zookeeper/test/TruncateTest.java b/src/java/test/org/apache/zookeeper/test/TruncateTest.java
index 955eb1e..ede38d0 100644
--- a/src/java/test/org/apache/zookeeper/test/TruncateTest.java
+++ b/src/java/test/org/apache/zookeeper/test/TruncateTest.java
@@ -75,7 +75,7 @@ public class TruncateTest extends ZKTestCase {
         ZKDatabase zkdb = new ZKDatabase(snaplog);
         // make sure to snapshot, so that we have something there when
         // truncateLog reloads the db
-        snaplog.save(zkdb.getDataTree(), zkdb.getSessionWithTimeOuts());
+        snaplog.save(zkdb.getDataTree(), zkdb.getSessionWithTimeOuts(), false);
 
         for (int i = 1; i <= 100; i++) {
             append(zkdb, i);


Mime
View raw message