hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ha...@apache.org
Subject hadoop git commit: HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs. Contributed by Gautam Gopalakrishnan.
Date Sat, 28 Mar 2015 19:16:43 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 b1b495145 -> b679dc5a8


HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs. Contributed by Gautam Gopalakrishnan.

(cherry picked from commit 3d9132d434c39e9b6e142e5cf9fd7a8afa4190a6)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b679dc5a
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b679dc5a
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b679dc5a

Branch: refs/heads/branch-2
Commit: b679dc5a8ea4bc27ac2945e492417df8b2ba5124
Parents: b1b4951
Author: Harsh J <harsh@cloudera.com>
Authored: Sun Mar 29 00:45:01 2015 +0530
Committer: Harsh J <harsh@cloudera.com>
Committed: Sun Mar 29 00:46:22 2015 +0530

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |  3 +
 .../hdfs/server/namenode/FSNamesystem.java      |  2 +-
 .../namenode/metrics/TestNameNodeMetrics.java   | 84 ++++++++++++++++++++
 3 files changed, 88 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b679dc5a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 8feffcb..1391b72 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -36,6 +36,9 @@ Release 2.8.0 - UNRELEASED
 
   BUG FIXES
 
+    HDFS-7501. TransactionsSinceLastCheckpoint can be negative on SBNs.
+    (Gautam Gopalakrishnan via harsh)
+
     HDFS-5356. MiniDFSCluster should close all open FileSystems when shutdown()
     (Rakesh R via vinayakumarb)
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b679dc5a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 04d9d67..32dcd5a 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -4784,7 +4784,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
   @Metric({"TransactionsSinceLastCheckpoint",
       "Number of transactions since last checkpoint"})
   public long getTransactionsSinceLastCheckpoint() {
-    return getEditLog().getLastWrittenTxId() -
+    return getFSImage().getLastAppliedOrWrittenTxId() -
         getFSImage().getStorage().getMostRecentCheckpointTxId();
   }
   

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b679dc5a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
index 63ab395..2ba609d 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
@@ -22,12 +22,16 @@ import static org.apache.hadoop.test.MetricsAsserts.assertCounter;
 import static org.apache.hadoop.test.MetricsAsserts.assertGauge;
 import static org.apache.hadoop.test.MetricsAsserts.assertQuantileGauges;
 import static org.apache.hadoop.test.MetricsAsserts.getMetrics;
+import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
 import java.io.DataInputStream;
 import java.io.IOException;
 import java.util.Random;
+import com.google.common.collect.ImmutableList;
+import com.google.common.io.Files;
 
+import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.conf.Configuration;
@@ -39,6 +43,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
@@ -47,7 +52,9 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
+import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
 import org.apache.hadoop.hdfs.server.namenode.top.TopAuditLogger;
 import org.apache.hadoop.metrics2.MetricsRecordBuilder;
 import org.apache.hadoop.metrics2.MetricsSource;
@@ -69,6 +76,7 @@ public class TestNameNodeMetrics {
     new Path("/testNameNodeMetrics");
   private static final String NN_METRICS = "NameNodeActivity";
   private static final String NS_METRICS = "FSNamesystem";
+  public static final Log LOG = LogFactory.getLog(TestNameNodeMetrics.class);
   
   // Number of datanodes in the cluster
   private static final int DATANODE_COUNT = 3; 
@@ -398,6 +406,82 @@ public class TestNameNodeMetrics {
   }
   
   /**
+   * Testing TransactionsSinceLastCheckpoint. Need a new cluster as
+   * the other tests in here don't use HA. See HDFS-7501.
+   */
+  @Test(timeout = 300000)
+  public void testTransactionSinceLastCheckpointMetrics() throws Exception {
+    Random random = new Random();
+    int retryCount = 0;
+    while (retryCount < 5) {
+      try {
+        int basePort = 10060 + random.nextInt(100) * 2;
+        MiniDFSNNTopology topology = new MiniDFSNNTopology()
+            .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
+            .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(basePort))
+            .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(basePort + 1)));
+
+        HdfsConfiguration conf2 = new HdfsConfiguration();
+        // Lower the checkpoint condition for purpose of testing.
+        conf2.setInt(
+            DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY,
+            100);
+        // Check for checkpoint condition very often, for purpose of testing.
+        conf2.setInt(
+            DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY,
+            1);
+        // Poll and follow ANN txns very often, for purpose of testing.
+        conf2.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
+        MiniDFSCluster cluster2 = new MiniDFSCluster.Builder(conf2)
+            .nnTopology(topology).numDataNodes(1).build();
+        cluster2.waitActive();
+        DistributedFileSystem fs2 = cluster2.getFileSystem(0);
+        NameNode nn0 = cluster2.getNameNode(0);
+        NameNode nn1 = cluster2.getNameNode(1);
+        cluster2.transitionToActive(0);
+        fs2.mkdirs(new Path("/tmp-t1"));
+        fs2.mkdirs(new Path("/tmp-t2"));
+        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+        // Test to ensure tracking works before the first-ever
+        // checkpoint.
+        assertEquals("SBN failed to track 2 transactions pre-checkpoint.",
+            4L, // 2 txns added further when catch-up is called.
+            cluster2.getNameNode(1).getNamesystem()
+              .getTransactionsSinceLastCheckpoint());
+        // Complete up to the boundary required for
+        // an auto-checkpoint. Using 94 to expect fsimage
+        // rounded at 100, as 4 + 94 + 2 (catch-up call) = 100.
+        for (int i = 1; i <= 94; i++) {
+          fs2.mkdirs(new Path("/tmp-" + i));
+        }
+        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+        // Assert 100 transactions in checkpoint.
+        HATestUtil.waitForCheckpoint(cluster2, 1, ImmutableList.of(100));
+        // Test to ensure number tracks the right state of
+        // uncheckpointed edits, and does not go negative
+        // (as fixed in HDFS-7501).
+        assertEquals("Should be zero right after the checkpoint.",
+            0L,
+            cluster2.getNameNode(1).getNamesystem()
+              .getTransactionsSinceLastCheckpoint());
+        fs2.mkdirs(new Path("/tmp-t3"));
+        fs2.mkdirs(new Path("/tmp-t4"));
+        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
+        // Test to ensure we track the right numbers after
+        // the checkpoint resets it to zero again.
+        assertEquals("SBN failed to track 2 added txns after the ckpt.",
+            4L,
+            cluster2.getNameNode(1).getNamesystem()
+              .getTransactionsSinceLastCheckpoint());
+        cluster2.shutdown();
+        break;
+      } catch (Exception e) {
+        LOG.warn("Unable to set up HA cluster, exception thrown: " + e);
+        retryCount++;
+      }
+    }
+  }
+  /**
    * Test NN checkpoint and transaction-related metrics.
    */
   @Test


Mime
View raw message