hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From t...@apache.org
Subject svn commit: r1461777 - in /hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/qjournal/client/ src/main/java/org/apache/hadoop/hdfs/qjournal/server/
Date Wed, 27 Mar 2013 18:42:44 GMT
Author: todd
Date: Wed Mar 27 18:42:43 2013
New Revision: 1461777

URL: http://svn.apache.org/r1461777
Log:
HDFS-4621. Additional logging to help diagnose slow QJM syncs. Contributed by Todd Lipcon.

Modified:
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
    hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1461777&r1=1461776&r2=1461777&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Mar 27 18:42:43 2013
@@ -371,6 +371,8 @@ Release 2.0.5-beta - UNRELEASED
 
     HDFS-4635. Move BlockManager#computeCapacity to LightWeightGSet. (suresh)
 
+    HDFS-4621. Additional logging to help diagnose slow QJM syncs. (todd)
+
   OPTIMIZATIONS
 
   BUG FIXES

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java?rev=1461777&r1=1461776&r2=1461777&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/IPCLoggerChannel.java
Wed Mar 27 18:42:43 2013
@@ -133,6 +133,8 @@ public class IPCLoggerChannel implements
   private Stopwatch lastHeartbeatStopwatch = new Stopwatch();
   
   private static final long HEARTBEAT_INTERVAL_MILLIS = 1000;
+
+  private static final long WARN_JOURNAL_MILLIS_THRESHOLD = 1000;
   
   static final Factory FACTORY = new AsyncLogger.Factory() {
     @Override
@@ -371,6 +373,12 @@ public class IPCLoggerChannel implements
                 now - submitNanos, TimeUnit.NANOSECONDS);
             metrics.addWriteEndToEndLatency(endToEndTime);
             metrics.addWriteRpcLatency(rpcTime);
+            if (rpcTime / 1000 > WARN_JOURNAL_MILLIS_THRESHOLD) {
+              QuorumJournalManager.LOG.warn(
+                  "Took " + (rpcTime / 1000) + "ms to send a batch of " +
+                  numTxns + " edits (" + data.length + " bytes) to " +
+                  "remote journal " + IPCLoggerChannel.this);
+            }
           }
           synchronized (IPCLoggerChannel.this) {
             highestAckedTxId = firstTxnId + numTxns - 1;

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java?rev=1461777&r1=1461776&r2=1461777&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
Wed Mar 27 18:42:43 2013
@@ -24,6 +24,7 @@ import java.util.concurrent.TimeoutExcep
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.util.Time;
 
+import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
 import com.google.common.util.concurrent.FutureCallback;
@@ -120,6 +121,15 @@ class QuorumCall<KEY, RESULT> {
         String msg = String.format(
             "Waited %s ms (timeout=%s ms) for a response for %s",
             waited, millis, operationName);
+        if (!successes.isEmpty()) {
+          msg += ". Succeeded so far: [" + Joiner.on(",").join(successes.keySet()) + "]";
+        }
+        if (!exceptions.isEmpty()) {
+          msg += ". Exceptions so far: [" + getExceptionMapString() + "]";
+        }
+        if (successes.isEmpty() && exceptions.isEmpty()) {
+          msg += ". No responses yet.";
+        }
         if (waited > millis * WAIT_PROGRESS_WARN_THRESHOLD) {
           QuorumJournalManager.LOG.warn(msg);
         } else {
@@ -227,4 +237,22 @@ class QuorumCall<KEY, RESULT> {
     }
     return sb.toString();
   }
+
+  /**
+   * Return a string suitable for displaying to the user, containing
+   * any exceptions that have been received so far.
+   */
+  private String getExceptionMapString() {
+    StringBuilder sb = new StringBuilder();
+    boolean first = true;
+    for (Map.Entry<KEY, Throwable> e : exceptions.entrySet()) {
+      if (!first) {
+        sb.append(", ");
+      }
+      first = false;
+      sb.append(e.getKey()).append(": ")
+        .append(e.getValue().getLocalizedMessage());
+    }
+    return sb.toString();
+  }
 }

Modified: hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java?rev=1461777&r1=1461776&r2=1461777&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
(original)
+++ hadoop/common/trunk/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/Journal.java
Wed Mar 27 18:42:43 2013
@@ -128,6 +128,10 @@ class Journal implements Closeable {
 
   private final JournalMetrics metrics;
 
+  /**
+   * Time threshold for sync calls, beyond which a warning should be logged to the console.
+   */
+  private static final int WARN_SYNC_MILLIS_THRESHOLD = 1000;
 
   Journal(File logDir, String journalId,
       StorageErrorReporter errorReporter) throws IOException {
@@ -370,6 +374,10 @@ class Journal implements Closeable {
     sw.stop();
     
     metrics.addSync(sw.elapsedTime(TimeUnit.MICROSECONDS));
+    if (sw.elapsedTime(TimeUnit.MILLISECONDS) > WARN_SYNC_MILLIS_THRESHOLD) {
+      LOG.warn("Sync of transaction range " + firstTxnId + "-" + lastTxnId +
+               " took " + sw.elapsedTime(TimeUnit.MILLISECONDS) + "ms");
+    }
 
     if (isLagging) {
       // This batch of edits has already been committed on a quorum of other



Mime
View raw message