hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tomwh...@apache.org
Subject svn commit: r563270 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/TaskRunner.java src/java/org/apache/hadoop/mapred/TaskTracker.java
Date Mon, 06 Aug 2007 20:52:01 GMT
Author: tomwhite
Date: Mon Aug  6 13:52:00 2007
New Revision: 563270

URL: http://svn.apache.org/viewvc?view=rev&rev=563270
Log:
HADOOP-1610.  Add metrics for failed tasks.  Contributed by Devaraj Das.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=563270&r1=563269&r2=563270
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Aug  6 13:52:00 2007
@@ -18,6 +18,9 @@
     easier to read.  Also remove numbering, to make merging easier.
     (cutting)
 
+    HADOOP-1610.  Add metrics for failed tasks.
+    (Devaraj Das via tomwhite)
+
   OPTIMIZATIONS
 
     HADOOP-1565.  Reduce memory usage of NameNode by replacing 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java?view=diff&rev=563270&r1=563269&r2=563270
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskRunner.java Mon Aug  6 13:52:00 2007
@@ -421,6 +421,9 @@
       int exit_code = process.waitFor();
      
       if (!killed && exit_code != 0) {
+        if (exit_code == 65) {
+          tracker.getTaskTrackerMetrics().taskFailedPing();
+        }
         throw new IOException("Task process exit with nonzero status of " +
                               exit_code + ".");
       }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java?view=diff&rev=563270&r1=563269&r2=563270
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/TaskTracker.java Mon Aug  6 13:52:00 2007
@@ -213,9 +213,11 @@
       shuffleMetricsRecord.update();
     }
   }
-  private class TaskTrackerMetrics implements Updater {
+  public class TaskTrackerMetrics implements Updater {
     private MetricsRecord metricsRecord = null;
     private int numCompletedTasks = 0;
+    private int timedoutTasks = 0;
+    private int tasksFailedPing = 0;
       
     TaskTrackerMetrics() {
       JobConf conf = getJobConf();
@@ -232,6 +234,15 @@
     synchronized void completeTask() {
       ++numCompletedTasks;
     }
+    
+    synchronized void timedoutTask() {
+      ++timedoutTasks;
+    }
+    
+    synchronized void taskFailedPing() {
+      ++tasksFailedPing;
+    }
+    
     /**
      * Since this object is a registered updater, this method will be called
      * periodically, e.g. every 5 seconds.
@@ -243,15 +254,23 @@
           metricsRecord.setMetric("reduces_running", reduceTotal);
           metricsRecord.setMetric("taskSlots", (short)maxCurrentTasks);
           metricsRecord.incrMetric("tasks_completed", numCompletedTasks);
-          metricsRecord.update();
+          metricsRecord.incrMetric("tasks_failed_timeout", timedoutTasks);
+          metricsRecord.incrMetric("tasks_failed_ping", tasksFailedPing);
         }
         numCompletedTasks = 0;
+        timedoutTasks = 0;
+        tasksFailedPing = 0;
       }
+      metricsRecord.update();
     }
   }
     
   private TaskTrackerMetrics myMetrics = null;
 
+  public TaskTrackerMetrics getTaskTrackerMetrics() {
+    return myMetrics;
+  }
+  
   /**
    * A list of tips that should be cleaned up.
    */
@@ -991,6 +1010,7 @@
           LOG.info(tip.getTask().getTaskId() + ": " + msg);
           ReflectionUtils.logThreadInfo(LOG, "lost task", 30);
           tip.reportDiagnosticInfo(msg);
+          myMetrics.timedoutTask();
           purgeTask(tip, true);
         }
       }



Mime
View raw message