hadoop-common-commits mailing list archives

From: omal...@apache.org
Subject: svn commit: r555114 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/mapred/ src/webapps/job/
Date: Tue, 10 Jul 2007 23:30:42 GMT
Author: omalley
Date: Tue Jul 10 16:30:41 2007
New Revision: 555114

URL: http://svn.apache.org/viewvc?view=rev&rev=555114
Log:
HADOOP-1554.  Log killed tasks in the JobHistory. Contributed by Devaraj.


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
    lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp
    lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jul 10 16:30:41 2007
@@ -283,6 +283,8 @@
  87. HADOOP-1571.  Add contrib lib directories to root build.xml
      javadoc classpath.  (Michael Stack via tomwhite)
 
+ 88. HADOOP-1554.  Log killed tasks to the job history and display them on the
+     web/ui. (Devaraj Das via omalley)
 
 Release 0.13.0 - 2007-06-08
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/DefaultJobHistoryParser.java Tue Jul 10 16:30:41 2007
@@ -169,7 +169,7 @@
   
   
   // call this only for jobs that succeeded for better results. 
-  static class BadNodesFilter implements JobHistory.Listener {
+  static class FailedOnNodesFilter implements JobHistory.Listener {
     private Map<String, Set<String>> badNodesToNumFailedTasks =
       new HashMap<String, Set<String>>();
     
@@ -183,6 +183,34 @@
           recType.equals(JobHistory.RecordTypes.ReduceAttempt)) {
         
         if (Values.FAILED.name().equals(values.get(Keys.TASK_STATUS)) ){
+          String hostName = values.get(Keys.HOSTNAME);
+          String taskid = values.get(Keys.TASKID); 
+          Set<String> tasks = badNodesToNumFailedTasks.get(hostName); 
+          if (null == tasks ){
+            tasks = new TreeSet<String>(); 
+            tasks.add(taskid);
+            badNodesToNumFailedTasks.put(hostName, tasks);
+          }else{
+            tasks.add(taskid);
+          }
+        }
+      }      
+    }
+  }
+  static class KilledOnNodesFilter implements JobHistory.Listener {
+    private Map<String, Set<String>> badNodesToNumFailedTasks =
+      new HashMap<String, Set<String>>();
+    
+    Map<String, Set<String>> getValues(){
+      return badNodesToNumFailedTasks; 
+    }
+    public void handle(JobHistory.RecordTypes recType, Map<Keys, String> values)
+      throws IOException {
+      
+      if (recType.equals(JobHistory.RecordTypes.MapAttempt) || 
+          recType.equals(JobHistory.RecordTypes.ReduceAttempt)) {
+        
+        if (Values.KILLED.name().equals(values.get(Keys.TASK_STATUS)) ){
           String hostName = values.get(Keys.HOSTNAME);
           String taskid = values.get(Keys.TASKID); 
           Set<String> tasks = badNodesToNumFailedTasks.get(hostName); 
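
For orientation, a minimal usage sketch of the new filter (not part of this patch): feed a job history file through KilledOnNodesFilter and read back the killed task ids grouped by host, which is what jobdetailshistory.jsp does below. The history file path is illustrative, and since the filter class and its getValues() method are package-private, the sketch assumes it lives in org.apache.hadoop.mapred.

    package org.apache.hadoop.mapred;

    import java.io.File;
    import java.util.Map;
    import java.util.Set;

    // Hypothetical driver class; not part of this commit.
    class KilledTasksReport {
      public static void main(String[] args) throws Exception {
        // e.g. <hadoop.log.dir>/history/<jobTrackerId>_<jobid>, as the JSPs build it
        File historyFile = new File(args[0]);

        DefaultJobHistoryParser.KilledOnNodesFilter filter =
          new DefaultJobHistoryParser.KilledOnNodesFilter();
        JobHistory.parseHistory(historyFile, filter);

        // host name -> task ids that had attempts killed on that host
        for (Map.Entry<String, Set<String>> entry : filter.getValues().entrySet()) {
          System.out.println(entry.getKey() + ": " + entry.getValue());
        }
      }
    }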

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobHistory.java Tue Jul 10 16:30:41 2007
@@ -564,6 +564,28 @@
                                        String.valueOf(timestamp), hostName, error}); 
         }
       }
+    }
+    /**
+     * Log task attempt killed event.  
+     * @param jobId jobid
+     * @param taskId taskid
+     * @param taskAttemptId task attempt id
+     * @param timestamp timestamp
+     * @param hostName hostname of this task attempt.
+     * @param error error message if any for this task attempt. 
+     */
+    public static void logKilled(String jobId, String taskId, String taskAttemptId, 
+                                 long timestamp, String hostName, String error){
+      if (!disableHistory){
+        PrintWriter writer = (PrintWriter)openJobs.get(JOBTRACKER_START_TIME + "_" + jobId);
+        if (null != writer){
+          JobHistory.log(writer, RecordTypes.MapAttempt, 
+                         new Enum[]{Keys.TASK_TYPE, Keys.TASKID, Keys.TASK_ATTEMPT_ID, Keys.TASK_STATUS, 
+                                    Keys.FINISH_TIME, Keys.HOSTNAME, Keys.ERROR},
+                         new String[]{ Values.MAP.name(), taskId, taskAttemptId, Values.KILLED.name(),
+                                       String.valueOf(timestamp), hostName, error}); 
+        }
+      }
     } 
   }
   /**
@@ -638,6 +660,29 @@
         }
       }
     }
+    /**
+     * Log killed reduce task attempt. 
+     * @param jobId job id 
+     * @param taskId task id
+     * @param taskAttemptId task attempt id
+     * @param timestamp time stamp when task attempt was killed
+     * @param hostName host name of the task attempt.  
+     * @param error error message of the task. 
+     */
+    public static void logKilled(String jobId, String taskId, String taskAttemptId, long timestamp, 
+                                 String hostName, String error){
+      if (!disableHistory){
+        PrintWriter writer = (PrintWriter)openJobs.get(JOBTRACKER_START_TIME + "_" + jobId);
+        if (null != writer){
+          JobHistory.log(writer, RecordTypes.ReduceAttempt, 
+                         new Enum[]{  Keys.TASK_TYPE, Keys.TASKID, Keys.TASK_ATTEMPT_ID, Keys.TASK_STATUS, 
+                                      Keys.FINISH_TIME, Keys.HOSTNAME, Keys.ERROR },
+                         new String[]{ Values.REDUCE.name(), taskId, taskAttemptId, Values.KILLED.name(),
+                                       String.valueOf(timestamp), hostName, error }); 
+        }
+      }
+    }
+
   }
   /**
    * Callback interface for reading back log events from JobHistory. This interface 
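
The two logKilled helpers added above mirror the existing logFailed methods: they append a MapAttempt or ReduceAttempt record with TASK_STATUS set to KILLED to the job's open history file, and are no-ops when history is disabled or the job has no open writer. A hedged sketch of the call shape only; the job, task, attempt ids, host name, and diagnostic string below are placeholders:

    // Illustrative calls, not taken from the patch.
    long now = System.currentTimeMillis();
    JobHistory.MapAttempt.logKilled("job_0001", "tip_0001_m_000000",
        "task_0001_m_000000_0", now, "tracker_host1", "Lost task tracker");
    JobHistory.ReduceAttempt.logKilled("job_0001", "tip_0001_r_000000",
        "task_0001_r_000000_0", now, "tracker_host1", "Lost task tracker");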

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobInProgress.java Tue Jul 10 16:30:41 2007
@@ -929,18 +929,30 @@
     String taskTrackerName = status.getTaskTracker();
     if (status.getIsMap()) {
       JobHistory.MapAttempt.logStarted(profile.getJobId(), 
-                                       tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
-                                       taskTrackerName); 
-      JobHistory.MapAttempt.logFailed(profile.getJobId(), 
-                                      tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
-                                      taskTrackerName, status.getDiagnosticInfo()); 
+                tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
+                taskTrackerName);
+      if (status.getRunState() == TaskStatus.State.FAILED) {
+        JobHistory.MapAttempt.logFailed(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      } else {
+        JobHistory.MapAttempt.logKilled(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      }
     } else {
       JobHistory.ReduceAttempt.logStarted(profile.getJobId(), 
-                                          tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
-                                          taskTrackerName); 
-      JobHistory.ReduceAttempt.logFailed(profile.getJobId(), 
-                                         tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
-                                         taskTrackerName, status.getDiagnosticInfo()); 
+                tip.getTIPId(), status.getTaskId(), status.getStartTime(), 
+                taskTrackerName);
+      if (status.getRunState() == TaskStatus.State.FAILED) {
+        JobHistory.ReduceAttempt.logFailed(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      } else {
+        JobHistory.ReduceAttempt.logKilled(profile.getJobId(), 
+                tip.getTIPId(), status.getTaskId(), System.currentTimeMillis(),
+                taskTrackerName, status.getDiagnosticInfo());
+      }
     }
         
     // After this, try to assign tasks with the one after this, so that
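
The hunk above routes the history record for an unsuccessful task attempt by its run state: FAILED attempts still go to logFailed, and any other state that reaches this path (in practice KILLED) goes to the new logKilled. Purely as an illustration of that decision, the branch could be written as a small helper; the method name logAttemptEnd is made up and does not exist in JobInProgress:

    // Hypothetical helper restating the branch above; not part of the patch.
    private void logAttemptEnd(TaskInProgress tip, TaskStatus status,
                               String taskTrackerName) {
      boolean failed = (status.getRunState() == TaskStatus.State.FAILED);
      long now = System.currentTimeMillis();
      if (status.getIsMap()) {
        if (failed) {
          JobHistory.MapAttempt.logFailed(profile.getJobId(), tip.getTIPId(),
              status.getTaskId(), now, taskTrackerName, status.getDiagnosticInfo());
        } else {
          JobHistory.MapAttempt.logKilled(profile.getJobId(), tip.getTIPId(),
              status.getTaskId(), now, taskTrackerName, status.getDiagnosticInfo());
        }
      } else {
        if (failed) {
          JobHistory.ReduceAttempt.logFailed(profile.getJobId(), tip.getTIPId(),
              status.getTaskId(), now, taskTrackerName, status.getDiagnosticInfo());
        } else {
          JobHistory.ReduceAttempt.logKilled(profile.getJobId(), tip.getTIPId(),
              status.getTaskId(), now, taskTrackerName, status.getDiagnosticInfo());
        }
      }
    }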

Modified: lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp (original)
+++ lucene/hadoop/trunk/src/webapps/job/jobdetailshistory.jsp Tue Jul 10 16:30:41 2007
@@ -29,26 +29,29 @@
 <b>Launched At : </b> <%=StringUtils.getFormattedTimeWithDiff(dateFormat, job.getLong(Keys.LAUNCH_TIME), job.getLong(Keys.SUBMIT_TIME)) %><br/>
 <b>Finished At : </b>  <%=StringUtils.getFormattedTimeWithDiff(dateFormat, job.getLong(Keys.FINISH_TIME), job.getLong(Keys.LAUNCH_TIME)) %><br/>
 <b>Status : </b> <%= ((job.get(Keys.JOB_STATUS) == null)?"Incomplete" :job.get(Keys.JOB_STATUS)) %><br/> 
-<b><a href="analysejobhistory.jsp?jobid=<%=jobid %>&jobTrackerId=<%=jobTrackerId %>">Analyse This Job</a></b> 
-<hr/>
-<center>
 <%
 	Map<String, JobHistory.Task> tasks = job.getAllTasks();
 	int totalMaps = 0 ; 
 	int totalReduces = 0; 
 	int failedMaps = 0; 
+	int killedMaps = 0;
 	int failedReduces = 0 ; 
+	int killedReduces = 0;
 	
 	long mapStarted = 0 ; 
 	long mapFinished = 0 ; 
 	long reduceStarted = 0 ; 
 	long reduceFinished = 0; 
+        
+        Map <String,String> allHosts = new TreeMap<String,String>();
 	
 	for( JobHistory.Task task : tasks.values() ) {
 	  
 	  long startTime = task.getLong(Keys.START_TIME) ; 
 	  long finishTime = task.getLong(Keys.FINISH_TIME) ; 
 	  
+          allHosts.put(task.get(Keys.HOSTNAME), null);
+
 	  if( Values.MAP.name().equals(task.get(Keys.TASK_TYPE)) ){
 	    if( mapStarted==0 || mapStarted > startTime ){
 	      mapStarted = startTime; 
@@ -63,6 +66,9 @@
 	        if( Values.FAILED.name().equals(attempt.get(Keys.TASK_STATUS)) ) {
 	            failedMaps++; 
 	        }
+	        if( Values.KILLED.name().equals(attempt.get(Keys.TASK_STATUS)) ) {
+	            killedMaps++; 
+	        }
 	    }
 	  }else{
 	    if( reduceStarted==0||reduceStarted > startTime ){
@@ -77,13 +83,20 @@
 	        if( Values.FAILED.name().equals(attempt.get(Keys.TASK_STATUS)) ) {
 	            failedReduces++; 
 	        }
+	        if( Values.KILLED.name().equals(attempt.get(Keys.TASK_STATUS)) ) {
+	            killedReduces++; 
+	        }
 	    }
 	  }
 	}
 %>
+<b>Number of nodes used: </b> <%=allHosts.size() %><br/>
+<b><a href="analysejobhistory.jsp?jobid=<%=jobid %>&jobTrackerId=<%=jobTrackerId %>">Analyse This Job</a></b> 
+<hr/>
+<center>
 <table border="2" cellpadding="5" cellspacing="2">
 <tr>
-<td>Kind</td><td>Total Tasks</td><td>Finished tasks</td><td>Failed tasks</td><td>Start Time</td><td>Finish Time</td>
+<td>Kind</td><td>Total Tasks(successful+failed+killed)</td><td>Successful tasks</td><td>Failed tasks</td><td>Killed tasks</td><td>Start Time</td><td>Finish Time</td>
 </tr>
 <tr>
 <td>Map</td>
@@ -93,6 +106,8 @@
 	  <%=job.getInt(Keys.FINISHED_MAPS) %></a></td>
 	<td><a href="jobtaskshistory.jsp?jobid=<%=jobid %>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.MAP.name() %>&status=<%=Values.FAILED %>">
 	  <%=failedMaps %></a></td>
+	<td><a href="jobtaskshistory.jsp?jobid=<%=jobid %>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.MAP.name() %>&status=<%=Values.KILLED %>">
+	  <%=killedMaps %></a></td>
 	<td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, mapStarted, 0) %></td>
 	<td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, mapFinished, mapStarted) %></td>
 </tr>
@@ -104,6 +119,8 @@
 	  <%=job.getInt(Keys.FINISHED_REDUCES)%></a></td>
 	<td><a href="jobtaskshistory.jsp?jobid=<%=jobid %>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.REDUCE.name() %>&status=<%=Values.FAILED %>">
 	  <%=failedReduces%></a></td>
+	<td><a href="jobtaskshistory.jsp?jobid=<%=jobid %>&jobTrackerId=<%=jobTrackerId %>&taskType=<%=Values.REDUCE.name() %>&status=<%=Values.KILLED %>">
+	  <%=killedReduces%></a></td>  
 	<td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, reduceStarted, 0) %></td>
 	<td><%=StringUtils.getFormattedTimeWithDiff(dateFormat, reduceFinished, reduceStarted) %></td>
 </tr>
@@ -111,7 +128,7 @@
 
 <br/>
  <%
-	DefaultJobHistoryParser.BadNodesFilter filter = new DefaultJobHistoryParser.BadNodesFilter();
+	DefaultJobHistoryParser.FailedOnNodesFilter filter = new DefaultJobHistoryParser.FailedOnNodesFilter();
 	String dir = System.getProperty("hadoop.log.dir") + File.separator + "history" ; 
  
 	JobHistory.parseHistory(new File(dir, jobTrackerId+"_" + jobid), filter); 
@@ -143,6 +160,40 @@
 	}
  %>
 </table>
+<br/>
+ <%
+	DefaultJobHistoryParser.KilledOnNodesFilter killedFilter = new DefaultJobHistoryParser.KilledOnNodesFilter();
+	dir = System.getProperty("hadoop.log.dir") + File.separator + "history" ; 
+ 
+	JobHistory.parseHistory(new File(dir, jobTrackerId+"_" + jobid), killedFilter); 
+	badNodes = killedFilter.getValues(); 
+	if( badNodes.size() > 0 ) {
+ %>
+<h3>Killed task attempts by nodes</h3>
+<table border="1">
+<tr><td>Hostname</td><td>Killed Tasks</td></tr>
+ <%	  
+  for (Map.Entry<String, Set<String>> entry : badNodes.entrySet()) {
+    String node = entry.getKey();
+    Set<String> killedTasks = entry.getValue();
+%>
+	<tr>
+		<td><%=node %></td>
+		<td>
+<%
+		for( String t : killedTasks ) {
+%>
+		 <a href="taskdetailshistory.jsp?jobid=<%=jobid%>&jobTrackerId=<%=jobTrackerId %>&taskid=<%=t %>"><%=t %></a>,&nbsp;
+<%		  
+		}
+%>	
+		</td>
+	</tr>
+<%	  
+     }
+	}
+ %>
+</table>
  </center>
 
-</body></html>
\ No newline at end of file
+</body></html>

Modified: lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp?view=diff&rev=555114&r1=555113&r2=555114
==============================================================================
--- lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp (original)
+++ lucene/hadoop/trunk/src/webapps/job/jobtaskshistory.jsp Tue Jul 10 16:30:41 2007
@@ -33,9 +33,12 @@
 <%
 	for( JobHistory.Task task : tasks.values() ) {
 	  if( taskType.equals(task.get(Keys.TASK_TYPE) ) ){
-	    if( taskStatus.equals(task.get(Keys.TASK_STATUS)) || taskStatus.equals("all")){
-	       printTask(jobid, jobTrackerId, task, out); 
-	    }
+            Map <String, TaskAttempt> taskAttempts = task.getTaskAttempts();
+            for (JobHistory.TaskAttempt taskAttempt : taskAttempts.values()) {
+	      if( taskStatus.equals(taskAttempt.get(Keys.TASK_STATUS)) || taskStatus.equals("all")){
+	         printTask(jobid, jobTrackerId, task, out); 
+	      }
+            }
 	  }
 	}
 %>


