hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject svn commit: r761637 - in /hadoop/core/branches/branch-0.20: ./ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/
Date Fri, 03 Apr 2009 12:01:54 GMT
Author: ddas
Date: Fri Apr  3 12:01:54 2009
New Revision: 761637

URL: http://svn.apache.org/viewvc?rev=761637&view=rev
Log:
Merge -r 761631:761632 from trunk onto 0.20 branch. Fixes HADOOP-5337.

Added:
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerSafeMode.java
      - copied unchanged from r761632, hadoop/core/trunk/src/test/org/apache/hadoop/mapred/TestJobTrackerSafeMode.java
Modified:
    hadoop/core/branches/branch-0.20/   (props changed)
    hadoop/core/branches/branch-0.20/CHANGES.txt   (contents, props changed)
    hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobTracker.java
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/MiniMRCluster.java
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestartWithLostTracker.java
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestLostTracker.java

Propchange: hadoop/core/branches/branch-0.20/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Apr  3 12:01:54 2009
@@ -1,2 +1,2 @@
 /hadoop/core/branches/branch-0.19:713112
-/hadoop/core/trunk:727001,727117,727191,727212,727217,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,736426,738328,738697,740077,740157,741703,741762,743745,743816,743892,744894,745180,746010,746206,746227,746233,746274,746338,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755960,755986,755998,756352,757448,757624,757849,758156,759398,759932,760502,760783,761046,761482
+/hadoop/core/trunk:727001,727117,727191,727212,727217,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,736426,738328,738697,740077,740157,741703,741762,743745,743816,743892,744894,745180,746010,746206,746227,746233,746274,746338,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755960,755986,755998,756352,757448,757624,757849,758156,759398,759932,760502,760783,761046,761482,761632

Modified: hadoop/core/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=761637&r1=761636&r2=761637&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.20/CHANGES.txt Fri Apr  3 12:01:54 2009
@@ -836,6 +836,10 @@
 
     HADOOP-5605. All the replicas incorrectly got marked as corrupt. (hairong)
 
+    HADOOP-5337. JobTracker, upon restart, now waits for the TaskTrackers to
+    join back before scheduling new tasks. This fixes race conditions associated
+    with greedy scheduling as was the case earlier. (Amar Kamat via ddas) 
+
 Release 0.19.2 - Unreleased
 
   BUG FIXES

Propchange: hadoop/core/branches/branch-0.20/CHANGES.txt
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Fri Apr  3 12:01:54 2009
@@ -1,3 +1,3 @@
 /hadoop/core/branches/branch-0.18/CHANGES.txt:727226
 /hadoop/core/branches/branch-0.19/CHANGES.txt:713112
-/hadoop/core/trunk/CHANGES.txt:727001,727117,727191,727212,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,735082,736426,738602,738697,739416,740077,740157,741703,741762,743296,743745,743816,743892,744894,745180,745268,746010,746193,746206,746227,746233,746274,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752514,752555,752590,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755986,755998,756352,757448,757624,757849,758156,759398,759932,760502,760783,761046,761482
+/hadoop/core/trunk/CHANGES.txt:727001,727117,727191,727212,727228,727255,727869,728187,729052,729987,732385,732572,732613,732777,732838,732869,733887,734870,734916,735082,736426,738602,738697,739416,740077,740157,741703,741762,743296,743745,743816,743892,744894,745180,745268,746010,746193,746206,746227,746233,746274,746902-746903,746925,746944,746968,746970,747279,747289,747802,748084,748090,748783,749262,749318,749863,750533,752073,752514,752555,752590,752609,752834,752836,752913,752932,753112-753113,753346,754645,754847,754927,755035,755226,755348,755370,755418,755426,755790,755905,755938,755986,755998,756352,757448,757624,757849,758156,759398,759932,760502,760783,761046,761482,761632

Modified: hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobTracker.java?rev=761637&r1=761636&r2=761637&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobTracker.java (original)
+++ hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/JobTracker.java Fri Apr  3 12:01:54 2009
@@ -672,6 +672,9 @@
     Set<JobID> jobsToRecover; // set of jobs to be recovered
     
     private int totalEventsRecovered = 0;
+
+    Set<String> recoveredTrackers = 
+      Collections.synchronizedSet(new HashSet<String>());
     
     /** A custom listener that replays the events in the order in which the 
      * events (task attempts) occurred. 
@@ -848,6 +851,18 @@
       return jobsToRecover.size() != 0;
     }
 
+    public boolean shouldSchedule() {
+      return recoveredTrackers.isEmpty();
+    }
+
+    private void markTracker(String trackerName) {
+      recoveredTrackers.add(trackerName);
+    }
+
+    void unMarkTracker(String trackerName) {
+      recoveredTrackers.remove(trackerName);
+    }
+
     Set<JobID> getJobsToRecover() {
       return jobsToRecover;
     }
@@ -984,6 +999,7 @@
       // IV. Register a new tracker
       boolean isTrackerRegistered = getTaskTracker(trackerName) != null;
       if (!isTrackerRegistered) {
+        markTracker(trackerName); // add the tracker to recovery-manager
         addNewTracker(ttStatus);
       }
       
@@ -2321,6 +2337,8 @@
         // started JobTracker
         if (hasRestarted()) {
           addRestartInfo = true;
+          // inform the recovery manager about this tracker joining back
+          recoveryManager.unMarkTracker(trackerName);
         } else {
           // Jobtracker might have restarted but no recovery is needed
           // otherwise this code should not be reached
@@ -2362,7 +2380,7 @@
     List<TaskTrackerAction> actions = new ArrayList<TaskTrackerAction>();
       
     // Check for new tasks to be executed on the tasktracker
-    if (acceptNewTasks && !isBlacklisted) {
+    if (recoveryManager.shouldSchedule() && acceptNewTasks && !isBlacklisted) {
       TaskTrackerStatus taskTrackerStatus = getTaskTracker(trackerName);
       if (taskTrackerStatus == null) {
         LOG.warn("Unknown task tracker polling; ignoring: " + trackerName);
@@ -3306,6 +3324,9 @@
       trackerToJobsToCleanup.remove(trackerName);
     }
     
+    // Inform the recovery manager
+    recoveryManager.unMarkTracker(trackerName);
+    
     Set<TaskAttemptID> lostTasks = trackerToTaskMap.get(trackerName);
     trackerToTaskMap.remove(trackerName);
 

Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/MiniMRCluster.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/MiniMRCluster.java?rev=761637&r1=761636&r2=761637&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/MiniMRCluster.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/MiniMRCluster.java Fri Apr  3 12:01:54 2009
@@ -438,7 +438,7 @@
     this.jobTrackerPort = jobTrackerPort;
     this.taskTrackerPort = taskTrackerPort;
     this.jobTrackerInfoPort = 0;
-    this.numTaskTrackers = numTaskTrackers;
+    this.numTaskTrackers = 0;
     this.namenode = namenode;
     this.ugi = ugi;
     this.conf = conf; // this is the conf the mr starts with
@@ -448,27 +448,18 @@
 
     // Create the TaskTrackers
     for (int idx = 0; idx < numTaskTrackers; idx++) {
+      String rack = null;
+      String host = null;
       if (racks != null) {
-        StaticMapping.addNodeToRack(hosts[idx],racks[idx]);
+        rack = racks[idx];
       }
       if (hosts != null) {
-        NetUtils.addStaticResolution(hosts[idx], "localhost");
+        host = hosts[idx];
       }
-      TaskTrackerRunner taskTracker;
-      taskTracker = new TaskTrackerRunner(idx, numDir, 
-          hosts == null ? null : hosts[idx], conf);
       
-      Thread taskTrackerThread = new Thread(taskTracker);
-      taskTrackerList.add(taskTracker);
-      taskTrackerThreadList.add(taskTrackerThread);
+      startTaskTracker(host, rack, idx, numDir);
     }
 
-    // Start the MiniMRCluster
-        
-    for (Thread taskTrackerThread : taskTrackerThreadList){
-      taskTrackerThread.start();
-    }
-    
     this.job = createJobConf(conf);
     waitUntilIdle();
   }
@@ -598,20 +589,44 @@
    * Kill the tasktracker.
    */
   public void stopTaskTracker(int id) {
-    taskTrackerList.get(id).shutdown();
+    TaskTrackerRunner tracker = taskTrackerList.remove(id);
+    tracker.shutdown();
 
-    taskTrackerThreadList.get(id).interrupt();
+    Thread thread = taskTrackerThreadList.remove(id);
+    thread.interrupt();
     
     try {
-      taskTrackerThreadList.get(id).join();
+      thread.join();
       // This will break the wait until idle loop
-      taskTrackerList.get(id).isDead = true;
+      tracker.isDead = true;
+      --numTaskTrackers;
     } catch (InterruptedException ex) {
       LOG.error("Problem waiting for task tracker to finish", ex);
     }
   }
   
   /**
+   * Start the tasktracker.
+   */
+  public void startTaskTracker(String host, String rack, int idx, int numDir) 
+  throws IOException {
+    if (rack != null) {
+      StaticMapping.addNodeToRack(host, rack);
+    }
+    if (host != null) {
+      NetUtils.addStaticResolution(host, "localhost");
+    }
+    TaskTrackerRunner taskTracker;
+    taskTracker = new TaskTrackerRunner(idx, numDir, host, conf);
+    
+    Thread taskTrackerThread = new Thread(taskTracker);
+    taskTrackerList.add(taskTracker);
+    taskTrackerThreadList.add(taskTrackerThread);
+    taskTrackerThread.start();
+    ++numTaskTrackers;
+  }
+  
+  /**
    * Get the tasktrackerID in MiniMRCluster with given trackerName.
    */
   int getTaskTrackerID(String trackerName) {

Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestartWithLostTracker.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestartWithLostTracker.java?rev=761637&r1=761636&r2=761637&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestartWithLostTracker.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestJobTrackerRestartWithLostTracker.java Fri Apr  3 12:01:54 2009
@@ -105,10 +105,9 @@
     UtilsForTests.waitTillDone(jobClient);
 
     // Check if the tasks on the lost tracker got re-executed
-    assertTrue("Tracker killed while the jobtracker was down did not get lost "
-                + "upon restart", 
-                jobClient.getClusterStatus().getTaskTrackers() 
-                < mr.getNumTaskTrackers());
+    assertEquals("Tracker killed while the jobtracker was down did not get lost "
+                 + "upon restart", 
+                 jobClient.getClusterStatus().getTaskTrackers(), 1);
 
     // validate the history file
     TestJobHistory.validateJobHistoryFileFormat(id, job, "SUCCESS", true);

Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestLostTracker.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestLostTracker.java?rev=761637&r1=761636&r2=761637&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestLostTracker.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestLostTracker.java Fri Apr  3 12:01:54 2009
@@ -86,8 +86,7 @@
     UtilsForTests.waitTillDone(jobClient);
 
     // Check if the tasks on the lost tracker got killed and re-executed
-    assertTrue(jobClient.getClusterStatus().getTaskTrackers() 
-                < mr.getNumTaskTrackers());
+    assertEquals(jobClient.getClusterStatus().getTaskTrackers(), 1);
     assertEquals(JobStatus.SUCCEEDED, rJob.getJobState());
     TaskInProgress tip = mr.getJobTrackerRunner().getJobTracker().
                          getTip(taskid.getTaskID());



Mime
View raw message