hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r502716 - in /lucene/hadoop/trunk: CHANGES.txt src/java/org/apache/hadoop/mapred/JobTracker.java
Date Fri, 02 Feb 2007 20:08:54 GMT
Author: cutting
Date: Fri Feb  2 12:08:53 2007
New Revision: 502716

URL: http://svn.apache.org/viewvc?view=rev&rev=502716
Log:
HADOOP-969.  Fix a deadlock in the JobTracker.  Contributed by Owen.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=502716&r1=502715&r2=502716
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Feb  2 12:08:53 2007
@@ -135,6 +135,8 @@
     whose comparators and/or i/o types were in the job's jar.
     (Dennis Kubes via cutting)
 
+42. HADOOP-969.  Fix a deadlock in JobTracker.  (omalley via cutting)
+
 
 Release 0.10.1 - 2007-01-10
 

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java?view=diff&rev=502716&r1=502715&r2=502716
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobTracker.java Fri Feb  2 12:08:53 2007
@@ -293,41 +293,42 @@
             while (shouldRun) {
               try {
                 Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
-                 
-                synchronized (jobs) {
-                    synchronized (jobsByArrival) {
+                List<JobInProgress> retiredJobs = new ArrayList();
+                long retireBefore = System.currentTimeMillis() - 
+                                       RETIRE_JOB_INTERVAL;
+                synchronized (jobsByArrival) {
+                  for(JobInProgress job: jobsByArrival) {
+                    if (job.getStatus().getRunState() != JobStatus.RUNNING &&
+                        job.getStatus().getRunState() != JobStatus.PREP &&
+                        (job.getFinishTime()  < retireBefore)) {
+                      retiredJobs.add(job);
+                    }
+                  }
+                }
+                if (!retiredJobs.isEmpty()) {
+                  synchronized (JobTracker.this) {
+                    synchronized (jobs) {
+                      synchronized (jobsByArrival) {
                         synchronized (jobInitQueue) {
-                            for (Iterator it = jobs.keySet().iterator(); it.hasNext(); ) {
-                                String jobid = (String) it.next();
-                                JobInProgress job = (JobInProgress) jobs.get(jobid);
-
-                                if (job.getStatus().getRunState() != JobStatus.RUNNING &&
-                                    job.getStatus().getRunState() != JobStatus.PREP &&
-                                    (job.getFinishTime() + RETIRE_JOB_INTERVAL < System.currentTimeMillis())) {
-                                    // Ok, this call to removeTaskEntries
-                                    // is dangerous in some very very obscure
-                                    // cases; e.g. when job completed, exceeded
-                                    // RETIRE_JOB_INTERVAL time-limit and yet
-                                    // some task (taskid) wasn't complete!
-                                    removeJobTasks(job);
-                                    
-                                    it.remove();
-                                    synchronized (userToJobsMap) {
-                                        ArrayList<JobInProgress> userJobs =
-                                            userToJobsMap.get(job.getProfile().getUser());
-                                        synchronized (userJobs) {
-                                            userJobs.remove(job);
-                                        }
-                                    }
-                                    jobInitQueue.remove(job);
-                                    jobsByArrival.remove(job);
-                                    
-                                    LOG.info("Retired job with id: '" + 
-                                            job.getProfile().getJobId() + "'");
-                                }
+                          for (JobInProgress job: retiredJobs) {
+                            removeJobTasks(job);
+                            jobs.remove(job.getProfile().getJobId());
+                            jobInitQueue.remove(job);
+                            jobsByArrival.remove(job);
+                            synchronized (userToJobsMap) {
+                              ArrayList<JobInProgress> userJobs =
+                                userToJobsMap.get(job.getProfile().getUser());
+                              synchronized (userJobs) {
+                                userJobs.remove(job);
+                              }
                             }
+                            LOG.info("Retired job with id: '" + 
+                                     job.getProfile().getJobId() + "'");
+                          }
                         }
+                      }
                     }
+                  }
                 }
               } catch (InterruptedException t) {
                 shouldRun = false;
@@ -446,8 +447,8 @@
     //
 
     // All the known jobs.  (jobid->JobInProgress)
-    TreeMap jobs = new TreeMap();
-    Vector jobsByArrival = new Vector();
+    Map<String, JobInProgress> jobs = new TreeMap();
+    List<JobInProgress> jobsByArrival = new ArrayList();
 
     // (user -> list of JobInProgress)
     TreeMap<String, ArrayList<JobInProgress>> userToJobsMap = new TreeMap();



Mime
View raw message