hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yhema...@apache.org
Subject svn commit: r755591 - in /hadoop/core/branches/branch-0.20: ./ src/core/org/apache/hadoop/util/ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/util/
Date Wed, 18 Mar 2009 13:22:54 GMT
Author: yhemanth
Date: Wed Mar 18 13:22:53 2009
New Revision: 755591

URL: http://svn.apache.org/viewvc?rev=755591&view=rev
Log:
HADOOP-5516. Fix NullPointerException in TaskMemoryManagerThread that occurs when monitored
processes disappear while the thread is running. Contributed by Vinod Kumar Vavilapalli.

Modified:
    hadoop/core/branches/branch-0.20/CHANGES.txt
    hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java
    hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java
    hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java

Modified: hadoop/core/branches/branch-0.20/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=755591&r1=755590&r2=755591&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/CHANGES.txt (original)
+++ hadoop/core/branches/branch-0.20/CHANGES.txt Wed Mar 18 13:22:53 2009
@@ -751,6 +751,10 @@
     HADOOP-5514. Fix JobTracker metrics and add metrics for wating, failed
     tasks. (cdouglas)
 
+    HADOOP-5516. Fix NullPointerException in TaskMemoryManagerThread that comes when
+    monitored processes disappear when the thread is running.
+    (Vinod Kumar Vavilapalli via yhemanth)
+
 Release 0.19.2 - Unreleased
 
   BUG FIXES

Modified: hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java?rev=755591&r1=755590&r2=755591&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java (original)
+++ hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java Wed Mar 18 13:22:53 2009
@@ -85,7 +85,8 @@
   }
 
   /**
-   * Get the process-tree with latest state.
+   * Get the process-tree with latest state. If the root-process is not alive,
+   * an empty tree will be returned.
    * 
    * @return the process-tree with latest state.
    */
@@ -101,14 +102,19 @@
       for (Integer proc : processList) {
         // Get information for each process
         ProcessInfo pInfo = new ProcessInfo(proc);
-        constructProcessInfo(pInfo);
-        allProcessInfo.put(proc, pInfo);
-        if (proc.equals(this.pid)) {
-          me = pInfo; // cache 'me'
-          processTree.put(proc, pInfo);
+        if (constructProcessInfo(pInfo) != null) {
+          allProcessInfo.put(proc, pInfo);
+          if (proc.equals(this.pid)) {
+            me = pInfo; // cache 'me'
+            processTree.put(proc, pInfo);
+          }
         }
       }
 
+      if (me == null) {
+        return this; 
+      }
+
       // Add each process to its parent.
       for (Map.Entry<Integer, ProcessInfo> entry : allProcessInfo.entrySet()) {
         Integer pID = entry.getKey();

Modified: hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java?rev=755591&r1=755590&r2=755591&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java (original)
+++ hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java Wed Mar 18 13:22:53 2009
@@ -30,6 +30,7 @@
 import org.apache.hadoop.mapred.TaskTracker;
 import org.apache.hadoop.mapred.TaskTracker.TaskInProgress;
 import org.apache.hadoop.util.ProcfsBasedProcessTree;
+import org.apache.hadoop.util.StringUtils;
 
 /**
  * Manages memory usage of tasks running under this TT. Kills any task-trees
@@ -163,76 +164,84 @@
       // Now, check memory usage and kill any overflowing tasks
       for (Iterator<Map.Entry<TaskAttemptID, ProcessTreeInfo>> it = processTreeInfoMap
           .entrySet().iterator(); it.hasNext();) {
-
         Map.Entry<TaskAttemptID, ProcessTreeInfo> entry = it.next();
         TaskAttemptID tid = entry.getKey();
         ProcessTreeInfo ptInfo = entry.getValue();
-        String pId = ptInfo.getPID();
+        try {
+          String pId = ptInfo.getPID();
 
-        // Initialize any uninitialized processTrees
-        if (pId == null) {
-          // get pid from pid-file
-          pId = getPid(ptInfo.pidFile); 
-          if (pId != null) {
-            // PID will be null, either if the pid file is yet to be created
-            // or if the tip is finished and we removed pidFile, but the TIP
-            // itself is still retained in runningTasks till successful
-            // transmission to JT
-
-            // create process tree object
-            ProcfsBasedProcessTree pt = new ProcfsBasedProcessTree(pId);
-            LOG.debug("Tracking ProcessTree " + pId + " for the first time");
-
-            ptInfo.setPid(pId);
-            ptInfo.setProcessTree(pt);
-            processTreeInfoMap.put(tid, ptInfo);
+          // Initialize any uninitialized processTrees
+          if (pId == null) {
+            // get pid from pid-file
+            pId = getPid(ptInfo.pidFile);
+            if (pId != null) {
+              // PID will be null, either if the pid file is yet to be created
+              // or if the tip is finished and we removed pidFile, but the TIP
+              // itself is still retained in runningTasks till successful
+              // transmission to JT
+
+              // create process tree object
+              ProcfsBasedProcessTree pt = new ProcfsBasedProcessTree(pId);
+              LOG.debug("Tracking ProcessTree " + pId + " for the first time");
+
+              ptInfo.setPid(pId);
+              ptInfo.setProcessTree(pt);
+            }
           }
-        }
-        // End of initializing any uninitialized processTrees
+          // End of initializing any uninitialized processTrees
 
-        if (pId == null) {
-          continue; // processTree cannot be tracked
-        }
+          if (pId == null) {
+            continue; // processTree cannot be tracked
+          }
 
-        LOG.debug("Constructing ProcessTree for : PID = " + pId + " TID = "
-            + tid);
-        ProcfsBasedProcessTree pTree = ptInfo.getProcessTree();
-        pTree = pTree.getProcessTree(); // get the updated process-tree
-        ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of
-                                      // updated state
-        long currentMemUsage = pTree.getCumulativeVmem();
-        long limit = ptInfo.getMemLimit();
-        LOG.info("Memory usage of ProcessTree " + pId + " :" + currentMemUsage
-            + "bytes. Limit : " + limit + "bytes");
-
-        if (limit > taskTracker.getLimitMaxVMemPerTask()) {
-          // TODO: With monitoring enabled and no scheduling based on
-          // memory,users can seriously hijack the system by specifying memory
-          // requirements well above the cluster wide limit. Ideally these jobs
-          // should have been rejected by JT/scheduler. Because we can't do
-          // that, in the minimum we should fail the tasks and hence the job.
-          LOG.warn("Task " + tid
-              + " 's maxVmemPerTask is greater than TT's limitMaxVmPerTask");
-        }
+          LOG.debug("Constructing ProcessTree for : PID = " + pId + " TID = "
+              + tid);
+          ProcfsBasedProcessTree pTree = ptInfo.getProcessTree();
+          pTree = pTree.getProcessTree(); // get the updated process-tree
+          ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of
+          // updated state
+          long currentMemUsage = pTree.getCumulativeVmem();
+          long limit = ptInfo.getMemLimit();
+          LOG.info("Memory usage of ProcessTree " + pId + " :"
+              + currentMemUsage + "bytes. Limit : " + limit + "bytes");
+
+          if (limit > taskTracker.getLimitMaxVMemPerTask()) {
+            // TODO: With monitoring enabled and no scheduling based on
+            // memory,users can seriously hijack the system by specifying memory
+            // requirements well above the cluster wide limit. Ideally these
+            // jobs
+            // should have been rejected by JT/scheduler. Because we can't do
+            // that, in the minimum we should fail the tasks and hence the job.
+            LOG.warn("Task " + tid
+                + " 's maxVmemPerTask is greater than TT's limitMaxVmPerTask");
+          }
 
-        if (limit != JobConf.DISABLED_MEMORY_LIMIT
-            && currentMemUsage > limit) {
-          // Task (the root process) is still alive and overflowing memory.
-          // Clean up.
-          String msg = "TaskTree [pid=" + pId + ",tipID=" + tid
-              + "] is running beyond memory-limits. Current usage : "
-              + currentMemUsage + "bytes. Limit : " + limit + "bytes. Killing task.";
-          LOG.warn(msg);
-          taskTracker.cleanUpOverMemoryTask(tid, true, msg);
-
-          // Now destroy the ProcessTree, remove it from monitoring map.
-          pTree.destroy();
-          it.remove();
-          LOG.info("Removed ProcessTree with root " + pId);
-        } else {
-          // Accounting the total memory in usage for all tasks that are still
-          // alive and within limits.
-          memoryStillInUsage += currentMemUsage;
+          if (limit != JobConf.DISABLED_MEMORY_LIMIT
+              && currentMemUsage > limit) {
+            // Task (the root process) is still alive and overflowing memory.
+            // Clean up.
+            String msg =
+                "TaskTree [pid=" + pId + ",tipID=" + tid
+                    + "] is running beyond memory-limits. Current usage : "
+                    + currentMemUsage + "bytes. Limit : " + limit
+                    + "bytes. Killing task.";
+            LOG.warn(msg);
+            taskTracker.cleanUpOverMemoryTask(tid, true, msg);
+
+            // Now destroy the ProcessTree, remove it from monitoring map.
+            pTree.destroy();
+            it.remove();
+            LOG.info("Removed ProcessTree with root " + pId);
+          } else {
+            // Accounting the total memory in usage for all tasks that are still
+            // alive and within limits.
+            memoryStillInUsage += currentMemUsage;
+          }
+        } catch (Exception e) {
+          // Log the exception and proceed to the next task.
+          LOG.warn("Uncaught exception in TaskMemoryManager "
+              + "while managing memory of " + tid + " : "
+              + StringUtils.stringifyException(e));
         }
       }
 

Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java?rev=755591&r1=755590&r2=755591&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java (original)
+++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java Wed Mar 18 13:22:53 2009
@@ -137,7 +137,8 @@
       LOG.info("Interrupted.");
     }
 
-    assertEquals(false, p.isAlive()); // processtree should should be gone
+    assertFalse("ProcessTree must have been gone", p.isAlive());
+
     // Not able to join thread sometimes when forking with large N.
     try {
       t.join(2000);
@@ -145,5 +146,13 @@
     } catch (InterruptedException ie) {
       LOG.info("Interrupted while joining RogueTaskThread.");
     }
+
+    // ProcessTree is gone now. Any further calls should be sane.
+    p = p.getProcessTree();
+    assertFalse("ProcessTree must have been gone", p.isAlive());
+    assertTrue("Cumulative vmem for the gone-process is "
+        + p.getCumulativeVmem() + " . It should be zero.", p
+        .getCumulativeVmem() == 0);
+    assertTrue(p.toString().equals("[ ]"));
   }
 }



Mime
View raw message