Return-Path: Delivered-To: apmail-hadoop-core-commits-archive@www.apache.org Received: (qmail 40272 invoked from network); 28 May 2009 06:37:55 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 28 May 2009 06:37:55 -0000 Received: (qmail 74447 invoked by uid 500); 28 May 2009 06:38:03 -0000 Delivered-To: apmail-hadoop-core-commits-archive@hadoop.apache.org Received: (qmail 74402 invoked by uid 500); 28 May 2009 06:38:02 -0000 Mailing-List: contact core-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: core-dev@hadoop.apache.org Delivered-To: mailing list core-commits@hadoop.apache.org Received: (qmail 74372 invoked by uid 99); 28 May 2009 06:38:02 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 28 May 2009 06:38:02 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 28 May 2009 06:37:54 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 1A90023888C2; Thu, 28 May 2009 06:37:34 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r779450 - in /hadoop/core/branches/branch-0.20: ./ src/core/org/apache/hadoop/util/ src/mapred/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/mapred/ src/test/org/apache/hadoop/util/ Date: Thu, 28 May 2009 06:37:33 -0000 To: core-commits@hadoop.apache.org From: yhemanth@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20090528063734.1A90023888C2@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: yhemanth Date: Thu May 28 06:37:33 2009 New Revision: 779450 URL: http://svn.apache.org/viewvc?rev=779450&view=rev Log: HADOOP-5883. Fixed tasktracker memory monitoring to account for momentary spurts in memory usage due to java's fork() model. Contributed by Hemanth Yamijala. Modified: hadoop/core/branches/branch-0.20/CHANGES.txt hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java Modified: hadoop/core/branches/branch-0.20/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/CHANGES.txt?rev=779450&r1=779449&r2=779450&view=diff ============================================================================== --- hadoop/core/branches/branch-0.20/CHANGES.txt (original) +++ hadoop/core/branches/branch-0.20/CHANGES.txt Thu May 28 06:37:33 2009 @@ -89,6 +89,10 @@ HADOOP-4626. Correct the API links in hdfs forrest doc so that they point to the same version of hadoop. (szetszwo) + HADOOP-5883. Fixed tasktracker memory monitoring to account for + momentary spurts in memory usage due to java's fork() model. + (yhemanth) + Release 0.20.0 - 2009-04-15 INCOMPATIBLE CHANGES Modified: hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java?rev=779450&r1=779449&r2=779450&view=diff ============================================================================== --- hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java (original) +++ hadoop/core/branches/branch-0.20/src/core/org/apache/hadoop/util/ProcfsBasedProcessTree.java Thu May 28 06:37:33 2009 @@ -52,14 +52,23 @@ private static final Pattern PROCFS_STAT_FILE_FORMAT = Pattern .compile("^([0-9-]+)\\s([^\\s]+)\\s[^\\s]\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+)\\s([0-9-]+\\s){16}([0-9]+)(\\s[0-9-]+){16}"); + // to enable testing, using this variable which can be configured + // to a test directory. + private String procfsDir; + private Integer pid = -1; private Map processTree = new HashMap(); public ProcfsBasedProcessTree(String pid) { - this.pid = getValidPID(pid); + this(pid, PROCFS); } + public ProcfsBasedProcessTree(String pid, String procfsDir) { + this.pid = getValidPID(pid); + this.procfsDir = procfsDir; + } + public void setSigKillInterval(long interval) { sleepTimeBeforeSigKill = interval; } @@ -96,13 +105,17 @@ List processList = getProcessList(); Map allProcessInfo = new HashMap(); + + // cache the processTree to get the age for processes + Map oldProcs = + new HashMap(processTree); processTree.clear(); ProcessInfo me = null; for (Integer proc : processList) { // Get information for each process ProcessInfo pInfo = new ProcessInfo(proc); - if (constructProcessInfo(pInfo) != null) { + if (constructProcessInfo(pInfo, procfsDir) != null) { allProcessInfo.put(proc, pInfo); if (proc.equals(this.pid)) { me = pInfo; // cache 'me' @@ -138,6 +151,16 @@ pInfoQueue.addAll(pInfo.getChildren()); } + // update age values. + for (Map.Entry procs : processTree.entrySet()) { + ProcessInfo oldInfo = oldProcs.get(procs.getKey()); + if (oldInfo != null) { + if (procs.getValue() != null) { + procs.getValue().updateAge(oldInfo); + } + } + } + if (LOG.isDebugEnabled()) { // Log.debug the ProcfsBasedProcessTree LOG.debug(this.toString()); @@ -197,9 +220,23 @@ * @return cumulative virtual memory used by the process-tree in bytes. */ public long getCumulativeVmem() { + // include all processes.. all processes will be older than 0. + return getCumulativeVmem(0); + } + + /** + * Get the cumulative virtual memory used by all the processes in the + * process-tree that are older than the passed in age. + * + * @param olderThanAge processes above this age are included in the + * memory addition + * @return cumulative virtual memory used by the process-tree in bytes, + * for processes older than this age. + */ + public long getCumulativeVmem(int olderThanAge) { long total = 0; for (ProcessInfo p : processTree.values()) { - if (p != null) { + if ((p != null) && (p.getAge() > olderThanAge)) { total += p.getVmem(); } } @@ -268,13 +305,13 @@ * Get the list of all processes in the system. */ private List getProcessList() { - String[] processDirs = (new File(PROCFS)).list(); + String[] processDirs = (new File(procfsDir)).list(); List processList = new ArrayList(); for (String dir : processDirs) { try { int pd = Integer.parseInt(dir); - if ((new File(PROCFS + dir)).isDirectory()) { + if ((new File(procfsDir, dir)).isDirectory()) { processList.add(Integer.valueOf(pd)); } } catch (NumberFormatException n) { @@ -292,12 +329,29 @@ * same. Returns null on failing to read from procfs, */ private ProcessInfo constructProcessInfo(ProcessInfo pinfo) { + return constructProcessInfo(pinfo, PROCFS); + } + + /** + * Construct the ProcessInfo using the process' PID and procfs rooted at the + * specified directory and return the same. It is provided mainly to assist + * testing purposes. + * + * Returns null on failing to read from procfs, + * + * @param pinfo ProcessInfo that needs to be updated + * @param procfsDir root of the proc file system + * @return updated ProcessInfo, null on errors. + */ + private ProcessInfo constructProcessInfo(ProcessInfo pinfo, + String procfsDir) { ProcessInfo ret = null; - // Read "/proc//stat" file + // Read "procfsDir//stat" file BufferedReader in = null; FileReader fReader = null; try { - fReader = new FileReader(PROCFS + pinfo.getPid() + "/stat"); + File pidDir = new File(procfsDir, String.valueOf(pinfo.getPid())); + fReader = new FileReader(new File(pidDir, "/stat")); in = new BufferedReader(fReader); } catch (FileNotFoundException f) { // The process vanished in the interim! @@ -311,7 +365,7 @@ boolean mat = m.find(); if (mat) { // Set ( name ) ( ppid ) ( pgrpId ) (session ) (vsize ) - pinfo.update(m.group(2), Integer.parseInt(m.group(3)), Integer + pinfo.updateProcessInfo(m.group(2), Integer.parseInt(m.group(3)), Integer .parseInt(m.group(4)), Integer.parseInt(m.group(5)), Long .parseLong(m.group(7))); } @@ -338,7 +392,7 @@ return ret; } - + /** * Is the process with PID pid still alive? */ @@ -391,7 +445,6 @@ } } } - /** * Returns a string printing PIDs of process present in the * ProcfsBasedProcessTree. Output format : [pid pid ..] @@ -417,10 +470,14 @@ private Integer ppid; // parent process-id private Integer sessionId; // session-id private Long vmem; // virtual memory usage + // how many times has this process been seen alive + private int age; private List children = new ArrayList(); // list of children public ProcessInfo(int pid) { this.pid = Integer.valueOf(pid); + // seeing this the first time. + this.age = 1; } public Integer getPid() { @@ -447,6 +504,10 @@ return vmem; } + public int getAge() { + return age; + } + public boolean isParent(ProcessInfo p) { if (pid.equals(p.getPpid())) { return true; @@ -454,7 +515,7 @@ return false; } - public void update(String name, Integer ppid, Integer pgrpId, + public void updateProcessInfo(String name, Integer ppid, Integer pgrpId, Integer sessionId, Long vmem) { this.name = name; this.ppid = ppid; @@ -463,6 +524,10 @@ this.vmem = vmem; } + public void updateAge(ProcessInfo oldInfo) { + this.age = oldInfo.age + 1; + } + public boolean addChild(ProcessInfo p) { return children.add(p); } Modified: hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java?rev=779450&r1=779449&r2=779450&view=diff ============================================================================== --- hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java (original) +++ hadoop/core/branches/branch-0.20/src/mapred/org/apache/hadoop/mapred/TaskMemoryManagerThread.java Thu May 28 06:37:33 2009 @@ -51,21 +51,34 @@ private List tasksToBeRemoved; public TaskMemoryManagerThread(TaskTracker taskTracker) { + + this(taskTracker.getTotalMemoryAllottedForTasksOnTT() * 1024 * 1024L, + taskTracker.getJobConf().getLong( + "mapred.tasktracker.taskmemorymanager.monitoring-interval", + 5000L), + taskTracker.getJobConf().getLong( + "mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill", + ProcfsBasedProcessTree.DEFAULT_SLEEPTIME_BEFORE_SIGKILL)); + this.taskTracker = taskTracker; + } + + // mainly for test purposes. note that the tasktracker variable is + // not set here. + TaskMemoryManagerThread(long maxMemoryAllowedForAllTasks, + long monitoringInterval, + long sleepTimeBeforeSigKill) { setName(this.getClass().getName()); processTreeInfoMap = new HashMap(); tasksToBeAdded = new HashMap(); tasksToBeRemoved = new ArrayList(); - maxMemoryAllowedForAllTasks = - taskTracker.getTotalMemoryAllottedForTasksOnTT() * 1024 * 1024L; + this.maxMemoryAllowedForAllTasks = maxMemoryAllowedForAllTasks; - monitoringInterval = taskTracker.getJobConf().getLong( - "mapred.tasktracker.taskmemorymanager.monitoring-interval", 5000L); - sleepTimeBeforeSigKill = taskTracker.getJobConf().getLong( - "mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill", - ProcfsBasedProcessTree.DEFAULT_SLEEPTIME_BEFORE_SIGKILL); + this.monitoringInterval = monitoringInterval; + + this.sleepTimeBeforeSigKill = sleepTimeBeforeSigKill; } public void addTask(TaskAttemptID tid, long memLimit, String pidFile) { @@ -200,12 +213,15 @@ ptInfo.setProcessTree(pTree); // update ptInfo with proces-tree of // updated state long currentMemUsage = pTree.getCumulativeVmem(); + // as processes begin with an age 1, we want to see if there + // are processes more than 1 iteration old. + long curMemUsageOfAgedProcesses = pTree.getCumulativeVmem(1); long limit = ptInfo.getMemLimit(); LOG.info("Memory usage of ProcessTree " + pId + " :" + currentMemUsage + "bytes. Limit : " + limit + "bytes"); - if (limit != JobConf.DISABLED_MEMORY_LIMIT - && currentMemUsage > limit) { + if (isProcessTreeOverLimit(tid.toString(), currentMemUsage, + curMemUsageOfAgedProcesses, limit)) { // Task (the root process) is still alive and overflowing memory. // Clean up. String msg = @@ -254,6 +270,65 @@ } } + /** + * Check whether a task's process tree's current memory usage is over limit. + * + * When a java process exec's a program, it could momentarily account for + * double the size of it's memory, because the JVM does a fork()+exec() + * which at fork time creates a copy of the parent's memory. If the + * monitoring thread detects the memory used by the task tree at the same + * instance, it could assume it is over limit and kill the tree, for no + * fault of the process itself. + * + * We counter this problem by employing a heuristic check: + * - if a process tree exceeds the memory limit by more than twice, + * it is killed immediately + * - if a process tree has processes older than the monitoring interval + * exceeding the memory limit by even 1 time, it is killed. Else it is given + * the benefit of doubt to lie around for one more iteration. + * + * @param tId Task Id for the task tree + * @param currentMemUsage Memory usage of a task tree + * @param curMemUsageOfAgedProcesses Memory usage of processes older than + * an iteration in a task tree + * @param limit The limit specified for the task + * @return true if the memory usage is more than twice the specified limit, + * or if processes in the tree, older than this thread's + * monitoring interval, exceed the memory limit. False, + * otherwise. + */ + boolean isProcessTreeOverLimit(String tId, + long currentMemUsage, + long curMemUsageOfAgedProcesses, + long limit) { + boolean isOverLimit = false; + + if (currentMemUsage > (2*limit)) { + LOG.warn("Process tree for task: " + tId + " running over twice " + + "the configured limit. Limit=" + limit + + ", current usage = " + currentMemUsage); + isOverLimit = true; + } else if (curMemUsageOfAgedProcesses > limit) { + LOG.warn("Process tree for task: " + tId + " has processes older than 1 " + + "iteration running over the configured limit. Limit=" + limit + + ", current usage = " + curMemUsageOfAgedProcesses); + isOverLimit = true; + } + + return isOverLimit; + } + + // method provided just for easy testing purposes + boolean isProcessTreeOverLimit(ProcfsBasedProcessTree pTree, + String tId, long limit) { + long currentMemUsage = pTree.getCumulativeVmem(); + // as processes begin with an age 1, we want to see if there are processes + // more than 1 iteration old. + long curMemUsageOfAgedProcesses = pTree.getCumulativeVmem(1); + return isProcessTreeOverLimit(tId, currentMemUsage, + curMemUsageOfAgedProcesses, limit); + } + private void killTasksWithLeastProgress(long memoryStillInUsage) { List tasksToKill = new ArrayList(); Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java?rev=779450&r1=779449&r2=779450&view=diff ============================================================================== --- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java (original) +++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/mapred/TestTaskTrackerMemoryManager.java Thu May 28 06:37:33 2009 @@ -18,6 +18,7 @@ package org.apache.hadoop.mapred; +import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -27,12 +28,15 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.examples.SleepJob; import org.apache.hadoop.util.MemoryCalculatorPlugin; import org.apache.hadoop.util.ProcfsBasedProcessTree; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.util.TestProcfsBasedProcessTree; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.fs.FileSystem; @@ -45,6 +49,9 @@ private static final Log LOG = LogFactory.getLog(TestTaskTrackerMemoryManager.class); + private static String TEST_ROOT_DIR = new Path(System.getProperty( + "test.build.data", "/tmp")).toString().replace(' ', '+'); + private MiniMRCluster miniMRCluster; private String taskOverLimitPatternString = @@ -350,4 +357,91 @@ // Test succeeded, kill the job. job.killJob(); } + + /** + * Test to verify the check for whether a process tree is over limit or not. + * @throws IOException if there was a problem setting up the + * fake procfs directories or files. + */ + public void testProcessTreeLimits() throws IOException { + + // set up a dummy proc file system + File procfsRootDir = new File(TEST_ROOT_DIR, "proc"); + String[] pids = { "100", "200", "300", "400", "500", "600", "700" }; + try { + TestProcfsBasedProcessTree.setupProcfsRootDir(procfsRootDir); + + // create pid dirs. + TestProcfsBasedProcessTree.setupPidDirs(procfsRootDir, pids); + + // create process infos. + TestProcfsBasedProcessTree.ProcessStatInfo[] procs = + new TestProcfsBasedProcessTree.ProcessStatInfo[7]; + + // assume pids 100, 500 are in 1 tree + // 200,300,400 are in another + // 600,700 are in a third + procs[0] = new TestProcfsBasedProcessTree.ProcessStatInfo( + new String[] {"100", "proc1", "1", "100", "100", "100000"}); + procs[1] = new TestProcfsBasedProcessTree.ProcessStatInfo( + new String[] {"200", "proc2", "1", "200", "200", "200000"}); + procs[2] = new TestProcfsBasedProcessTree.ProcessStatInfo( + new String[] {"300", "proc3", "200", "200", "200", "300000"}); + procs[3] = new TestProcfsBasedProcessTree.ProcessStatInfo( + new String[] {"400", "proc4", "200", "200", "200", "400000"}); + procs[4] = new TestProcfsBasedProcessTree.ProcessStatInfo( + new String[] {"500", "proc5", "100", "100", "100", "1500000"}); + procs[5] = new TestProcfsBasedProcessTree.ProcessStatInfo( + new String[] {"600", "proc6", "1", "600", "600", "100000"}); + procs[6] = new TestProcfsBasedProcessTree.ProcessStatInfo( + new String[] {"700", "proc7", "600", "600", "600", "100000"}); + // write stat files. + TestProcfsBasedProcessTree.writeStatFiles(procfsRootDir, pids, procs); + + // vmem limit + long limit = 700000; + + // Create TaskMemoryMonitorThread + TaskMemoryManagerThread test = new TaskMemoryManagerThread(1000000L, + 5000L, + 5000L); + // create process trees + // tree rooted at 100 is over limit immediately, as it is + // twice over the mem limit. + ProcfsBasedProcessTree pTree = new ProcfsBasedProcessTree( + "100", + procfsRootDir.getAbsolutePath()); + pTree.getProcessTree(); + assertTrue("tree rooted at 100 should be over limit " + + "after first iteration.", + test.isProcessTreeOverLimit(pTree, "dummyId", limit)); + + // the tree rooted at 200 is initially below limit. + pTree = new ProcfsBasedProcessTree("200", + procfsRootDir.getAbsolutePath()); + pTree.getProcessTree(); + assertFalse("tree rooted at 200 shouldn't be over limit " + + "after one iteration.", + test.isProcessTreeOverLimit(pTree, "dummyId", limit)); + // second iteration - now the tree has been over limit twice, + // hence it should be declared over limit. + pTree.getProcessTree(); + assertTrue("tree rooted at 200 should be over limit after 2 iterations", + test.isProcessTreeOverLimit(pTree, "dummyId", limit)); + + // the tree rooted at 600 is never over limit. + pTree = new ProcfsBasedProcessTree("600", + procfsRootDir.getAbsolutePath()); + pTree.getProcessTree(); + assertFalse("tree rooted at 600 should never be over limit.", + test.isProcessTreeOverLimit(pTree, "dummyId", limit)); + + // another iteration does not make any difference. + pTree.getProcessTree(); + assertFalse("tree rooted at 600 should never be over limit.", + test.isProcessTreeOverLimit(pTree, "dummyId", limit)); + } finally { + FileUtil.fullyDelete(procfsRootDir); + } + } } Modified: hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java?rev=779450&r1=779449&r2=779450&view=diff ============================================================================== --- hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java (original) +++ hadoop/core/branches/branch-0.20/src/test/org/apache/hadoop/util/TestProcfsBasedProcessTree.java Thu May 28 06:37:33 2009 @@ -18,6 +18,7 @@ package org.apache.hadoop.util; +import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; @@ -25,6 +26,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Shell.ExitCodeException; import org.apache.hadoop.util.Shell.ShellCommandExecutor; @@ -34,6 +37,9 @@ private static final Log LOG = LogFactory .getLog(TestProcfsBasedProcessTree.class); + private static String TEST_ROOT_DIR = new Path(System.getProperty( + "test.build.data", "/tmp")).toString().replace(' ', '+'); + private ShellCommandExecutor shexec = null; private String pidFile; private String shellScript; @@ -155,4 +161,234 @@ .getCumulativeVmem() == 0); assertTrue(p.toString().equals("[ ]")); } + + public static class ProcessStatInfo { + // sample stat in a single line : 3910 (gpm) S 1 3910 3910 0 -1 4194624 + // 83 0 0 0 0 0 0 0 16 0 1 0 7852 2408448 88 4294967295 134512640 + // 134590050 3220521392 3220520036 10975138 0 0 4096 134234626 + // 4294967295 0 0 17 1 0 0 + String pid; + String name; + String ppid; + String pgrpId; + String session; + String vmem; + + public ProcessStatInfo(String[] statEntries) { + pid = statEntries[0]; + name = statEntries[1]; + ppid = statEntries[2]; + pgrpId = statEntries[3]; + session = statEntries[4]; + vmem = statEntries[5]; + } + + // construct a line that mimics the procfs stat file. + // all unused numerical entries are set to 0. + public String getStatLine() { + return String.format("%s (%s) S %s %s %s 0 0 0" + + " 0 0 0 0 0 0 0 0 0 0 0 0 0 %s 0 0 0" + + " 0 0 0 0 0 0 0 0" + + " 0 0 0 0 0", + pid, name, ppid, pgrpId, session, vmem); + } + } + + /** + * A basic test that creates a few process directories and writes + * stat files. Verifies that the virtual memory is correctly + * computed. + * @throws IOException if there was a problem setting up the + * fake procfs directories or files. + */ + public void testVirtualMemoryForProcessTree() throws IOException { + + // test processes + String[] pids = { "100", "200", "300", "400" }; + // create the fake procfs root directory. + File procfsRootDir = new File(TEST_ROOT_DIR, "proc"); + + try { + setupProcfsRootDir(procfsRootDir); + setupPidDirs(procfsRootDir, pids); + + // create stat objects. + // assuming processes 100, 200, 300 are in tree and 400 is not. + ProcessStatInfo[] procInfos = new ProcessStatInfo[4]; + procInfos[0] = new ProcessStatInfo(new String[] + {"100", "proc1", "1", "100", "100", "100000"}); + procInfos[1] = new ProcessStatInfo(new String[] + {"200", "proc2", "100", "100", "100", "200000"}); + procInfos[2] = new ProcessStatInfo(new String[] + {"300", "proc3", "200", "100", "100", "300000"}); + procInfos[3] = new ProcessStatInfo(new String[] + {"400", "proc4", "1", "400", "400", "400000"}); + + writeStatFiles(procfsRootDir, pids, procInfos); + + // crank up the process tree class. + ProcfsBasedProcessTree processTree = + new ProcfsBasedProcessTree("100", procfsRootDir.getAbsolutePath()); + // build the process tree. + processTree.getProcessTree(); + + // verify cumulative memory + assertEquals("Cumulative memory does not match", + Long.parseLong("600000"), processTree.getCumulativeVmem()); + } finally { + FileUtil.fullyDelete(procfsRootDir); + } + } + + /** + * Tests that cumulative memory is computed only for + * processes older than a given age. + * @throws IOException if there was a problem setting up the + * fake procfs directories or files. + */ + public void testVMemForOlderProcesses() throws IOException { + // initial list of processes + String[] pids = { "100", "200", "300", "400" }; + // create the fake procfs root directory. + File procfsRootDir = new File(TEST_ROOT_DIR, "proc"); + + try { + setupProcfsRootDir(procfsRootDir); + setupPidDirs(procfsRootDir, pids); + + // create stat objects. + // assuming 100, 200 and 400 are in tree, 300 is not. + ProcessStatInfo[] procInfos = new ProcessStatInfo[4]; + procInfos[0] = new ProcessStatInfo(new String[] + {"100", "proc1", "1", "100", "100", "100000"}); + procInfos[1] = new ProcessStatInfo(new String[] + {"200", "proc2", "100", "100", "100", "200000"}); + procInfos[2] = new ProcessStatInfo(new String[] + {"300", "proc3", "1", "300", "300", "300000"}); + procInfos[3] = new ProcessStatInfo(new String[] + {"400", "proc4", "100", "100", "100", "400000"}); + + writeStatFiles(procfsRootDir, pids, procInfos); + + // crank up the process tree class. + ProcfsBasedProcessTree processTree = + new ProcfsBasedProcessTree("100", procfsRootDir.getAbsolutePath()); + // build the process tree. + processTree.getProcessTree(); + + // verify cumulative memory + assertEquals("Cumulative memory does not match", + Long.parseLong("700000"), processTree.getCumulativeVmem()); + + // write one more process as child of 100. + String[] newPids = { "500" }; + setupPidDirs(procfsRootDir, newPids); + + ProcessStatInfo[] newProcInfos = new ProcessStatInfo[1]; + newProcInfos[0] = new ProcessStatInfo(new String[] + {"500", "proc5", "100", "100", "100", "500000"}); + writeStatFiles(procfsRootDir, newPids, newProcInfos); + + // check vmem includes the new process. + processTree.getProcessTree(); + assertEquals("Cumulative memory does not include new process", + Long.parseLong("1200000"), processTree.getCumulativeVmem()); + + // however processes older than 1 iteration will retain the older value + assertEquals("Cumulative memory shouldn't have included new process", + Long.parseLong("700000"), processTree.getCumulativeVmem(1)); + + // one more process + newPids = new String[]{ "600" }; + setupPidDirs(procfsRootDir, newPids); + + newProcInfos = new ProcessStatInfo[1]; + newProcInfos[0] = new ProcessStatInfo(new String[] + {"600", "proc6", "100", "100", "100", "600000"}); + writeStatFiles(procfsRootDir, newPids, newProcInfos); + + // refresh process tree + processTree.getProcessTree(); + + // processes older than 2 iterations should be same as before. + assertEquals("Cumulative memory shouldn't have included new processes", + Long.parseLong("700000"), processTree.getCumulativeVmem(2)); + + // processes older than 1 iteration should not include new process, + // but include process 500 + assertEquals("Cumulative memory shouldn't have included new processes", + Long.parseLong("1200000"), processTree.getCumulativeVmem(1)); + + // no processes older than 3 iterations, this should be 0 + assertEquals("Getting non-zero vmem for processes older than 3 iterations", + 0L, processTree.getCumulativeVmem(3)); + } finally { + FileUtil.fullyDelete(procfsRootDir); + } + } + + /** + * Create a directory to mimic the procfs file system's root. + * @param procfsRootDir root directory to create. + * @throws IOException if could not delete the procfs root directory + */ + public static void setupProcfsRootDir(File procfsRootDir) + throws IOException { + // cleanup any existing process root dir. + if (procfsRootDir.exists()) { + assertTrue(FileUtil.fullyDelete(procfsRootDir)); + } + + // create afresh + assertTrue(procfsRootDir.mkdirs()); + } + + /** + * Create PID directories under the specified procfs root directory + * @param procfsRootDir root directory of procfs file system + * @param pids the PID directories to create. + * @throws IOException If PID dirs could not be created + */ + public static void setupPidDirs(File procfsRootDir, String[] pids) + throws IOException { + for (String pid : pids) { + File pidDir = new File(procfsRootDir, pid); + pidDir.mkdir(); + if (!pidDir.exists()) { + throw new IOException ("couldn't make process directory under " + + "fake procfs"); + } else { + LOG.info("created pid dir"); + } + } + } + + /** + * Write stat files under the specified pid directories with data + * setup in the corresponding ProcessStatInfo objects + * @param procfsRootDir root directory of procfs file system + * @param pids the PID directories under which to create the stat file + * @param procs corresponding ProcessStatInfo objects whose data should be + * written to the stat files. + * @throws IOException if stat files could not be written + */ + public static void writeStatFiles(File procfsRootDir, String[] pids, + ProcessStatInfo[] procs) throws IOException { + for (int i=0; i