Return-Path: X-Original-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id BB9F83BFD for ; Thu, 28 Apr 2011 04:15:34 +0000 (UTC) Received: (qmail 14955 invoked by uid 500); 28 Apr 2011 04:15:34 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 14844 invoked by uid 500); 28 Apr 2011 04:15:33 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 14831 invoked by uid 99); 28 Apr 2011 04:15:30 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 28 Apr 2011 04:15:30 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 28 Apr 2011 04:15:29 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 118702388897; Thu, 28 Apr 2011 04:15:09 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1097315 - in /hadoop/mapreduce/trunk: CHANGES.txt src/docs/src/documentation/content/xdocs/rumen.xml src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java src/tools/org/apache/hadoop/tools/rumen/Folder.java Date: Thu, 28 Apr 2011 04:15:08 -0000 To: mapreduce-commits@hadoop.apache.org From: amarrk@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110428041509.118702388897@eris.apache.org> Author: amarrk Date: Thu Apr 28 04:15:08 2011 New Revision: 1097315 URL: http://svn.apache.org/viewvc?rev=1097315&view=rev 
Log: Feature to instruct rumen-folder utility to skip jobs worth of specific duration. Modified: hadoop/mapreduce/trunk/CHANGES.txt hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/rumen.xml hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Folder.java Modified: hadoop/mapreduce/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=1097315&r1=1097314&r2=1097315&view=diff ============================================================================== --- hadoop/mapreduce/trunk/CHANGES.txt (original) +++ hadoop/mapreduce/trunk/CHANGES.txt Thu Apr 28 04:15:08 2011 @@ -7,6 +7,8 @@ Trunk (unreleased changes) NEW FEATURES IMPROVEMENTS + MAPREDUCE-1461. Feature to instruct rumen-folder utility to skip jobs worth + of specific duration. (Rajesh Balamohan via amarrk) MAPREDUCE-2172. Added test-patch.properties required by test-patch.sh (nigel) Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/rumen.xml URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/rumen.xml?rev=1097315&r1=1097314&r2=1097315&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/rumen.xml (original) +++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/rumen.xml Thu Apr 28 04:15:08 2011 @@ -311,6 +311,17 @@ + -starts-after + Specify the time (in milliseconds) relative to the start of + the trace, after which this utility should consider the + jobs from input trace. + + If this value is specified as 10000, Folder would ignore + first 10000ms worth of jobs in the trace and + start considering the rest of the jobs in the trace for folding. + + + -temp-directory Temporary directory for the Folder. By default the output folder's parent directory is used as the scratch space. 
Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java?rev=1097315&r1=1097314&r2=1097315&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java (original) +++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java Thu Apr 28 04:15:08 2011 @@ -20,6 +20,12 @@ package org.apache.hadoop.tools.rumen; import java.io.IOException; import java.io.InputStream; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -71,6 +77,96 @@ public class TestRumenFolder { TestRumenFolder. jsonFileMatchesGold(conf, lfs, foldedTracePath, foldedGoldFile, LoggedJob.class, "trace"); } + + @Test + public void testStartsAfterOption() throws Exception { + final Configuration conf = new Configuration(); + final FileSystem lfs = FileSystem.getLocal(conf); + + @SuppressWarnings("deprecation") + final Path rootInputDir = + new Path(System.getProperty("test.tools.input.dir", "")) + .makeQualified(lfs); + @SuppressWarnings("deprecation") + final Path rootTempDir = + new Path(System.getProperty("test.build.data", "/tmp")) + .makeQualified(lfs); + + final Path rootInputFile = new Path(rootInputDir, "rumen/small-trace-test"); + final Path tempDir = new Path(rootTempDir, "TestRumenJobTraces"); + lfs.delete(tempDir, true); + + final Path inputFile = + new Path(rootInputFile, "goldFoldedTrace.json.gz"); + + final Path foldedTracePath = new Path(tempDir, + "folded-skippedjob-trace.json"); + String[] args = + { "-input-cycle", "300S", "-output-duration", "300S", + "-starts-after", "30S", + 
inputFile.toString(), foldedTracePath.toString() }; + + Folder folder = new Folder(); + int result = ToolRunner.run(folder, args); + assertEquals("Non-zero exit", 0, result); + + TestRumenFolder. checkValidityAfterSkippingJobs(conf, lfs, foldedTracePath, + inputFile, LoggedJob.class, "trace", 30000, 300000); + } + + static private void + checkValidityAfterSkippingJobs(Configuration conf, + FileSystem lfs, Path result, Path inputFile, + Class clazz, String fileDescription, + long startsAfter, long duration) throws IOException { + + JsonObjectMapperParser inputFileParser = + new JsonObjectMapperParser(inputFile, clazz, conf); + InputStream resultStream = lfs.open(result); + JsonObjectMapperParser resultParser = + new JsonObjectMapperParser(resultStream, clazz); + List gpSubmitTimes = new LinkedList(); + List rpSubmitTimes = new LinkedList(); + try { + //Get submitTime of first job + LoggedJob firstJob = (LoggedJob)inputFileParser.getNext(); + gpSubmitTimes.add(firstJob.getSubmitTime()); + long absoluteStartsAfterTime = firstJob.getSubmitTime() + startsAfter; + + //total duration + long endTime = firstJob.getSubmitTime() + duration; + + //read original trace + LoggedJob oriJob = null; + while((oriJob = (LoggedJob)inputFileParser.getNext()) != null) { + gpSubmitTimes.add(oriJob.getSubmitTime()); + } + + //check if retained jobs have submittime > starts-after + LoggedJob job = null; + while((job = (LoggedJob) resultParser.getNext()) != null) { + assertTrue("job's submit time in the output trace is less " + + "than the specified value of starts-after", + (job.getSubmitTime() >= absoluteStartsAfterTime)); + rpSubmitTimes.add(job.getSubmitTime()); + } + + List skippedJobs = new LinkedList(); + skippedJobs.addAll(gpSubmitTimes); + skippedJobs.removeAll(rpSubmitTimes); + + //check if the skipped job submittime < starts-after + for(Long submitTime : skippedJobs) { + assertTrue("skipped job submit time " + submitTime + + " in the trace is greater " + + "than the specified 
value of starts-after " + + absoluteStartsAfterTime, + (submitTime < absoluteStartsAfterTime)); + } + } finally { + IOUtils.cleanup(null, inputFileParser, resultParser); + } + } static private void jsonFileMatchesGold( Configuration conf, FileSystem lfs, Path result, Path gold, Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Folder.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Folder.java?rev=1097315&r1=1097314&r2=1097315&view=diff ============================================================================== --- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Folder.java (original) +++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/Folder.java Thu Apr 28 04:15:08 2011 @@ -61,6 +61,7 @@ public class Folder extends Configured i private boolean debug = false; private boolean allowMissorting = false; private int skewBufferLength = 0; + private long startsAfter = -1; static final private Log LOG = LogFactory.getLog(Folder.class); @@ -130,8 +131,9 @@ public class Folder extends Configured i for (int i = 0; i < args.length; ++i) { String thisArg = args[i]; - - if (thisArg.equalsIgnoreCase("-output-duration")) { + if (thisArg.equalsIgnoreCase("-starts-after")) { + startsAfter = parseDuration(args[++i]); + } else if (thisArg.equalsIgnoreCase("-output-duration")) { outputDuration = parseDuration(args[++i]); } else if (thisArg.equalsIgnoreCase("-input-cycle")) { inputCycle = parseDuration(args[++i]); @@ -274,6 +276,31 @@ public class Folder extends Configured i return EMPTY_JOB_TRACE; } + + // If starts-after time is specified, skip the number of jobs till we reach + // the starting time limit. + if (startsAfter > 0) { + LOG.info("starts-after time is specified. 
Initial job submit time : " + + job.getSubmitTime()); + + long approximateTime = job.getSubmitTime() + startsAfter; + job = reader.nextJob(); + long skippedCount = 0; + while (job != null && job.getSubmitTime() < approximateTime) { + job = reader.nextJob(); + skippedCount++; + } + + LOG.debug("Considering jobs with submit time greater than " + + startsAfter + " ms. Skipped " + skippedCount + " jobs."); + + if (job == null) { + LOG.error("No more jobs to process in the trace with 'starts-after'"+ + " set to " + startsAfter + "ms."); + return EMPTY_JOB_TRACE; + } + LOG.info("The first job has a submit time of " + job.getSubmitTime()); + } firstJobSubmitTime = job.getSubmitTime(); long lastJobSubmitTime = firstJobSubmitTime;