Return-Path: Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: (qmail 4794 invoked from network); 8 Mar 2011 06:02:57 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 8 Mar 2011 06:02:57 -0000 Received: (qmail 83724 invoked by uid 500); 8 Mar 2011 06:02:57 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 83695 invoked by uid 500); 8 Mar 2011 06:02:57 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 83664 invoked by uid 99); 8 Mar 2011 06:02:57 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 08 Mar 2011 06:02:57 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 08 Mar 2011 06:02:55 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id BABA92388A3C; Tue, 8 Mar 2011 06:02:35 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1079270 - in /hadoop/mapreduce/branches/yahoo-merge/src: docs/src/documentation/content/xdocs/rumen.xml test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java tools/org/apache/hadoop/tools/rumen/Folder.java Date: Tue, 08 Mar 2011 06:02:35 -0000 To: mapreduce-commits@hadoop.apache.org From: omalley@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20110308060235.BABA92388A3C@eris.apache.org> Author: omalley Date: Tue Mar 8 06:02:35 2011 New Revision: 1079270 URL: http://svn.apache.org/viewvc?rev=1079270&view=rev Log: commit 
856e0b31712d11b0ad455cf8cec64ac767d64ec6 Author: Rajesh Balamohan Date: Thu Feb 24 14:06:49 2011 +0530 : Feature to instruct rumen-folder utility to skip jobs worth of specific duration Modified: hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java Modified: hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml?rev=1079270&r1=1079269&r2=1079270&view=diff ============================================================================== --- hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml (original) +++ hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/rumen.xml Tue Mar 8 06:02:35 2011 @@ -323,6 +323,17 @@ + -starts-after + Specify the time in milliseconds relative to the beginning of + trace, after which this utility should start considering the jobs + from input trace. + + If this value is specified as 10000, Folder would ignore + first 10000ms worth of jobs in the trace and + start considering the rest of the jobs in the trace for folding. + + + -temp-directory Temporary directory for the Folder. By default the output folder's parent directory is used as the scratch space. 
Modified: hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java?rev=1079270&r1=1079269&r2=1079270&view=diff ============================================================================== --- hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java (original) +++ hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenFolder.java Tue Mar 8 06:02:35 2011 @@ -71,6 +71,73 @@ public class TestRumenFolder { TestRumenFolder. jsonFileMatchesGold(conf, lfs, foldedTracePath, foldedGoldFile, LoggedJob.class, "trace"); } + + @Test + public void testStartsAfterOption() throws Exception { + final Configuration conf = new Configuration(); + final FileSystem lfs = FileSystem.getLocal(conf); + + @SuppressWarnings("deprecation") + final Path rootInputDir = + new Path(System.getProperty("test.tools.input.dir", "")) + .makeQualified(lfs); + @SuppressWarnings("deprecation") + final Path rootTempDir = + new Path(System.getProperty("test.build.data", "/tmp")) + .makeQualified(lfs); + + final Path rootInputFile = new Path(rootInputDir, "rumen/small-trace-test"); + final Path tempDir = new Path(rootTempDir, "TestRumenJobTraces"); + lfs.delete(tempDir, true); + + final Path foldedTracePath = new Path(tempDir, "folded-trace.json"); + + final Path inputFile = + new Path(rootInputFile, "folder-input-trace.json.gz"); + + String[] args = + { "-input-cycle", "100S", "-output-duration", "300S", + "-skew-buffer-length", "1", "-seed", "100", "-starts-after", "30S", "-concentration", "2", + inputFile.toString(), foldedTracePath.toString() }; + + final Path foldedGoldFile = + new Path(rootInputFile, "goldFoldedTrace.json.gz"); + + Folder folder = new Folder(); + int result = ToolRunner.run(folder, args); + assertEquals("Non-zero 
exit", 0, result); + + TestRumenFolder. checkValidityAfterSkippingJobs(conf, lfs, foldedTracePath, + foldedGoldFile, LoggedJob.class, "trace", 30000); + } + + static private void + checkValidityAfterSkippingJobs(Configuration conf, + FileSystem lfs, Path result, Path gold, + Class clazz, String fileDescription, + long startsAfter) throws IOException { + JsonObjectMapperParser goldParser = + new JsonObjectMapperParser(gold, clazz, conf); + InputStream resultStream = lfs.open(result); + JsonObjectMapperParser resultParser = + new JsonObjectMapperParser(resultStream, clazz); + try { + long submitTime = ((LoggedJob)goldParser.getNext()).getSubmitTime(); + long target = submitTime + startsAfter; + while(true) { + DeepCompare resultJob = resultParser.getNext(); + if (resultJob == null) { + break; + } + LoggedJob job = (LoggedJob) resultJob; + assertTrue("job's submit time in the output trace is less " + + "than the specified value of starts-after", + (job.getSubmitTime() >= target)); + } + } finally { + IOUtils.cleanup(null, goldParser, resultParser); + } + } static private void jsonFileMatchesGold( Configuration conf, FileSystem lfs, Path result, Path gold, Modified: hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java?rev=1079270&r1=1079269&r2=1079270&view=diff ============================================================================== --- hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java (original) +++ hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Folder.java Tue Mar 8 06:02:35 2011 @@ -61,6 +61,7 @@ public class Folder extends Configured i private boolean debug = false; private boolean allowMissorting = false; private int skewBufferLength = 0; + private long startsAfter = -1; static final private Log LOG = 
LogFactory.getLog(Folder.class); @@ -130,8 +131,9 @@ public class Folder extends Configured i for (int i = 0; i < args.length; ++i) { String thisArg = args[i]; - - if (thisArg.equalsIgnoreCase("-output-duration")) { + if (thisArg.equalsIgnoreCase("-starts-after")) { + startsAfter = parseDuration(args[++i]); + } else if (thisArg.equalsIgnoreCase("-output-duration")) { outputDuration = parseDuration(args[++i]); } else if (thisArg.equalsIgnoreCase("-input-cycle")) { inputCycle = parseDuration(args[++i]); @@ -274,6 +276,29 @@ public class Folder extends Configured i return EMPTY_JOB_TRACE; } + + // If starts-after time is specified, skip the number of jobs till we reach + // the starting time limit. + if (startsAfter > 0) { + LOG.info("starts-after time is specified. Initial job submit time : " + job.getSubmitTime()); + + long approximateTime = job.getSubmitTime() + startsAfter; + job = reader.nextJob(); + long skippedCount = 0; + while (job != null && job.getSubmitTime() < approximateTime) { + job = reader.nextJob(); + skippedCount++; + } + + LOG.debug("Skipped the initial " + startsAfter+ " ms jobs. Total number of skipped Jobs : " + + skippedCount); + + if (job == null) { + LOG.error("No more jobs to process in the trace..May be you skipped too many jobs.."); + return EMPTY_JOB_TRACE; + } + LOG.info("The first job has a submit time of " + job.getSubmitTime()); + } firstJobSubmitTime = job.getSubmitTime(); long lastJobSubmitTime = firstJobSubmitTime;