hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amareshw...@apache.org
Subject svn commit: r999769 - in /hadoop/mapreduce/trunk: CHANGES.txt src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
Date Wed, 22 Sep 2010 05:58:04 GMT
Author: amareshwari
Date: Wed Sep 22 05:58:04 2010
New Revision: 999769

URL: http://svn.apache.org/viewvc?rev=999769&view=rev
Log:
MAPREDUCE-2078. Fixes TraceBuilder to generate traces when a globbed job history path is given.
Contributed by Amar Kamat

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=999769&r1=999768&r2=999769&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Wed Sep 22 05:58:04 2010
@@ -298,6 +298,9 @@ Trunk (unreleased changes)
 
     MAPREDUCE-1918. Adds documentation to Rumen. (Amar Kamat via amareshwari)
 
+    MAPREDUCE-2078. Fixes TraceBuilder to generate traces when a globbed job
+    history path is given. (Amar Kamat via amareshwari)
+
 Release 0.21.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java?rev=999769&r1=999768&r2=999769&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java Wed Sep 22 05:58:04 2010
@@ -46,6 +46,7 @@ import org.apache.hadoop.mapreduce.jobhi
 import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent;
 import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent;
 import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
+import org.apache.hadoop.tools.rumen.TraceBuilder.MyOptions;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
@@ -290,6 +291,61 @@ public class TestRumenJobTraces {
   }
 
   /**
+   * Test if {@link TraceBuilder} can process globbed input file paths.
+   */
+  @Test
+  public void testGlobbedInput() throws Exception {
+    final Configuration conf = new Configuration();
+    final FileSystem lfs = FileSystem.getLocal(conf);
+    
+    // define the test's root temporary directory
+    final Path rootTempDir =
+      new Path(System.getProperty("test.build.data", "/tmp"))
+            .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
+    // define the test's root input directory
+    Path testRootInputDir = new Path(rootTempDir, "TestGlobbedInputPath");
+    // define the nested input directory
+    Path nestedInputDir = new Path(testRootInputDir, "1/2/3/4");
+    // define the globbed version of the nested input directory
+    Path globbedInputNestedDir = 
+      lfs.makeQualified(new Path(testRootInputDir, "*/*/*/*/*"));
+    
+    // define a file in the nested test input directory
+    Path inputPath1 = new Path(nestedInputDir, "test.txt");
+    // define a sub-folder in the nested test input directory
+    Path inputPath2Parent = new Path(nestedInputDir, "test");
+    lfs.mkdirs(inputPath2Parent);
+    // define a file in the sub-folder within the nested test input directory
+    Path inputPath2 = new Path(inputPath2Parent, "test.txt");
+    
+    // create empty input files
+    lfs.createNewFile(inputPath1);
+    lfs.createNewFile(inputPath2);
+    
+    // define the output trace and topology files
+    Path outputTracePath = new Path(testRootInputDir, "test.json");
+    Path outputTopologyTracePath = new Path(testRootInputDir, "topology.json");
+    
+    String[] args = 
+      new String[] {outputTracePath.toString(), 
+                    outputTopologyTracePath.toString(), 
+                    globbedInputNestedDir.toString() };
+    
+    // invoke TraceBuilder's MyOptions command options parsing module/utility
+    MyOptions options = new TraceBuilder.MyOptions(args, conf);
+    
+    lfs.delete(testRootInputDir, true);
+    
+    assertEquals("Error in detecting globbed input FileSystem paths", 
+                 2, options.inputs.size());
+    
+    assertTrue("Missing input file " + inputPath1, 
+               options.inputs.contains(inputPath1));
+    assertTrue("Missing input file " + inputPath2, 
+               options.inputs.contains(inputPath2));
+  }
+  
+  /**
    * Test if {@link CurrentJHParser} can read events from current JH files.
    */
   @Test

Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java?rev=999769&r1=999768&r2=999769&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java Wed Sep 22 05:58:04 2010
@@ -81,11 +81,32 @@ public class TraceBuilder extends Config
       topologyOutput = new Path(args[1 + switchTop]);
 
       for (int i = 2 + switchTop; i < args.length; ++i) {
-
-        Path thisPath = new Path(args[i]);
-
-        FileSystem fs = thisPath.getFileSystem(conf);
-        if (fs.getFileStatus(thisPath).isDirectory()) {
+        processInputArguments(args[i], conf);
+      }
+    }
+    
+    /** Processes the input file/folder arguments. If the input is a file then 
+     *  it is directly considered for further processing. If the input is a 
+     *  folder, then all the files in the input folder are considered for 
+     *  further processing.
+     *
+     *  NOTE: If the input represents a globbed path, then it is first flattened
+     *        and then the individual paths represented by the globbed input
+     *        path are processed.
+     */
+    private void processInputArguments(String input, Configuration conf) 
+    throws IOException {
+      Path inPath = new Path(input);
+      FileSystem fs = inPath.getFileSystem(conf);
+      FileStatus[] inStatuses = fs.globStatus(inPath);
+      
+      if (inStatuses == null || inStatuses.length == 0) {
+        return;
+      }
+      
+      for (FileStatus inStatus : inStatuses) {
+        Path thisPath = inStatus.getPath();
+        if (inStatus.isDirectory()) {
           FileStatus[] statuses = fs.listStatus(thisPath);
 
           List<String> dirNames = new ArrayList<String>();



Mime
View raw message