hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r504682 - in /lucene/hadoop/trunk: CHANGES.txt build.xml src/contrib/smallJobsBenchmark/ src/test/org/apache/hadoop/mapred/MRBench.java src/test/org/apache/hadoop/test/AllTestDriver.java
Date Wed, 07 Feb 2007 20:45:10 GMT
Author: cutting
Date: Wed Feb  7 12:45:09 2007
New Revision: 504682

URL: http://svn.apache.org/viewvc?view=rev&rev=504682
Log:
HADOOP-858.  Move contrib/smallJobsBenchmark to src/test.  Contributed by Nigel.

Added:
    lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/MRBench.java
Removed:
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/build.xml
    lucene/hadoop/trunk/src/test/org/apache/hadoop/test/AllTestDriver.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=504682&r1=504681&r2=504682
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Feb  7 12:45:09 2007
@@ -9,6 +9,9 @@
  2. HADOOP-982.  Add some setters and a toString() method to
     BytesWritable.  (omalley via cutting)
 
+ 3. HADOOP-858.  Move contrib/smallJobsBenchmark to src/test, removing
+    obsolete bits. (Nigel Daley via cutting)
+
 
 Branch 0.11 - unreleased
 

Modified: lucene/hadoop/trunk/build.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/build.xml?view=diff&rev=504682&r1=504681&r2=504682
==============================================================================
--- lucene/hadoop/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Wed Feb  7 12:45:09 2007
@@ -493,7 +493,6 @@
     	<packageset dir="${examples.dir}"/>
 
     	<packageset dir="src/contrib/streaming/src/java"/>
-    	<packageset dir="src/contrib/smallJobsBenchmark/src/java"/>
     	<packageset dir="src/contrib/abacus/src/java"/>
 
         <link href="${javadoc.link.java}"/>
@@ -503,7 +502,6 @@
     	<group title="Examples" packages="org.apache.hadoop.examples*"/>
 
        <group title="contrib: Streaming" packages="org.apache.hadoop.streaming*"/>
-       <group title="contrib: Small Jobs Benchmark" packages="org.apache.hadoop.benchmarks.mapred*"/>
        <group title="contrib: Abacus" packages="org.apache.hadoop.abacus*"/>
 
     </javadoc>

Added: lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/MRBench.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/MRBench.java?view=auto&rev=504682
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/MRBench.java (added)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/mapred/MRBench.java Wed Feb  7 12:45:09 2007
@@ -0,0 +1,308 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * Runs a job multiple times and takes average of all runs.
+ *
+ * @author Sanjay Dahiya
+ * @author Nigel Daley
+ */
+public class MRBench {
+  
+  private static final Log LOG = LogFactory.getLog(MRBench.class);
+  private static Path BASE_DIR =
+    new Path(System.getProperty("test.build.data","/benchmarks/MRBench"));
+  private static Path INPUT_DIR = new Path(BASE_DIR, "mr_input");
+  private static Path OUTPUT_DIR = new Path(BASE_DIR, "mr_output");
+  
+  public static enum Order {RANDOM, ASCENDING, DESCENDING}; 
+  
+  /**
+   * Takes input format as text lines, runs some processing on it and 
+   * writes out data as text again. 
+   */
+  public static class Map extends MapReduceBase implements Mapper {
+    public void map(WritableComparable key, Writable value,
+        OutputCollector output, Reporter reporter) throws IOException 
+    {
+      String line = value.toString();
+      output.collect(new UTF8(process(line)), new UTF8(""));		
+    }
+    public String process(String line) {
+      return line; 
+    }
+  }
+
+  /**
+   * Ignores the key and writes values to the output. 
+   */
+  public static class Reduce extends MapReduceBase implements Reducer {
+    public void reduce(WritableComparable key, Iterator values,
+        OutputCollector output, Reporter reporter) throws IOException 
+    {
+      while(values.hasNext()) {
+        output.collect(key, new UTF8(values.next().toString()));
+      }
+    }
+  }
+
+  /**
+   * Generate a text file on the given filesystem with the given path name.
+   * The text file will contain the given number of lines of generated data.
+   * The generated data are string representations of numbers.  Each line
+   * is the same length, which is achieved by padding each number with
+   * an appropriate number of leading '0' (zero) characters.  The order of
+   * generated data is one of ascending, descending, or random.
+   */
+  public static void generateTextFile(FileSystem fs, Path inputFile, 
+    long numLines, Order sortOrder) throws IOException 
+  {
+    LOG.info("creating control file: "+numLines+" numLines, "+sortOrder+" sortOrder");
+    PrintStream output = null;
+    try {
+      output = new PrintStream(fs.create(inputFile));
+      int padding = String.valueOf(numLines).length();
+      switch(sortOrder) {
+        case RANDOM:
+          for (long l = 0; l < numLines; l++) {
+            output.println(pad((new Random()).nextLong(), padding));
+          }
+          break; 
+        case ASCENDING: 
+          for (long l = 0; l < numLines; l++) {
+            output.println(pad(l, padding));
+          }
+          break;
+        case DESCENDING: 
+          for (long l = numLines; l > 0; l--) {
+            output.println(pad(l, padding));
+          }
+          break;
+      }
+    } finally {
+      if (output != null)
+        output.close();
+    }
+    LOG.info("created control file: " + inputFile);
+  }
+  
+  /**
+   * Convert the given number to a string and pad the number with 
+   * leading '0' (zero) characters so that the string is exactly
+   * the given length.
+   */
+  private static String pad(long number, int length) {
+    String str = String.valueOf(number);
+    StringBuffer value = new StringBuffer(); 
+    for (int i = str.length(); i < length; i++) {
+      value.append("0"); 
+    }
+    value.append(str); 
+    return value.toString();
+  }
+  
+  /**
+   * Create the job configuration.
+   */
+  private static JobConf setupJob(int numMaps, int numReduces, String jarFile) {
+    JobConf jobConf = new JobConf(MRBench.class);
+    jobConf.addInputPath(INPUT_DIR);
+    
+    jobConf.setInputFormat(TextInputFormat.class);
+    jobConf.setOutputFormat(TextOutputFormat.class);
+    
+    jobConf.setInputKeyClass(LongWritable.class);
+    jobConf.setOutputValueClass(UTF8.class);
+    
+    jobConf.setMapOutputKeyClass(UTF8.class);
+    jobConf.setMapOutputValueClass(UTF8.class);
+    
+    if ( null != jarFile ) {
+      jobConf.setJar(jarFile);
+    }
+    jobConf.setMapperClass(Map.class);
+    jobConf.setReducerClass(Reduce.class);
+    
+    jobConf.setNumMapTasks(numMaps);
+    jobConf.setNumReduceTasks(numReduces);
+    
+    return jobConf; 
+  }
+  
+  /**
+   * Runs a MapReduce task, given number of times. The input to each run
+   * is the same file.
+   */
+  private static ArrayList<Long> runJobInSequence(JobConf masterJobConf, int numRuns) throws IOException {
+    Path intrimData = null; 
+    Random rand = new Random();
+    ArrayList<Long> execTimes = new ArrayList<Long>(); 
+    
+    for (int i = 0; i < numRuns; i++) {
+      // create a new job conf every time, reusing same object does not work 
+      JobConf jobConf = new JobConf(masterJobConf);
+      // reset the job jar because the copy constructor doesn't
+      jobConf.setJar(masterJobConf.getJar());
+      // give a new random name to output of the mapred tasks
+      jobConf.setOutputPath(new Path(OUTPUT_DIR, "output_" + rand.nextInt()));
+
+      LOG.info("Running job " + i + ":" +
+        " input=" + jobConf.getInputPaths()[0] + 
+        " output=" + jobConf.getOutputPath());
+      
+      // run the mapred task now 
+      long curTime = System.currentTimeMillis();
+      JobClient.runJob(jobConf);
+      execTimes.add(new Long(System.currentTimeMillis() - curTime));
+    }
+    return execTimes;
+  }
+  
+  /**
+   * <pre>
+   * Usage: mrbench
+   *    [-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>]
+   *    [-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>]
+   *    [-numRuns <number of times to run the job, default is 1>]
+   *    [-maps <number of maps for each run, default is 2>]
+   *    [-reduces <number of reduces for each run, default is 1>]
+   *    [-inputLines <number of input lines to generate, default is 1>]
+   *    [-inputType <type of input to generate, one of ascending (default), descending, random>]
+   *    [-verbose]
+   * </pre>
+   */
+  public static void main (String[] args) throws IOException {
+    String version = "MRBenchmark.0.0.2";
+    System.out.println(version);
+
+    String usage = 
+      "Usage: mrbench " +
+      "[-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>] " + 
+      "[-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>] " + 
+      "[-numRuns <number of times to run the job, default is 1>] " +
+      "[-maps <number of maps for each run, default is 2>] " +
+      "[-reduces <number of reduces for each run, default is 1>] " +
+      "[-inputLines <number of input lines to generate, default is 1>] " +
+      "[-inputType <type of input to generate, one of ascending (default), descending, random>] " + 
+      "[-verbose]";
+    
+    String jarFile = null;
+    int inputLines = 1; 
+    int numRuns = 1;
+    int numMaps = 2; 
+    int numReduces = 1;
+    boolean verbose = false;         
+    Order inputSortOrder = Order.ASCENDING;     
+    for (int i = 0; i < args.length; i++) { // parse command line
+      if (args[i].equals("-jar")) {
+        jarFile = args[++i];
+      } else if (args[i].equals("-numRuns")) {
+        numRuns = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-baseDir")) {
+        BASE_DIR = new Path(args[++i]);
+      } else if (args[i].equals("-maps")) {
+        numMaps = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-reduces")) {
+        numReduces = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-inputLines")) {
+        inputLines = Integer.parseInt(args[++i]);
+      } else if (args[i].equals("-inputType")) {
+        String s = args[++i]; 
+        if (s.equalsIgnoreCase("ascending")) {
+          inputSortOrder = Order.ASCENDING;
+        } else if (s.equalsIgnoreCase("descending")) {
+          inputSortOrder = Order.DESCENDING; 
+        } else if (s.equalsIgnoreCase("random")) {
+          inputSortOrder = Order.RANDOM;
+        } else {
+          inputSortOrder = null;
+        }
+      } else if (args[i].equals("-verbose")) {
+        verbose = true;
+      } else {
+        System.err.println(usage);
+        System.exit(-1);
+      }
+    }
+    
+    if (numRuns < 1 ||  // verify args
+      numMaps < 1 ||
+      numReduces < 1 ||
+      inputLines < 0 ||
+      inputSortOrder == null)
+    {
+      System.err.println(usage);
+      System.exit(-1);
+    }
+
+    JobConf jobConf = setupJob(numMaps, numReduces, jarFile);
+    FileSystem fs = FileSystem.get(jobConf);
+    Path inputFile = new Path(INPUT_DIR, "input_" + (new Random()).nextInt() + ".txt");
+    generateTextFile(fs, inputFile, inputLines, inputSortOrder);
+
+    // setup test output directory
+    fs.mkdirs(BASE_DIR); 
+    ArrayList<Long> execTimes = new ArrayList<Long>();
+    try {
+      execTimes = runJobInSequence(jobConf, numRuns);
+    } finally {
+      // delete output -- should we really do this?
+      fs.delete(BASE_DIR);
+    }
+    
+    if (verbose) {
+      // Print out a report 
+      System.out.println("Total MapReduce jobs executed: " + numRuns);
+      System.out.println("Total lines of data per job: " + inputLines);
+      System.out.println("Maps per job: " + numMaps);
+      System.out.println("Reduces per job: " + numReduces);
+    }
+    int i = 0;
+    long totalTime = 0; 
+    for (Long time : execTimes) {
+      totalTime += time.longValue(); 
+      if (verbose) {
+        System.out.println("Total milliseconds for task: " + (++i) + 
+            " = " +  time);
+      }
+    }
+    long avgTime = totalTime / numRuns;    
+    System.out.println("DataLines\tMaps\tReduces\tAvgTime (milliseconds)");
+    System.out.println(inputLines + "\t\t" + numMaps + "\t" + 
+        numReduces + "\t" + avgTime);
+  }
+  
+}

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/test/AllTestDriver.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/test/AllTestDriver.java?view=diff&rev=504682&r1=504681&r2=504682
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/test/AllTestDriver.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/test/AllTestDriver.java Wed Feb  7 12:45:09 2007
@@ -19,49 +19,49 @@
 package org.apache.hadoop.test;
 
 import org.apache.hadoop.util.ProgramDriver;
+import org.apache.hadoop.mapred.MRBench;
 import org.apache.hadoop.mapred.TestMapRed;
 import org.apache.hadoop.mapred.TestTextInputFormat;
 import org.apache.hadoop.mapred.TestSequenceFileInputFormat;
 import org.apache.hadoop.dfs.ClusterTestDFS;
 import org.apache.hadoop.dfs.NNBench;
+import org.apache.hadoop.fs.DistributedFSCheck;
+import org.apache.hadoop.fs.TestDFSIO;
+import org.apache.hadoop.fs.DFSCIOTest;
 import org.apache.hadoop.fs.TestFileSystem;
 import org.apache.hadoop.io.TestArrayFile;
 import org.apache.hadoop.io.TestSetFile;
 import org.apache.hadoop.io.TestSequenceFile;
 import org.apache.hadoop.ipc.TestIPC;
 import org.apache.hadoop.ipc.TestRPC;
-import org.apache.hadoop.fs.DistributedFSCheck;
-import org.apache.hadoop.fs.TestDFSIO;
-import org.apache.hadoop.fs.DFSCIOTest;
 
 public class AllTestDriver {
   
   /**
    * A description of the test program for running all the tests using jar file
-   * @date April 2006
    */
-    
-    public static void main(String argv[]){
-	ProgramDriver pgd = new ProgramDriver();
-	try {
-            pgd.addClass("nnbench", NNBench.class, "A benchmark that stresses the namenode.");
-	    pgd.addClass("mapredtest", TestMapRed.class, "A map/reduce test check.");
-	    pgd.addClass("clustertestdfs", ClusterTestDFS.class, "A pseudo distributed test for DFS.");
-	    pgd.addClass("testfilesystem", TestFileSystem.class, "A test for FileSystem read/write.");
-	    pgd.addClass("testsequencefile", TestSequenceFile.class, "A test for flat files of binary key value pairs.");
-	    pgd.addClass("testsetfile", TestSetFile.class, "A test for flat files of binary key/value pairs.");
-	    pgd.addClass("testarrayfile", TestArrayFile.class, "A test for flat files of binary key/value pairs.");
-	    pgd.addClass("testrpc", TestRPC.class, "A test for rpc.");
-	    pgd.addClass("testipc", TestIPC.class, "A test for ipc.");
-	    pgd.addClass("testsequencefileinputformat", TestSequenceFileInputFormat.class, "A test for sequence file input format.");
-	    pgd.addClass("testtextinputformat", TestTextInputFormat.class, "A test for text input format.");
+  public static void main(String argv[]){
+    ProgramDriver pgd = new ProgramDriver();
+    try {
+      pgd.addClass("mrbench", MRBench.class, "A map/reduce benchmark that can create many small jobs");
+      pgd.addClass("nnbench", NNBench.class, "A benchmark that stresses the namenode.");
+      pgd.addClass("mapredtest", TestMapRed.class, "A map/reduce test check.");
+      pgd.addClass("clustertestdfs", ClusterTestDFS.class, "A pseudo distributed test for DFS.");
+      pgd.addClass("testfilesystem", TestFileSystem.class, "A test for FileSystem read/write.");
+      pgd.addClass("testsequencefile", TestSequenceFile.class, "A test for flat files of binary key value pairs.");
+      pgd.addClass("testsetfile", TestSetFile.class, "A test for flat files of binary key/value pairs.");
+      pgd.addClass("testarrayfile", TestArrayFile.class, "A test for flat files of binary key/value pairs.");
+      pgd.addClass("testrpc", TestRPC.class, "A test for rpc.");
+      pgd.addClass("testipc", TestIPC.class, "A test for ipc.");
+      pgd.addClass("testsequencefileinputformat", TestSequenceFileInputFormat.class, "A test for sequence file input format.");
+      pgd.addClass("testtextinputformat", TestTextInputFormat.class, "A test for text input format.");
       pgd.addClass("TestDFSIO", TestDFSIO.class, "Distributed i/o benchmark.");
       pgd.addClass("DFSCIOTest", DFSCIOTest.class, "Distributed i/o benchmark of libhdfs.");
       pgd.addClass("DistributedFSCheck", DistributedFSCheck.class, "Distributed checkup of the file system consistency.");
-	    pgd.driver(argv);
-	}
-	catch(Throwable e){
-	    e.printStackTrace();
-	}
+      pgd.driver(argv);
+    } catch(Throwable e) {
+      e.printStackTrace();
     }
+  }
 }
+



Mime
View raw message