hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r423049 - in /lucene/hadoop/trunk: ./ src/contrib/smallJobsBenchmark/ src/contrib/smallJobsBenchmark/bin/ src/contrib/smallJobsBenchmark/src/ src/contrib/smallJobsBenchmark/src/java/ src/contrib/smallJobsBenchmark/src/java/org/ src/contrib/...
Date Tue, 18 Jul 2006 11:05:11 GMT
Author: cutting
Date: Tue Jul 18 04:05:10 2006
New Revision: 423049

URL: http://svn.apache.org/viewvc?rev=423049&view=rev
Log:
HADOOP-307.  Add smallJobsBenchmark contrib module.  This runs lots of small jobs to determine
per-task overheads.  Contributed by Sanjay Dahiya.

Added:
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh   (with props)
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh   (with props)
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java
    lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=423049&r1=423048&r2=423049&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Tue Jul 18 04:05:10 2006
@@ -43,6 +43,10 @@
 12. HADOOP-356.  Add contrib to "compile" and "test" build targets, so
     that this code is better maintained. (Michel Tourn via cutting)
 
+13. HADOOP-307.  Add smallJobsBenchmark contrib module.  This runs
+    lots of small jobs, in order to determine per-task overheads.
+    (Sanjay Dahiya via cutting)
+
 
 Release 0.4.0 - 2006-06-28
 

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/Readme.txt Tue Jul 18 04:05:10 2006
@@ -0,0 +1,40 @@
+SmallJobsBenchmark Readme : 
+
+Building the benchmark. 
+to build - 
+$ cd smallJobsBenchmark
+$ ant deploy
+
+Running the benchmark
+$ cd build/contrib/smallJobsBenchmark
+$ bin/run.sh
+
+after successfully running the benchmark see logs/report.txt for consolidated output of all
the runs. 
+
+Edit bin/run.sh to configure the options passed to the benchmark. 
+
+Configurable options are - 
+
+-inputLines noOfLines 
+  no of lines of input to generate. 
+
+-inputType (ascending, descending, random)
+  type of input to generate. 
+
+-jar jarFilePath 
+  Jar file containing the Mapper and Reducer implementations. By default the ant build creates MRBenchmark.jar, which contains the default Mapper and Reducer. 
+  
+-times numJobs 
+No of times to run each MapReduce task, time is calculated as average of all runs. 
+
+-workDir dfsPath 
+DFS path to put output of MR tasks. 
+
+-maps numMaps 
+No of maps for each task 
+
+-reduces numReduces 
+No of reduces for each task
+
+-ignoreOutput
+Doesn't copy the output back to local disk. Otherwise the output is copied back to a temp location on local disk. 

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh Tue Jul 18 04:05:10 2006
@@ -0,0 +1,10 @@
+#!/bin/bash
+#
+# Builds logs/report.txt from the benchmark logs: a header line followed by
+# the last line of every logs/*.log file, then prints the report.
+# Paths are relative, so run this from the directory that contains logs/.
+
+echo "DataLines, Maps, Reduces, AvgTime " > logs/report.txt
+# Iterate over the glob itself rather than the output of `ls`, which is
+# word-split and fails noisily when no log file exists.
+for logFile in logs/*.log
+do
+        # An unmatched glob is left as the literal pattern; skip it.
+        [ -f "${logFile}" ] || continue
+        tail -n 1  "${logFile}" >> logs/report.txt
+done
+
+cat  logs/report.txt

Propchange: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/report.sh
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh Tue Jul 18 04:05:10 2006
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Runs the small-jobs benchmark once for every combination of input size,
+# map count and reduce count listed below. Each run is logged to
+# logs/benchmark_<lines>_<maps>_<reduces>.log and bin/report.sh is invoked
+# at the end to print the consolidated report.
+# Requires HADOOP_HOME and JAVA_HOME to be set.
+
+if [ -z "$HADOOP_HOME" ]
+then
+  echo "Error HADOOP_HOME not defined"  ;
+  exit 1;
+fi
+
+if [ -z "$JAVA_HOME" ]
+then
+  echo "Error JAVA_HOME not defined"  ;
+  exit 1;
+fi
+
+# tools.jar comes from JAVA_HOME instead of a machine-specific JDK path.
+CLASSPATH=$HADOOP_HOME/conf:$JAVA_HOME/lib/tools.jar
+CLASSPATH=$CLASSPATH:$HADOOP_HOME/build/classes:$HADOOP_HOME/build
+CLASSPATH=$CLASSPATH:$HADOOP_HOME/build/test/classes
+
+# A glob inside a variable assignment is never expanded, so every Hadoop
+# jar has to be appended explicitly.
+for hadoopJar in "$HADOOP_HOME"/hadoop-*.jar
+do
+  if [ -f "$hadoopJar" ]
+  then
+    CLASSPATH=$CLASSPATH:$hadoopJar
+  fi
+done
+
+for lib in commons-cli-2.0-SNAPSHOT commons-logging-1.0.4 \
+           commons-logging-api-1.0.4 jetty-5.1.4 junit-3.8.1 log4j-1.2.13 \
+           lucene-core-1.9.1 servlet-api jetty-ext/ant jetty-ext/commons-el \
+           jetty-ext/jasper-compiler jetty-ext/jasper-runtime jetty-ext/jsp-api
+do
+  CLASSPATH=$CLASSPATH:$HADOOP_HOME/lib/$lib.jar
+done
+export CLASSPATH
+
+mkdir -p logs;
+
+# Number of times each job configuration is run (passed as -times).
+export TIMES=2
+
+#for dataLines in 1 10000 10000000 
+for dataLines in 1 100
+do
+  for maps in 1 18
+  do
+    for reduces in 1 18
+    do
+      "$JAVA_HOME/bin/java" -classpath "$CLASSPATH:./classes" \
+        org.apache.hadoop.benchmarks.mapred.MultiJobRunner \
+        -inputLines ${dataLines} -output /hadoop/mapred/MROutput \
+        -jar MRBenchmark.jar -times ${TIMES} \
+        -workDir /hadoop/mapred/work -maps ${maps} -reduces ${reduces} \
+        -inputType ascending  -ignoreOutput \
+        2>&1 | tee "logs/benchmark_${dataLines}_${maps}_${reduces}.log"
+
+    done
+  done
+done
+
+bin/report.sh

Propchange: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/bin/run.sh
------------------------------------------------------------------------------
    svn:executable = *

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml (added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/build.xml Tue Jul 18 04:05:10 2006
@@ -0,0 +1,27 @@
+<?xml version="1.0"?>
+
+<!-- Build file for the smallJobsBenchmark contrib module: packages the
+     compiled classes into the benchmark jar and deploys the driver
+     scripts into the build directory. -->
+<project name="smallJobsBenchmark" default="jar">
+
+  <import file="../build-contrib.xml"/>
+  
+  <!-- Same name is used by shell scripts running this 
+       benchmark -->
+  <property name="benchmarkJarName" value="MRBenchmark.jar"/>
+    
+  <!-- Packages everything under ${build.classes} into the benchmark jar. -->
+  <target name="jar" depends="compile">
+    <jar
+      jarfile="${build.dir}/${benchmarkJarName}"
+      basedir="${build.classes}">
+    </jar>
+  </target>
+  
+  <!-- Copies the driver scripts into ${build.dir}/bin. -->
+  <target name="deploy" depends="jar">
+    <mkdir dir="${build.dir}/bin"/>
+    <copy todir="${build.dir}/bin">
+      <fileset dir="${root}/bin">
+        <include name="**/*.*"/>
+      </fileset>
+    </copy>
+    <!-- <copy> does not retain Unix file permissions, so restore the
+         execute bit needed to start the scripts as "bin/run.sh". -->
+    <chmod dir="${build.dir}/bin" perm="ugo+x" includes="**/*.sh"/>
+  </target>
+  
+</project>

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java
(added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkMapper.java
Tue Jul 18 04:05:10 2006
@@ -0,0 +1,34 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.IOException;
+import java.io.StringWriter;
+
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Mapper used by the benchmark. Each input value is read as a line of
+ * text, passed through {@link #process(String)} and emitted as the output
+ * key, paired with an empty text value.
+ * 
+ * @author sanjaydahiya
+ *
+ */
+public class BenchmarkMapper extends MapReduceBase implements Mapper {
+  
+  /**
+   * Emits the processed text of <code>value</code> as the key together
+   * with an empty UTF8 value. The incoming key is not used.
+   */
+  public void map(WritableComparable key, Writable value,
+      OutputCollector output, Reporter reporter) throws IOException {
+    
+    UTF8 text = (UTF8) value;
+    UTF8 outKey = new UTF8(process(text.toString()));
+    UTF8 outValue = new UTF8("");
+    output.collect(outKey, outValue);
+  }
+  
+  /**
+   * Per-line processing hook; this implementation returns the line
+   * unchanged.
+   */
+  public String process(String line){
+    return line;
+  }
+  
+}

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java
(added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/BenchmarkReducer.java
Tue Jul 18 04:05:10 2006
@@ -0,0 +1,32 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+
+/**
+ * Reducer used by the benchmark. For every value received for a key it
+ * emits the key together with the value's text.
+ * 
+ * @author sanjaydahiya
+ *
+ */
+public class BenchmarkReducer extends MapReduceBase implements Reducer {
+  
+  /**
+   * Writes one (key, value) pair to <code>output</code> per incoming
+   * value; each value is re-wrapped as UTF8 from its string form.
+   */
+  public void reduce(WritableComparable key, Iterator values,
+      OutputCollector output, Reporter reporter) throws IOException {
+    
+    for( Iterator remaining = values ; remaining.hasNext() ; ){
+      Object current = remaining.next();
+      UTF8 outValue = new UTF8(current.toString());
+      output.collect(key, outValue);
+    }
+  }
+  
+  /**
+   * Returns the given line unchanged; not called by
+   * {@link #reduce(WritableComparable, Iterator, OutputCollector, Reporter)}.
+   */
+  public String process(String line){
+    return line;
+  }
+}

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java
(added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/GenData.java
Tue Jul 18 04:05:10 2006
@@ -0,0 +1,64 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Random;
+
+
+/**
+ * Generates text files of numbers, one per line, used as benchmark input.
+ * Numbers are left-padded with zeros to the number of digits of the
+ * requested line count.
+ */
+public class GenData {
+  /** Every line is a random long value. */
+  public static final int RANDOM = 1; 
+  /** Lines count up from 0 to numLines - 1. */
+  public static final int ASCENDING = 2; 
+  /** Lines count down from numLines to 1. */
+  public static final int DESCENDING = 3; 
+  
+  /**
+   * Writes <code>numLines</code> lines of numbers to <code>file</code>,
+   * replacing any existing content.
+   * 
+   * @param numLines number of lines to write
+   * @param file     file to write to
+   * @param sortType one of {@link #RANDOM}, {@link #ASCENDING} or
+   *                 {@link #DESCENDING}; any other value writes no lines
+   * @throws IOException if the file cannot be opened or written
+   */
+  public static void generateText(long numLines, File file, int sortType) throws IOException{
+    
+    PrintStream output = new PrintStream(new FileOutputStream(file)); 
+    try{
+      int padding = String.valueOf(numLines).length();
+      // one generator for the whole file instead of one per line
+      Random rand = new Random();
+      
+      switch(sortType){
+      
+      case RANDOM : 
+        for(long l = 0 ; l<numLines ; l++ ){
+          output.println(pad(rand.nextLong(), padding));
+        }
+        break ; 
+        
+      case ASCENDING: 
+        for(long l = 0 ; l<numLines ; l++ ){
+          output.println(pad(l, padding));
+        }
+        break ;
+        
+      case DESCENDING: 
+        for(long l = numLines ; l>0 ; l-- ){
+          output.println(pad(l, padding));
+        }
+        break ;
+        
+      }
+      
+      // PrintStream never throws on a failed write, it only records the
+      // failure; surface it so callers do not benchmark a truncated file.
+      if( output.checkError() ){
+        throw new IOException("Error writing generated data to " + file);
+      }
+    }finally{
+      // release the file handle even when generation fails
+      output.close() ; 
+    }
+  }
+  
+  /**
+   * Returns <code>number</code> as a decimal string, prefixed with zeros
+   * until it is at least <code>size</code> characters long.
+   */
+  private static String pad( long number, int size ){
+    String str = String.valueOf(number);
+    
+    StringBuffer value = new StringBuffer(); 
+    for( int i = str.length(); i< size ; i++ ){
+      value.append("0"); 
+    }
+    value.append(str); 
+    return value.toString();
+  }
+  
+  /**
+   * Manual test: writes 100 ascending lines to the file named by the
+   * first argument, or to the original author's default path when no
+   * argument is given.
+   */
+  public static void main(String[] args){
+    String target = "/Users/sanjaydahiya/dev/temp/sort.txt";
+    if( args.length > 0 ){
+      target = args[0];
+    }
+    try{
+      // test 
+      generateText(100, new File(target), ASCENDING);
+    }catch(Exception e){
+      e.printStackTrace();
+    }
+  }
+  
+}

Added: lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java?rev=423049&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java
(added)
+++ lucene/hadoop/trunk/src/contrib/smallJobsBenchmark/src/java/org/apache/hadoop/benchmarks/mapred/MultiJobRunner.java
Tue Jul 18 04:05:10 2006
@@ -0,0 +1,405 @@
+package org.apache.hadoop.benchmarks.mapred;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.UTF8;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+
+/**
+ * Runs the same MapReduce job a number of times and reports the average
+ * elapsed time of all runs. 
+ * @author sanjaydahiya
+ *
+ */
+public class MultiJobRunner {
+  
+  private String jarFile = "MRBenchmark.jar" ;
+  private String input ; 
+  private String output ; 
+  private int numJobs = 2000 ; // default value
+  private static final Log LOG = LogFactory.getLog(MultiJobRunner.class);
+  private int numMaps = 2; 
+  private int numReduces = 1;
+  private int dataLines = 1; 
+  private boolean ignoreOutput = false ; 
+  private boolean verbose = false ; 
+  
+  // elapsed time of every job run so far, in milliseconds, stored as Long;
+  // just to print in the end
+  ArrayList execTimes = new ArrayList(); 
+  
+  // DFS directory the benchmark works in; replaced by the -workDir option
+  private static String context = "/mapred/benchmark"; 
+  
+  /**
+   * Input is a local file. 
+   * @param input   path of the local input file
+   * @param output  output path set on the job configuration
+   * @param jarFile jar containing the Mapper and Reducer classes
+   */
+  public MultiJobRunner(String input, String output, String jarFile){
+    this.input = input ; 
+    this.output = output ; 
+    this.jarFile = jarFile ; 
+  }
+  
+  public String getInput() {
+    return input;
+  }
+  
+  public void setInput(String input) {
+    this.input = input;
+  } 
+  
+  public String getJarFile() {
+    return jarFile;
+  }
+  
+  public void setJarFile(String jarFile) {
+    this.jarFile = jarFile;
+  }
+  
+  public String getOutput() {
+    return output;
+  }
+  
+  public void setOutput(String output) {
+    this.output = output;
+  }
+  
+  public int getNumJobs() {
+    return numJobs;
+  }
+  
+  public void setNumJobs(int numJobs) {
+    this.numJobs = numJobs;
+  }
+  
+  
+  public int getDataLines() {
+    return dataLines;
+  }
+  
+  public void setDataLines(int dataLines) {
+    this.dataLines = dataLines;
+  }
+  
+  public boolean isIgnoreOutput(){
+    return this.ignoreOutput ; 
+  }
+  
+  public void setIgnoreOutput(boolean ignore){
+    this.ignoreOutput = ignore ; 
+  }
+  
+  public void setVerbose(boolean verbose){
+    this.verbose = verbose ; 
+  }
+  public boolean getVerbose(){
+    return this.verbose; 
+  }
+  
+  /**
+   * Prepare the jobConf.
+   * @return a new job configuration reading from the input directory
+   *         under the working directory, using the benchmark Mapper and
+   *         Reducer and the configured number of map and reduce tasks
+   */
+  private JobConf setupJob(){
+    JobConf job = new JobConf() ;
+    
+    job.addInputPath(new Path(context + "/input"));
+    
+    job.setInputFormat(TextInputFormat.class);
+    job.setOutputFormat(TextOutputFormat.class);
+    
+    job.setInputKeyClass(LongWritable.class);
+    job.setOutputValueClass(UTF8.class);
+    
+    job.setMapOutputKeyClass(UTF8.class);
+    job.setMapOutputValueClass(UTF8.class);
+    
+    job.setOutputPath(new Path(output));
+    
+    job.setJar(jarFile);
+    job.setMapperClass(BenchmarkMapper.class);
+    job.setReducerClass(BenchmarkReducer.class);
+    
+    job.setNumMapTasks(this.numMaps);
+    job.setNumReduceTasks(this.numReduces);
+    
+    return job ; 
+  }
+  
+  /**
+   * Runs a MapReduce task, given number of times. The input to each task
+   * is the same file. The elapsed time of every run is appended to
+   * execTimes.
+   * @param times number of times to run the job
+   * @throws IOException
+   */
+  private void runJobInSequence(int times) throws IOException{
+    Random rand = new Random();
+    
+    for( int i= 0;i<times;i++){
+      // create a new job conf every time, reusing same object doesnt seem to work. 
+      JobConf job = setupJob();
+      
+      // give a new random name to output of the mapred tasks
+      // TODO: see if something better can be done
+      Path intrimData = new Path(context+"/temp/multiMapRedOutput_" + 
+          rand.nextInt() );
+      job.setOutputPath(intrimData);
+      
+      // run the mapred task now 
+      LOG.info("Running job, Input : " + job.getInputPaths()[0] + 
+          " Output : " + job.getOutputPath());
+      long curTime = System.currentTimeMillis();
+      JobClient.runJob(job);
+      execTimes.add(new Long(System.currentTimeMillis() - curTime));
+      
+      if( ! ignoreOutput ){
+        // pull the output out of DFS for validation. The temp file only
+        // reserves a unique local name; it is deleted again so that the
+        // copy can create the destination itself. 
+        File localOutputFile = File.createTempFile("MROutput" + 
+            rand.nextInt(), ".txt" );
+        String localOutputPath = localOutputFile.getAbsolutePath() ; 
+        localOutputFile.delete(); 
+        
+        copyFromDFS(intrimData, localOutputPath);
+      }
+      
+      // diff(input, localOutputPath);
+    }
+  }
+  
+  /**
+   * Not using it. Placeholder that always returns false. 
+   */
+  private boolean diff(String path1, String path2) throws IOException{
+    boolean ret = false ; 
+    
+    return ret ; 
+  }
+  
+  /**
+   * Runs a sequence of map reduce tasks, output of each reduce is input 
+   * to next map. input should be a pre configured array of JobConfs. 
+   * 
+   * @param jobs the jobs to run, in order
+   * @return output path of the last job, or null if jobs is empty
+   * @throws IOException
+   */
+  public Path runJobsInSequence(JobConf[] jobs) throws IOException{
+    
+    // input location = jobs[0] input loc
+    Path finalOutput = null ; 
+    
+    for( int i=0;i<jobs.length; i++){
+      if( 0 != i ) {
+        // every job after the first also reads the previous job's output 
+        jobs[i].addInputPath(finalOutput) ; 
+      }
+      
+      JobClient.runJob(jobs[i]);
+      finalOutput = jobs[i].getOutputPath(); 
+    }
+    
+    return finalOutput; 
+  }
+  
+  /**
+   * 
+   * Copy the input file from local disk to DFS. 
+   * @param localFile  path of the local file to copy
+   * @param remotePath destination path; when null a randomly named file
+   *                   under the input directory of the working directory
+   *                   is used
+   * @return the path the file was copied to
+   * @throws IOException
+   */
+  private Path copyToDFS(String localFile, Path remotePath) throws IOException{
+    if( null == remotePath){ 
+      // use temp path under /mapred in DFS
+      remotePath =  new Path( context+"/input/MRBenchmark_" + 
+          new Random().nextInt()) ;
+    }
+    //new File(localPath).
+    Configuration conf = new Configuration();
+    FileSystem localFS = FileSystem.getNamed("local", conf);
+    FileSystem remoteFS = FileSystem.get(conf);
+    
+    FileUtil.copy(localFS, new Path(localFile), remoteFS, 
+        remotePath, false, conf);
+    
+    if( ignoreOutput) {
+      // delete local copy 
+      new File(localFile).delete();
+    }
+    
+    return remotePath; 
+  }
+  
+  /**
+   * Copy a path from DFS to the local file system. 
+   * @param remotePath source path
+   * @param localPath  local destination path
+   * @throws IOException
+   */
+  private void copyFromDFS(Path remotePath, String localPath)
+  throws IOException{
+    
+    Configuration conf = new Configuration();
+    FileSystem localFS = FileSystem.getNamed("local", conf);
+    FileSystem remoteFS = FileSystem.get(conf);
+    
+    FileUtil.copy(remoteFS, remotePath, 
+        localFS, new Path(localPath), false, conf);
+  }
+  
+  /** Creates the working directory. */
+  private void setupContext() throws IOException{
+    FileSystem.get(new Configuration()).mkdirs(new Path(context));
+  }
+  /** Deletes the working directory. */
+  private void clearContext() throws IOException{
+    FileSystem.get(new Configuration()).delete(new Path(context));
+  }
+  /**
+   * Run the benchmark: copy the input to DFS, run the job numJobs times,
+   * remove the working directory and print the average time per run. 
+   * @throws IOException
+   */
+  public void run() throws IOException{
+    
+    setupContext(); 
+    copyToDFS(input, null);
+    
+    try{
+      runJobInSequence(numJobs);
+    }finally{
+      clearContext(); 
+    }
+    
+    if( verbose ) {
+      // Print out a report 
+      System.out.println("Total MapReduce tasks executed: " + this.numJobs);
+      System.out.println("Total lines of data : " + this.dataLines);
+      System.out.println("Maps : " + this.numMaps + 
+          " ,  Reduces : " + this.numReduces);
+    }
+    int i =0 ; 
+    long totalTime = 0 ; 
+    for( Iterator iter = execTimes.iterator() ; iter.hasNext();){
+      // fetch each entry exactly once; a second next() here would skip
+      // runs and fail on the last element 
+      Long taskTime = (Long)iter.next() ; 
+      totalTime +=  taskTime.longValue() ; 
+      if( verbose ) {
+        System.out.println("Total time for task : " + ++i + 
+            " , =  " +  taskTime);
+      }
+    }
+    
+    // average over the runs actually recorded, which also avoids a
+    // division by zero when no job was run 
+    int runs = execTimes.size() ; 
+    long avgTime = runs > 0 ? totalTime / runs : 0 ;
+    if( verbose ) {
+      System.out.println("Avg time : " + avgTime);
+    }
+    
+    System.out.println("DataLines  Maps    Reduces    AvgTime");
+    System.out.println(this.dataLines + ", " + this.numMaps + ", " + 
+        this.numReduces + ", " + avgTime);
+    
+  }
+  
+  public int getNumMaps() {
+    return numMaps;
+  }
+  
+  public void setNumMaps(int numMaps) {
+    this.numMaps = numMaps;
+  }
+  
+  public int getNumReduces() {
+    return numReduces;
+  }
+  
+  public void setNumReduces(int numReduces) {
+    this.numReduces = numReduces;
+  }
+  
+  /**
+   * Tells whether a command line option must be followed by a value. 
+   */
+  private static boolean requiresValue(String option){
+    return option.equals("-output") || option.equals("-jar") 
+        || option.equals("-times") || option.equals("-workDir") 
+        || option.equals("-maps") || option.equals("-reduces") 
+        || option.equals("-inputLines") || option.equals("-inputType") ; 
+  }
+  
+  /**
+   * Command line entry point: parses the options, generates a local input
+   * file and runs the benchmark. Prints the usage and exits with -1 when
+   * no arguments are given or an option is missing its value. 
+   */
+  public static void main (String[] args) throws IOException{
+    
+    String version = "MRBenchmark.0.0.1";
+    String usage = 
+      "Usage: MultiJobRunner -inputLines noOfLines -jar jarFilePath " + 
+      "[-output dfsPath] [-times numJobs] -workDir dfsPath " +  
+      "[-inputType (ascending, descending, random)]" + 
+      " -maps numMaps -reduces numReduces -ignoreOutput -verbose" ;
+    
+    System.out.println(version);
+    
+    if (args.length == 0) {
+      System.err.println(usage);
+      System.exit(-1);
+    }
+    
+    String output = "";
+    String jarFile = "MRBenchmark.jar" ; 
+    int numJobs = 0 ; 
+    int numMaps = 2; 
+    int numReduces = 1 ; 
+    int dataLines = 1 ; 
+    int inputType = GenData.RANDOM ; 
+    boolean ignoreOutput = false ; 
+    boolean verbose = false ; 
+    
+    for (int i = 0; i < args.length; i++) { // parse command line
+      if (requiresValue(args[i]) && i + 1 >= args.length) {
+        // report a trailing option without value instead of failing with
+        // an ArrayIndexOutOfBoundsException
+        System.err.println("Missing value for option " + args[i]);
+        System.err.println(usage);
+        System.exit(-1);
+      }
+      if (args[i].equals("-output")) {
+        output = args[++i];
+      }else if (args[i].equals("-jar")) {
+        jarFile = args[++i];
+      }else if (args[i].equals("-times")) {
+        numJobs = Integer.parseInt(args[++i]);
+      }else if(args[i].equals("-workDir")) {
+        context = args[++i];
+      }else if(args[i].equals("-maps")) {
+        numMaps = Integer.parseInt(args[++i]);
+      }else if(args[i].equals("-reduces")) {
+        numReduces = Integer.parseInt(args[++i]);
+      }else if(args[i].equals("-inputLines")) {
+        dataLines = Integer.parseInt(args[++i]);
+      }else if(args[i].equals("-inputType")) {
+        String s = args[++i] ; 
+        if( s.equals("ascending")){
+          inputType = GenData.ASCENDING ;
+        }else if(s.equals("descending")){
+          inputType = GenData.DESCENDING ; 
+        }else if(s.equals("random")){
+          inputType = GenData.RANDOM ;
+        }
+      }else if(args[i].equals("-ignoreOutput")) {
+        ignoreOutput = true ;
+      }else if(args[i].equals("-verbose")) {
+        verbose = true ;
+      }
+    }
+    
+    File inputFile = File.createTempFile("SortedInput_" + 
+        new Random().nextInt(),".txt" );
+    GenData.generateText(dataLines, inputFile, inputType);
+    
+    MultiJobRunner runner = new MultiJobRunner(inputFile.getAbsolutePath(), 
+        output, jarFile );
+    runner.setNumMaps(numMaps);
+    runner.setNumReduces(numReduces);
+    runner.setDataLines(dataLines);
+    runner.setIgnoreOutput(ignoreOutput);
+    runner.setVerbose(verbose);
+    
+    // 0 means -times was not given; keep the runner's default then 
+    if( 0 != numJobs ){
+      runner.setNumJobs(numJobs);
+    }
+    
+    try{
+      runner.run(); 
+    }catch(IOException e){
+      e.printStackTrace();
+    }
+  }
+  
+}



Mime
View raw message