tez-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bi...@apache.org
Subject [03/10] TEZ-1055. Rename tez-mapreduce-examples to tez-examples (Hitesh Shah via bikas)
Date Sat, 16 Aug 2014 00:54:51 GMT
http://git-wip-us.apache.org/repos/asf/tez/blob/41f5cd8a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/MapredWordCount.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/MapredWordCount.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/MapredWordCount.java
new file mode 100644
index 0000000..eef6223
--- /dev/null
+++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/MapredWordCount.java
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.mapreduce.examples;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This is an example Hadoop Map/Reduce application using the mapred apis.
+ * It reads the text input files, breaks each line into words
+ * and counts them. The output is a locally sorted list of words and the
+ * count of how often they occurred.
+ *
+ * To run: bin/hadoop jar examples.jar wordcount
+ *            [-m <i>maps</i>] [-r <i>reduces</i>] <i>in-dir</i> <i>out-dir</i>
+ */
+public class MapredWordCount extends Configured implements Tool {
+
+  private static final Log LOG = LogFactory.getLog(MapredWordCount.class);
+
+  /**
+   * Counts the words in each line.
+   * For each line of input, break the line into words and emit them as
+   * (<b>word</b>, <b>1</b>).
+   */
+  public static class MapClass extends MapReduceBase
+    implements Mapper<LongWritable, Text, Text, IntWritable> {
+
+    private final static IntWritable one = new IntWritable(1);
+    private Text word = new Text();
+
+    public void map(LongWritable key, Text value,
+                    OutputCollector<Text, IntWritable> output,
+                    Reporter reporter) throws IOException {
+      String line = value.toString();
+      StringTokenizer itr = new StringTokenizer(line);
+      while (itr.hasMoreTokens()) {
+        word.set(itr.nextToken());
+        output.collect(word, one);
+      }
+    }
+  }
+
+  /**
+   * A reducer class that just emits the sum of the input values.
+   */
+  public static class Reduce extends MapReduceBase
+    implements Reducer<Text, IntWritable, Text, IntWritable> {
+
+    public void reduce(Text key, Iterator<IntWritable> values,
+                       OutputCollector<Text, IntWritable> output,
+                       Reporter reporter) throws IOException {
+      int sum = 0;
+      while (values.hasNext()) {
+        sum += values.next().get();
+      }
+      output.collect(key, new IntWritable(sum));
+    }
+  }
+
+  static int printUsage() {
+    System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
+    ToolRunner.printGenericCommandUsage(System.out);
+    return -1;
+  }
+
+  /**
+   * The main driver for word count map/reduce program.
+   * Invoke this method to submit the map/reduce job.
+   * @throws IOException When there is communication problems with the
+   *                     job tracker.
+   */
+  public int run(String[] args) throws Exception {
+    JobConf conf = new JobConf(getConf(), MapredWordCount.class);
+    conf.setJobName("wordcount");
+    LOG.info("Running WordCount job using mapred apis");
+
+    // the keys are words (strings)
+    conf.setOutputKeyClass(Text.class);
+    // the values are counts (ints)
+    conf.setOutputValueClass(IntWritable.class);
+
+    conf.setMapperClass(MapClass.class);
+    conf.setCombinerClass(Reduce.class);
+    conf.setReducerClass(Reduce.class);
+
+    List<String> other_args = new ArrayList<String>();
+    for(int i=0; i < args.length; ++i) {
+      try {
+        if ("-m".equals(args[i])) {
+          conf.setNumMapTasks(Integer.parseInt(args[++i]));
+        } else if ("-r".equals(args[i])) {
+          conf.setNumReduceTasks(Integer.parseInt(args[++i]));
+        } else {
+          other_args.add(args[i]);
+        }
+      } catch (NumberFormatException except) {
+        LOG.error("Integer expected instead of " + args[i]);
+        return printUsage();
+      } catch (ArrayIndexOutOfBoundsException except) {
+        LOG.error("Required parameter missing from " + args[i-1]);
+        return printUsage();
+      }
+    }
+    // Make sure there are exactly 2 parameters left.
+    if (other_args.size() != 2) {
+      LOG.error("Wrong number of parameters: " +
+          other_args.size() + " instead of 2.");
+      return printUsage();
+    }
+    FileInputFormat.setInputPaths(conf, other_args.get(0));
+    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
+
+    JobClient.runJob(conf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(),
+        new MapredWordCount(), args);
+    System.exit(res);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/41f5cd8a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomTextWriter.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomTextWriter.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomTextWriter.java
new file mode 100644
index 0000000..8251b78
--- /dev/null
+++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomTextWriter.java
@@ -0,0 +1,757 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.mapreduce.examples;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This program uses map/reduce to just run a distributed job where there is
+ * no interaction between the tasks and each task writes a large unsorted
+ * random sequence of words.
+ * To make this program generate data for terasort with 5-10 words per key
+ * and 20-100 words per value, use the following configuration:
+ * <xmp>
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.minwordskey</name>
+ *     <value>5</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.maxwordskey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.minwordsvalue</name>
+ *     <value>20</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.maxwordsvalue</name>
+ *     <value>100</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomtextwriter.totalbytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration></xmp>
+ * 
+ * Equivalently, {@link RandomTextWriter} also supports all the above options
+ * and ones supported by {@link Tool} via the command-line.
+ * 
+ * To run: bin/hadoop jar hadoop-${version}-examples.jar randomtextwriter
+ *            [-outFormat <i>output format class</i>] <i>output</i> 
+ */
+public class RandomTextWriter extends Configured implements Tool {
+  // Configuration keys that size the job.
+  public static final String TOTAL_BYTES = "mapreduce.randomtextwriter.totalbytes";
+  public static final String BYTES_PER_MAP = "mapreduce.randomtextwriter.bytespermap";
+  public static final String MAPS_PER_HOST = "mapreduce.randomtextwriter.mapsperhost";
+
+  // Configuration keys that bound the number of words in each key and value.
+  public static final String MIN_KEY = "mapreduce.randomtextwriter.minwordskey";
+  public static final String MAX_KEY = "mapreduce.randomtextwriter.maxwordskey";
+  public static final String MIN_VALUE = "mapreduce.randomtextwriter.minwordsvalue";
+  public static final String MAX_VALUE = "mapreduce.randomtextwriter.maxwordsvalue";
+  
+  static int printUsage() {
+    // Print this tool's synopsis first, then the generic command usage.
+    String usage = "randomtextwriter [-outFormat <output format class>] <output>";
+    System.out.println(usage);
+    ToolRunner.printGenericCommandUsage(System.out);
+    return 2;
+  }
+  
+  /**
+   * User counters, updated by the mapper for every record it writes.
+   */
+  enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
+
+  static class RandomTextMapper extends Mapper<Text, Text, Text, Text> {
+
+    private long numBytesToWrite;
+    private int minWordsInKey;
+    private int wordsInKeyRange;
+    private int minWordsInValue;
+    private int wordsInValueRange;
+    private final Random random = new Random();
+
+    /**
+     * Reads the byte budget and the key/value word-count bounds from the job
+     * configuration, using the defaults given here for unset properties.
+     */
+    @Override
+    public void setup(Context context) {
+      Configuration conf = context.getConfiguration();
+      numBytesToWrite = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024);
+      minWordsInKey = conf.getInt(MIN_KEY, 5);
+      wordsInKeyRange = (conf.getInt(MAX_KEY, 10) - minWordsInKey);
+      minWordsInValue = conf.getInt(MIN_VALUE, 10);
+      wordsInValueRange = (conf.getInt(MAX_VALUE, 100) - minWordsInValue);
+    }
+
+    /**
+     * Writes random key/value sentences until the byte budget is used up.
+     * The incoming key and value are not read.
+     */
+    @Override
+    public void map(Text key, Text value, Context context)
+        throws IOException, InterruptedException {
+      int itemCount = 0;
+      while (numBytesToWrite > 0) {
+        // Generate the key/value; a zero range means a fixed word count.
+        int noWordsKey = minWordsInKey +
+          (wordsInKeyRange != 0 ? random.nextInt(wordsInKeyRange) : 0);
+        int noWordsValue = minWordsInValue +
+          (wordsInValueRange != 0 ? random.nextInt(wordsInValueRange) : 0);
+        Text keyWords = generateSentence(noWordsKey);
+        Text valueWords = generateSentence(noWordsValue);
+        context.write(keyWords, valueWords);
+
+        // Charge the record against the budget, then update counters/status.
+        int recordBytes = keyWords.getLength() + valueWords.getLength();
+        numBytesToWrite -= recordBytes;
+        context.getCounter(Counters.BYTES_WRITTEN).increment(recordBytes);
+        context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
+        if (++itemCount % 200 == 0) {
+          context.setStatus("wrote record " + itemCount + ". " +
+                             numBytesToWrite + " bytes left.");
+        }
+      }
+      context.setStatus("done with " + itemCount + " records.");
+    }
+
+    /** Builds a sentence of noWords random words, each followed by a space. */
+    private Text generateSentence(int noWords) {
+      StringBuilder sentence = new StringBuilder();
+      for (int i = 0; i < noWords; ++i) {
+        sentence.append(words[random.nextInt(words.length)]);
+        sentence.append(' ');
+      }
+      return new Text(sentence.toString());
+    }
+  }
+  
+  /**
+   * This is the main routine for launching a distributed random write job.
+   * By default it runs 10 maps per task tracker and each map writes 1 GiB.
+   * The job is map-only: the number of reduce tasks is set to zero.
+   *
+   * @return 0 if waitForCompletion reports success and 1 otherwise, 2 after
+   *         a usage error, or -2 if the bytes-per-map setting is 0
+   * @throws Exception if querying the cluster or running the job fails
+   */
+  public int run(String[] args) throws Exception {
+    if (args.length == 0) {
+      return printUsage();
+    }
+
+    Configuration conf = getConf();
+    JobClient client = new JobClient(conf);
+    ClusterStatus cluster = client.getClusterStatus();
+    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
+    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, 1*1024*1024*1024);
+    if (numBytesToWritePerMap == 0) {
+      System.err.println("Cannot have " + BYTES_PER_MAP +" set to 0");
+      return -2;
+    }
+    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
+         numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
+    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
+    if (numMaps == 0 && totalBytesToWrite > 0) {
+      // Less than one map's worth was requested: one map writes all of it.
+      numMaps = 1;
+      conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
+    }
+    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
+
+    Job job = new Job(conf);
+    job.setJarByClass(RandomTextWriter.class);
+    job.setJobName("random-text-writer");
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(Text.class);
+    job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
+    job.setMapperClass(RandomTextMapper.class);
+
+    Class<? extends OutputFormat> outputFormatClass =
+      SequenceFileOutputFormat.class;
+    List<String> otherArgs = new ArrayList<String>();
+    for (int i = 0; i < args.length; ++i) {
+      if (!"-outFormat".equals(args[i])) {
+        otherArgs.add(args[i]);
+      } else if (i + 1 == args.length) {
+        // -outFormat was the last argument, so its class name is missing.
+        System.out.println("ERROR: Required parameter missing from " + args[i]);
+        return printUsage();
+      } else {
+        outputFormatClass =
+          Class.forName(args[++i]).asSubclass(OutputFormat.class);
+      }
+    }
+    if (otherArgs.isEmpty()) {
+      // Without this check otherArgs.get(0) below would throw.
+      System.out.println("ERROR: Required parameter <output> is missing");
+      return printUsage();
+    }
+
+    job.setOutputFormatClass(outputFormatClass);
+    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
+    System.out.println("Running " + numMaps + " maps.");
+
+    // reducer NONE
+    job.setNumReduceTasks(0);
+
+    Date startTime = new Date();
+    System.out.println("Job started: " + startTime);
+    int ret = job.waitForCompletion(true) ? 0 : 1;
+    Date endTime = new Date();
+    System.out.println("Job ended: " + endTime);
+    System.out.println("The job took " +
+                       (endTime.getTime() - startTime.getTime()) /1000 +
+                       " seconds.");
+    return ret;
+  }
+  
+  public static void main(String[] args) throws Exception {
+    // Exit with the status that ToolRunner.run reports for this tool.
+    System.exit(ToolRunner.run(new Configuration(), new RandomTextWriter(), args));
+  }
+
+  /**
+   * A random list of words from /usr/share/dict/words
+   */
+  private static String[] words = {
+                                   "diurnalness", "Homoiousian",
+                                   "spiranthic", "tetragynian",
+                                   "silverhead", "ungreat",
+                                   "lithograph", "exploiter",
+                                   "physiologian", "by",
+                                   "hellbender", "Filipendula",
+                                   "undeterring", "antiscolic",
+                                   "pentagamist", "hypoid",
+                                   "cacuminal", "sertularian",
+                                   "schoolmasterism", "nonuple",
+                                   "gallybeggar", "phytonic",
+                                   "swearingly", "nebular",
+                                   "Confervales", "thermochemically",
+                                   "characinoid", "cocksuredom",
+                                   "fallacious", "feasibleness",
+                                   "debromination", "playfellowship",
+                                   "tramplike", "testa",
+                                   "participatingly", "unaccessible",
+                                   "bromate", "experientialist",
+                                   "roughcast", "docimastical",
+                                   "choralcelo", "blightbird",
+                                   "peptonate", "sombreroed",
+                                   "unschematized", "antiabolitionist",
+                                   "besagne", "mastication",
+                                   "bromic", "sviatonosite",
+                                   "cattimandoo", "metaphrastical",
+                                   "endotheliomyoma", "hysterolysis",
+                                   "unfulminated", "Hester",
+                                   "oblongly", "blurredness",
+                                   "authorling", "chasmy",
+                                   "Scorpaenidae", "toxihaemia",
+                                   "Dictograph", "Quakerishly",
+                                   "deaf", "timbermonger",
+                                   "strammel", "Thraupidae",
+                                   "seditious", "plerome",
+                                   "Arneb", "eristically",
+                                   "serpentinic", "glaumrie",
+                                   "socioromantic", "apocalypst",
+                                   "tartrous", "Bassaris",
+                                   "angiolymphoma", "horsefly",
+                                   "kenno", "astronomize",
+                                   "euphemious", "arsenide",
+                                   "untongued", "parabolicness",
+                                   "uvanite", "helpless",
+                                   "gemmeous", "stormy",
+                                   "templar", "erythrodextrin",
+                                   "comism", "interfraternal",
+                                   "preparative", "parastas",
+                                   "frontoorbital", "Ophiosaurus",
+                                   "diopside", "serosanguineous",
+                                   "ununiformly", "karyological",
+                                   "collegian", "allotropic",
+                                   "depravity", "amylogenesis",
+                                   "reformatory", "epidymides",
+                                   "pleurotropous", "trillium",
+                                   "dastardliness", "coadvice",
+                                   "embryotic", "benthonic",
+                                   "pomiferous", "figureheadship",
+                                   "Megaluridae", "Harpa",
+                                   "frenal", "commotion",
+                                   "abthainry", "cobeliever",
+                                   "manilla", "spiciferous",
+                                   "nativeness", "obispo",
+                                   "monilioid", "biopsic",
+                                   "valvula", "enterostomy",
+                                   "planosubulate", "pterostigma",
+                                   "lifter", "triradiated",
+                                   "venialness", "tum",
+                                   "archistome", "tautness",
+                                   "unswanlike", "antivenin",
+                                   "Lentibulariaceae", "Triphora",
+                                   "angiopathy", "anta",
+                                   "Dawsonia", "becomma",
+                                   "Yannigan", "winterproof",
+                                   "antalgol", "harr",
+                                   "underogating", "ineunt",
+                                   "cornberry", "flippantness",
+                                   "scyphostoma", "approbation",
+                                   "Ghent", "Macraucheniidae",
+                                   "scabbiness", "unanatomized",
+                                   "photoelasticity", "eurythermal",
+                                   "enation", "prepavement",
+                                   "flushgate", "subsequentially",
+                                   "Edo", "antihero",
+                                   "Isokontae", "unforkedness",
+                                   "porriginous", "daytime",
+                                   "nonexecutive", "trisilicic",
+                                   "morphiomania", "paranephros",
+                                   "botchedly", "impugnation",
+                                   "Dodecatheon", "obolus",
+                                   "unburnt", "provedore",
+                                   "Aktistetae", "superindifference",
+                                   "Alethea", "Joachimite",
+                                   "cyanophilous", "chorograph",
+                                   "brooky", "figured",
+                                   "periclitation", "quintette",
+                                   "hondo", "ornithodelphous",
+                                   "unefficient", "pondside",
+                                   "bogydom", "laurinoxylon",
+                                   "Shiah", "unharmed",
+                                   "cartful", "noncrystallized",
+                                   "abusiveness", "cromlech",
+                                   "japanned", "rizzomed",
+                                   "underskin", "adscendent",
+                                   "allectory", "gelatinousness",
+                                   "volcano", "uncompromisingly",
+                                   "cubit", "idiotize",
+                                   "unfurbelowed", "undinted",
+                                   "magnetooptics", "Savitar",
+                                   "diwata", "ramosopalmate",
+                                   "Pishquow", "tomorn",
+                                   "apopenptic", "Haversian",
+                                   "Hysterocarpus", "ten",
+                                   "outhue", "Bertat",
+                                   "mechanist", "asparaginic",
+                                   "velaric", "tonsure",
+                                   "bubble", "Pyrales",
+                                   "regardful", "glyphography",
+                                   "calabazilla", "shellworker",
+                                   "stradametrical", "havoc",
+                                   "theologicopolitical", "sawdust",
+                                   "diatomaceous", "jajman",
+                                   "temporomastoid", "Serrifera",
+                                   "Ochnaceae", "aspersor",
+                                   "trailmaking", "Bishareen",
+                                   "digitule", "octogynous",
+                                   "epididymitis", "smokefarthings",
+                                   "bacillite", "overcrown",
+                                   "mangonism", "sirrah",
+                                   "undecorated", "psychofugal",
+                                   "bismuthiferous", "rechar",
+                                   "Lemuridae", "frameable",
+                                   "thiodiazole", "Scanic",
+                                   "sportswomanship", "interruptedness",
+                                   "admissory", "osteopaedion",
+                                   "tingly", "tomorrowness",
+                                   "ethnocracy", "trabecular",
+                                   "vitally", "fossilism",
+                                   "adz", "metopon",
+                                   "prefatorial", "expiscate",
+                                   "diathermacy", "chronist",
+                                   "nigh", "generalizable",
+                                   "hysterogen", "aurothiosulphuric",
+                                   "whitlowwort", "downthrust",
+                                   "Protestantize", "monander",
+                                   "Itea", "chronographic",
+                                   "silicize", "Dunlop",
+                                   "eer", "componental",
+                                   "spot", "pamphlet",
+                                   "antineuritic", "paradisean",
+                                   "interruptor", "debellator",
+                                   "overcultured", "Florissant",
+                                   "hyocholic", "pneumatotherapy",
+                                   "tailoress", "rave",
+                                   "unpeople", "Sebastian",
+                                   "thermanesthesia", "Coniferae",
+                                   "swacking", "posterishness",
+                                   "ethmopalatal", "whittle",
+                                   "analgize", "scabbardless",
+                                   "naught", "symbiogenetically",
+                                   "trip", "parodist",
+                                   "columniform", "trunnel",
+                                   "yawler", "goodwill",
+                                   "pseudohalogen", "swangy",
+                                   "cervisial", "mediateness",
+                                   "genii", "imprescribable",
+                                   "pony", "consumptional",
+                                   "carposporangial", "poleax",
+                                   "bestill", "subfebrile",
+                                   "sapphiric", "arrowworm",
+                                   "qualminess", "ultraobscure",
+                                   "thorite", "Fouquieria",
+                                   "Bermudian", "prescriber",
+                                   "elemicin", "warlike",
+                                   "semiangle", "rotular",
+                                   "misthread", "returnability",
+                                   "seraphism", "precostal",
+                                   "quarried", "Babylonism",
+                                   "sangaree", "seelful",
+                                   "placatory", "pachydermous",
+                                   "bozal", "galbulus",
+                                   "spermaphyte", "cumbrousness",
+                                   "pope", "signifier",
+                                   "Endomycetaceae", "shallowish",
+                                   "sequacity", "periarthritis",
+                                   "bathysphere", "pentosuria",
+                                   "Dadaism", "spookdom",
+                                   "Consolamentum", "afterpressure",
+                                   "mutter", "louse",
+                                   "ovoviviparous", "corbel",
+                                   "metastoma", "biventer",
+                                   "Hydrangea", "hogmace",
+                                   "seizing", "nonsuppressed",
+                                   "oratorize", "uncarefully",
+                                   "benzothiofuran", "penult",
+                                   "balanocele", "macropterous",
+                                   "dishpan", "marten",
+                                   "absvolt", "jirble",
+                                   "parmelioid", "airfreighter",
+                                   "acocotl", "archesporial",
+                                   "hypoplastral", "preoral",
+                                   "quailberry", "cinque",
+                                   "terrestrially", "stroking",
+                                   "limpet", "moodishness",
+                                   "canicule", "archididascalian",
+                                   "pompiloid", "overstaid",
+                                   "introducer", "Italical",
+                                   "Christianopaganism", "prescriptible",
+                                   "subofficer", "danseuse",
+                                   "cloy", "saguran",
+                                   "frictionlessly", "deindividualization",
+                                   "Bulanda", "ventricous",
+                                   "subfoliar", "basto",
+                                   "scapuloradial", "suspend",
+                                   "stiffish", "Sphenodontidae",
+                                   "eternal", "verbid",
+                                   "mammonish", "upcushion",
+                                   "barkometer", "concretion",
+                                   "preagitate", "incomprehensible",
+                                   "tristich", "visceral",
+                                   "hemimelus", "patroller",
+                                   "stentorophonic", "pinulus",
+                                   "kerykeion", "brutism",
+                                   "monstership", "merciful",
+                                   "overinstruct", "defensibly",
+                                   "bettermost", "splenauxe",
+                                   "Mormyrus", "unreprimanded",
+                                   "taver", "ell",
+                                   "proacquittal", "infestation",
+                                   "overwoven", "Lincolnlike",
+                                   "chacona", "Tamil",
+                                   "classificational", "lebensraum",
+                                   "reeveland", "intuition",
+                                   "Whilkut", "focaloid",
+                                   "Eleusinian", "micromembrane",
+                                   "byroad", "nonrepetition",
+                                   "bacterioblast", "brag",
+                                   "ribaldrous", "phytoma",
+                                   "counteralliance", "pelvimetry",
+                                   "pelf", "relaster",
+                                   "thermoresistant", "aneurism",
+                                   "molossic", "euphonym",
+                                   "upswell", "ladhood",
+                                   "phallaceous", "inertly",
+                                   "gunshop", "stereotypography",
+                                   "laryngic", "refasten",
+                                   "twinling", "oflete",
+                                   "hepatorrhaphy", "electrotechnics",
+                                   "cockal", "guitarist",
+                                   "topsail", "Cimmerianism",
+                                   "larklike", "Llandovery",
+                                   "pyrocatechol", "immatchable",
+                                   "chooser", "metrocratic",
+                                   "craglike", "quadrennial",
+                                   "nonpoisonous", "undercolored",
+                                   "knob", "ultratense",
+                                   "balladmonger", "slait",
+                                   "sialadenitis", "bucketer",
+                                   "magnificently", "unstipulated",
+                                   "unscourged", "unsupercilious",
+                                   "packsack", "pansophism",
+                                   "soorkee", "percent",
+                                   "subirrigate", "champer",
+                                   "metapolitics", "spherulitic",
+                                   "involatile", "metaphonical",
+                                   "stachyuraceous", "speckedness",
+                                   "bespin", "proboscidiform",
+                                   "gul", "squit",
+                                   "yeelaman", "peristeropode",
+                                   "opacousness", "shibuichi",
+                                   "retinize", "yote",
+                                   "misexposition", "devilwise",
+                                   "pumpkinification", "vinny",
+                                   "bonze", "glossing",
+                                   "decardinalize", "transcortical",
+                                   "serphoid", "deepmost",
+                                   "guanajuatite", "wemless",
+                                   "arval", "lammy",
+                                   "Effie", "Saponaria",
+                                   "tetrahedral", "prolificy",
+                                   "excerpt", "dunkadoo",
+                                   "Spencerism", "insatiately",
+                                   "Gilaki", "oratorship",
+                                   "arduousness", "unbashfulness",
+                                   "Pithecolobium", "unisexuality",
+                                   "veterinarian", "detractive",
+                                   "liquidity", "acidophile",
+                                   "proauction", "sural",
+                                   "totaquina", "Vichyite",
+                                   "uninhabitedness", "allegedly",
+                                   "Gothish", "manny",
+                                   "Inger", "flutist",
+                                   "ticktick", "Ludgatian",
+                                   "homotransplant", "orthopedical",
+                                   "diminutively", "monogoneutic",
+                                   "Kenipsim", "sarcologist",
+                                   "drome", "stronghearted",
+                                   "Fameuse", "Swaziland",
+                                   "alen", "chilblain",
+                                   "beatable", "agglomeratic",
+                                   "constitutor", "tendomucoid",
+                                   "porencephalous", "arteriasis",
+                                   "boser", "tantivy",
+                                   "rede", "lineamental",
+                                   "uncontradictableness", "homeotypical",
+                                   "masa", "folious",
+                                   "dosseret", "neurodegenerative",
+                                   "subtransverse", "Chiasmodontidae",
+                                   "palaeotheriodont", "unstressedly",
+                                   "chalcites", "piquantness",
+                                   "lampyrine", "Aplacentalia",
+                                   "projecting", "elastivity",
+                                   "isopelletierin", "bladderwort",
+                                   "strander", "almud",
+                                   "iniquitously", "theologal",
+                                   "bugre", "chargeably",
+                                   "imperceptivity", "meriquinoidal",
+                                   "mesophyte", "divinator",
+                                   "perfunctory", "counterappellant",
+                                   "synovial", "charioteer",
+                                   "crystallographical", "comprovincial",
+                                   "infrastapedial", "pleasurehood",
+                                   "inventurous", "ultrasystematic",
+                                   "subangulated", "supraoesophageal",
+                                   "Vaishnavism", "transude",
+                                   "chrysochrous", "ungrave",
+                                   "reconciliable", "uninterpleaded",
+                                   "erlking", "wherefrom",
+                                   "aprosopia", "antiadiaphorist",
+                                   "metoxazine", "incalculable",
+                                   "umbellic", "predebit",
+                                   "foursquare", "unimmortal",
+                                   "nonmanufacture", "slangy",
+                                   "predisputant", "familist",
+                                   "preaffiliate", "friarhood",
+                                   "corelysis", "zoonitic",
+                                   "halloo", "paunchy",
+                                   "neuromimesis", "aconitine",
+                                   "hackneyed", "unfeeble",
+                                   "cubby", "autoschediastical",
+                                   "naprapath", "lyrebird",
+                                   "inexistency", "leucophoenicite",
+                                   "ferrogoslarite", "reperuse",
+                                   "uncombable", "tambo",
+                                   "propodiale", "diplomatize",
+                                   "Russifier", "clanned",
+                                   "corona", "michigan",
+                                   "nonutilitarian", "transcorporeal",
+                                   "bought", "Cercosporella",
+                                   "stapedius", "glandularly",
+                                   "pictorially", "weism",
+                                   "disilane", "rainproof",
+                                   "Caphtor", "scrubbed",
+                                   "oinomancy", "pseudoxanthine",
+                                   "nonlustrous", "redesertion",
+                                   "Oryzorictinae", "gala",
+                                   "Mycogone", "reappreciate",
+                                   "cyanoguanidine", "seeingness",
+                                   "breadwinner", "noreast",
+                                   "furacious", "epauliere",
+                                   "omniscribent", "Passiflorales",
+                                   "uninductive", "inductivity",
+                                   "Orbitolina", "Semecarpus",
+                                   "migrainoid", "steprelationship",
+                                   "phlogisticate", "mesymnion",
+                                   "sloped", "edificator",
+                                   "beneficent", "culm",
+                                   "paleornithology", "unurban",
+                                   "throbless", "amplexifoliate",
+                                   "sesquiquintile", "sapience",
+                                   "astucious", "dithery",
+                                   "boor", "ambitus",
+                                   "scotching", "uloid",
+                                   "uncompromisingness", "hoove",
+                                   "waird", "marshiness",
+                                   "Jerusalem", "mericarp",
+                                   "unevoked", "benzoperoxide",
+                                   "outguess", "pyxie",
+                                   "hymnic", "euphemize",
+                                   "mendacity", "erythremia",
+                                   "rosaniline", "unchatteled",
+                                   "lienteria", "Bushongo",
+                                   "dialoguer", "unrepealably",
+                                   "rivethead", "antideflation",
+                                   "vinegarish", "manganosiderite",
+                                   "doubtingness", "ovopyriform",
+                                   "Cephalodiscus", "Muscicapa",
+                                   "Animalivora", "angina",
+                                   "planispheric", "ipomoein",
+                                   "cuproiodargyrite", "sandbox",
+                                   "scrat", "Munnopsidae",
+                                   "shola", "pentafid",
+                                   "overstudiousness", "times",
+                                   "nonprofession", "appetible",
+                                   "valvulotomy", "goladar",
+                                   "uniarticular", "oxyterpene",
+                                   "unlapsing", "omega",
+                                   "trophonema", "seminonflammable",
+                                   "circumzenithal", "starer",
+                                   "depthwise", "liberatress",
+                                   "unleavened", "unrevolting",
+                                   "groundneedle", "topline",
+                                   "wandoo", "umangite",
+                                   "ordinant", "unachievable",
+                                   "oversand", "snare",
+                                   "avengeful", "unexplicit",
+                                   "mustafina", "sonable",
+                                   "rehabilitative", "eulogization",
+                                   "papery", "technopsychology",
+                                   "impressor", "cresylite",
+                                   "entame", "transudatory",
+                                   "scotale", "pachydermatoid",
+                                   "imaginary", "yeat",
+                                   "slipped", "stewardship",
+                                   "adatom", "cockstone",
+                                   "skyshine", "heavenful",
+                                   "comparability", "exprobratory",
+                                   "dermorhynchous", "parquet",
+                                   "cretaceous", "vesperal",
+                                   "raphis", "undangered",
+                                   "Glecoma", "engrain",
+                                   "counteractively", "Zuludom",
+                                   "orchiocatabasis", "Auriculariales",
+                                   "warriorwise", "extraorganismal",
+                                   "overbuilt", "alveolite",
+                                   "tetchy", "terrificness",
+                                   "widdle", "unpremonished",
+                                   "rebilling", "sequestrum",
+                                   "equiconvex", "heliocentricism",
+                                   "catabaptist", "okonite",
+                                   "propheticism", "helminthagogic",
+                                   "calycular", "giantly",
+                                   "wingable", "golem",
+                                   "unprovided", "commandingness",
+                                   "greave", "haply",
+                                   "doina", "depressingly",
+                                   "subdentate", "impairment",
+                                   "decidable", "neurotrophic",
+                                   "unpredict", "bicorporeal",
+                                   "pendulant", "flatman",
+                                   "intrabred", "toplike",
+                                   "Prosobranchiata", "farrantly",
+                                   "toxoplasmosis", "gorilloid",
+                                   "dipsomaniacal", "aquiline",
+                                   "atlantite", "ascitic",
+                                   "perculsive", "prospectiveness",
+                                   "saponaceous", "centrifugalization",
+                                   "dinical", "infravaginal",
+                                   "beadroll", "affaite",
+                                   "Helvidian", "tickleproof",
+                                   "abstractionism", "enhedge",
+                                   "outwealth", "overcontribute",
+                                   "coldfinch", "gymnastic",
+                                   "Pincian", "Munychian",
+                                   "codisjunct", "quad",
+                                   "coracomandibular", "phoenicochroite",
+                                   "amender", "selectivity",
+                                   "putative", "semantician",
+                                   "lophotrichic", "Spatangoidea",
+                                   "saccharogenic", "inferent",
+                                   "Triconodonta", "arrendation",
+                                   "sheepskin", "taurocolla",
+                                   "bunghole", "Machiavel",
+                                   "triakistetrahedral", "dehairer",
+                                   "prezygapophysial", "cylindric",
+                                   "pneumonalgia", "sleigher",
+                                   "emir", "Socraticism",
+                                   "licitness", "massedly",
+                                   "instructiveness", "sturdied",
+                                   "redecrease", "starosta",
+                                   "evictor", "orgiastic",
+                                   "squdge", "meloplasty",
+                                   "Tsonecan", "repealableness",
+                                   "swoony", "myesthesia",
+                                   "molecule", "autobiographist",
+                                   "reciprocation", "refective",
+                                   "unobservantness", "tricae",
+                                   "ungouged", "floatability",
+                                   "Mesua", "fetlocked",
+                                   "chordacentrum", "sedentariness",
+                                   "various", "laubanite",
+                                   "nectopod", "zenick",
+                                   "sequentially", "analgic",
+                                   "biodynamics", "posttraumatic",
+                                   "nummi", "pyroacetic",
+                                   "bot", "redescend",
+                                   "dispermy", "undiffusive",
+                                   "circular", "trillion",
+                                   "Uraniidae", "ploration",
+                                   "discipular", "potentness",
+                                   "sud", "Hu",
+                                   "Eryon", "plugger",
+                                   "subdrainage", "jharal",
+                                   "abscission", "supermarket",
+                                   "countergabion", "glacierist",
+                                   "lithotresis", "minniebush",
+                                   "zanyism", "eucalypteol",
+                                   "sterilely", "unrealize",
+                                   "unpatched", "hypochondriacism",
+                                   "critically", "cheesecutter",
+                                  };
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/41f5cd8a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomWriter.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomWriter.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomWriter.java
new file mode 100644
index 0000000..c9b51c7
--- /dev/null
+++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/RandomWriter.java
@@ -0,0 +1,298 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.mapreduce.examples;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This program uses map/reduce to just run a distributed job where there is
+ * no interaction between the tasks and each task writes a large unsorted
+ * random binary sequence file of BytesWritable.
+ * In order for this program to generate data for terasort with 10-byte keys
+ * and 90-byte values, have the following config:
+ * <xmp>
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>mapreduce.randomwriter.minkey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.maxkey</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.minvalue</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.maxvalue</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>mapreduce.randomwriter.totalbytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration></xmp>
+ * 
+ * Equivalently, {@link RandomWriter} also supports all the above options
+ * and ones supported by {@link GenericOptionsParser} via the command-line.
+ */
+public class RandomWriter extends Configured implements Tool {
+  public static final String TOTAL_BYTES = "mapreduce.randomwriter.totalbytes";
+  public static final String BYTES_PER_MAP = "mapreduce.randomwriter.bytespermap";
+  public static final String MAPS_PER_HOST = "mapreduce.randomwriter.mapsperhost";
+  public static final String MAX_VALUE = "mapreduce.randomwriter.maxvalue";
+  public static final String MIN_VALUE = "mapreduce.randomwriter.minvalue";
+  public static final String MIN_KEY = "mapreduce.randomwriter.minkey";
+  public static final String MAX_KEY = "mapreduce.randomwriter.maxkey";
+
+  /** Bytes each map writes when {@link #BYTES_PER_MAP} is not configured (1 GiB). */
+  private static final long DEFAULT_BYTES_PER_MAP = 1024L * 1024 * 1024;
+
+  /** User counters, incremented by {@link RandomMapper} for every record written. */
+  static enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN }
+
+  /**
+   * A custom input format that creates virtual inputs of a single string
+   * for each map.
+   */
+  static class RandomInputFormat extends InputFormat<Text, Text> {
+    /**
+     * Generate one placeholder split per requested map; each split is named
+     * {@code dummy-split-<i>} and located under the job's output directory.
+     */
+    @Override
+    public List<InputSplit> getSplits(JobContext job) throws IOException {
+      List<InputSplit> result = new ArrayList<InputSplit>();
+      Path outDir = FileOutputFormat.getOutputPath(job);
+      int numSplits = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
+      for (int i = 0; i < numSplits; ++i) {
+        result.add(new FileSplit(new Path(outDir, "dummy-split-" + i), 0, 1, (String[]) null));
+      }
+      return result;
+    }
+
+    /**
+     * Return a single record (filename, "") where the filename is taken from
+     * the file split.
+     */
+    static class RandomRecordReader extends RecordReader<Text, Text> {
+      Path name;
+      Text key = null;
+      Text value = new Text();
+      public RandomRecordReader(Path p) {
+        name = p;
+      }
+
+      @Override
+      public void initialize(InputSplit split, TaskAttemptContext context)
+          throws IOException, InterruptedException {
+        // Nothing to do: the split path was already handed to the constructor.
+      }
+
+      @Override
+      public boolean nextKeyValue() {
+        if (name != null) {
+          key = new Text();
+          key.set(name.getName());
+          name = null; // the single record has now been handed out
+          return true;
+        }
+        return false;
+      }
+
+      @Override
+      public Text getCurrentKey() {
+        return key;
+      }
+
+      @Override
+      public Text getCurrentValue() {
+        return value;
+      }
+
+      @Override
+      public void close() {}
+
+      @Override
+      public float getProgress() {
+        return 0.0f;
+      }
+    }
+
+    @Override
+    public RecordReader<Text, Text> createRecordReader(InputSplit split,
+        TaskAttemptContext context) throws IOException, InterruptedException {
+      return new RandomRecordReader(((FileSplit) split).getPath());
+    }
+  }
+
+  /** Mapper that ignores its input record and emits random key/value records. */
+  static class RandomMapper extends Mapper<WritableComparable, Writable,
+                      BytesWritable, BytesWritable> {
+
+    private long numBytesToWrite;
+    private int minKeySize;
+    private int keySizeRange;
+    private int minValueSize;
+    private int valueSizeRange;
+    private Random random = new Random();
+    private BytesWritable randomKey = new BytesWritable();
+    private BytesWritable randomValue = new BytesWritable();
+
+    /** Fill {@code data[offset .. offset+length-1]} with random bytes. */
+    private void randomizeBytes(byte[] data, int offset, int length) {
+      for (int i = offset + length - 1; i >= offset; --i) {
+        data[i] = (byte) random.nextInt(256);
+      }
+    }
+
+    /** Write random records until the byte budget read in {@code setup} is spent. */
+    @Override
+    public void map(WritableComparable key, Writable value, Context context)
+        throws IOException, InterruptedException {
+      int itemCount = 0;
+      while (numBytesToWrite > 0) {
+        int keyLength = minKeySize +
+          (keySizeRange != 0 ? random.nextInt(keySizeRange) : 0);
+        randomKey.setSize(keyLength);
+        randomizeBytes(randomKey.getBytes(), 0, randomKey.getLength());
+        int valueLength = minValueSize +
+          (valueSizeRange != 0 ? random.nextInt(valueSizeRange) : 0);
+        randomValue.setSize(valueLength);
+        randomizeBytes(randomValue.getBytes(), 0, randomValue.getLength());
+        context.write(randomKey, randomValue);
+        numBytesToWrite -= keyLength + valueLength;
+        context.getCounter(Counters.BYTES_WRITTEN).increment(keyLength + valueLength);
+        context.getCounter(Counters.RECORDS_WRITTEN).increment(1);
+        if (++itemCount % 200 == 0) {
+          context.setStatus("wrote record " + itemCount + ". " + numBytesToWrite + " bytes left.");
+        }
+      }
+      context.setStatus("done with " + itemCount + " records.");
+    }
+
+    /**
+     * Save the values out of the configuration that we need to write
+     * the data.
+     */
+    @Override
+    public void setup(Context context) {
+      Configuration conf = context.getConfiguration();
+      numBytesToWrite = conf.getLong(BYTES_PER_MAP, DEFAULT_BYTES_PER_MAP);
+      minKeySize = conf.getInt(MIN_KEY, 10);
+      keySizeRange = conf.getInt(MAX_KEY, 1000) - minKeySize;
+      minValueSize = conf.getInt(MIN_VALUE, 0);
+      valueSizeRange = conf.getInt(MAX_VALUE, 20000) - minValueSize;
+    }
+  }
+
+  /**
+   * This is the main routine for launching a distributed random write job.
+   * By default it runs 10 maps per node and each map writes 1 gig of data to
+   * a DFS file. The job is map-only: the number of reduce tasks is set to 0.
+   *
+   * @param args command-line arguments; {@code args[0]} is the output directory
+   * @return 0 if the job succeeded, 1 if it failed, 2 if no output directory
+   *         was given, -2 if {@link #BYTES_PER_MAP} is configured as 0
+   * @throws Exception if querying the cluster or running the job fails
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    if (args.length == 0) {
+      System.out.println("Usage: writer <out-dir>");
+      ToolRunner.printGenericCommandUsage(System.out);
+      return 2;
+    }
+
+    Path outDir = new Path(args[0]);
+    Configuration conf = getConf();
+    JobClient client = new JobClient(conf);
+    ClusterStatus cluster = client.getClusterStatus();
+    int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
+    long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP, DEFAULT_BYTES_PER_MAP);
+    if (numBytesToWritePerMap == 0) {
+      System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
+      return -2;
+    }
+    long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
+         numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
+    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
+    if (numMaps == 0 && totalBytesToWrite > 0) {
+      // Less than one map's worth of data was requested: run one smaller map.
+      numMaps = 1;
+      conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
+    }
+    conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
+
+    Job job = Job.getInstance(conf);
+    job.setJarByClass(RandomWriter.class);
+    job.setJobName("random-writer");
+    FileOutputFormat.setOutputPath(job, outDir);
+    job.setOutputKeyClass(BytesWritable.class);
+    job.setOutputValueClass(BytesWritable.class);
+    job.setInputFormatClass(RandomInputFormat.class);
+    job.setMapperClass(RandomMapper.class);
+    job.setReducerClass(Reducer.class);
+    job.setOutputFormatClass(SequenceFileOutputFormat.class);
+
+    System.out.println("Running " + numMaps + " maps.");
+
+    // reducer NONE
+    job.setNumReduceTasks(0);
+
+    Date startTime = new Date();
+    System.out.println("Job started: " + startTime);
+    int ret = job.waitForCompletion(true) ? 0 : 1;
+    Date endTime = new Date();
+    System.out.println("Job ended: " + endTime);
+    long elapsedSeconds = (endTime.getTime() - startTime.getTime()) / 1000;
+    System.out.println("The job took " + elapsedSeconds + " seconds.");
+
+    return ret;
+  }
+
+  public static void main(String[] args) throws Exception {
+    int res = ToolRunner.run(new Configuration(), new RandomWriter(), args);
+    System.exit(res);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/41f5cd8a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/SecondarySort.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/SecondarySort.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/SecondarySort.java
new file mode 100644
index 0000000..cdae905
--- /dev/null
+++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/SecondarySort.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.mapreduce.examples;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This is an example Hadoop Map/Reduce application.
+ * It reads text input files that must contain two integers per line.
+ * The output is sorted by the first and second number and grouped on the 
+ * first number.
+ *
+ * To run: bin/hadoop jar build/hadoop-examples.jar secondarysort
+ *            <i>in-dir</i> <i>out-dir</i> 
+ */
+public class SecondarySort extends Configured implements Tool {
+ 
+  /**
+   * A writable pair of ints.
+   * Each int is written with Integer.MIN_VALUE subtracted from it, which
+   * keeps the serialized form byte comparable.
+   */
+  public static class IntPair
+                      implements WritableComparable<IntPair> {
+    private int first = 0;
+    private int second = 0;
+
+    /**
+     * Assign both halves of the pair at once.
+     */
+    public void set(int left, int right) {
+      this.first = left;
+      this.second = right;
+    }
+
+    public int getFirst() {
+      return this.first;
+    }
+
+    public int getSecond() {
+      return this.second;
+    }
+
+    /**
+     * Read the two integers, undoing the offset applied by write().
+     * Encoded as: MIN_VALUE -> 0, 0 -> -MIN_VALUE, MAX_VALUE-> -1
+     */
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      final int encodedFirst = in.readInt();
+      final int encodedSecond = in.readInt();
+      first = encodedFirst + Integer.MIN_VALUE;
+      second = encodedSecond + Integer.MIN_VALUE;
+    }
+
+    /**
+     * Write the two integers, each offset by Integer.MIN_VALUE.
+     */
+    @Override
+    public void write(DataOutput out) throws IOException {
+      final int encodedFirst = first - Integer.MIN_VALUE;
+      final int encodedSecond = second - Integer.MIN_VALUE;
+      out.writeInt(encodedFirst);
+      out.writeInt(encodedSecond);
+    }
+
+    @Override
+    public int hashCode() {
+      return 157 * first + second;
+    }
+
+    @Override
+    public boolean equals(Object right) {
+      if (!(right instanceof IntPair)) {
+        return false;
+      }
+      final IntPair other = (IntPair) right;
+      return first == other.first && second == other.second;
+    }
+
+    /** A Comparator that orders IntPairs by their serialized bytes. */
+    public static class Comparator extends WritableComparator {
+      public Comparator() {
+        super(IntPair.class);
+      }
+
+      @Override
+      public int compare(byte[] b1, int s1, int l1,
+                         byte[] b2, int s2, int l2) {
+        return compareBytes(b1, s1, l1, b2, s2, l2);
+      }
+    }
+
+    // Register the byte-level comparator as the comparator for IntPair.
+    static {
+      WritableComparator.define(IntPair.class, new Comparator());
+    }
+
+    /**
+     * Order by the first value, then by the second value.
+     */
+    @Override
+    public int compareTo(IntPair o) {
+      if (first != o.first) {
+        return first < o.first ? -1 : 1;
+      }
+      if (second != o.second) {
+        return second < o.second ? -1 : 1;
+      }
+      return 0;
+    }
+  }
+  
+  /**
+   * Partition based on the first part of the pair only; the second part and
+   * the value play no role in choosing the partition.
+   */
+  public static class FirstPartitioner extends Partitioner<IntPair,IntWritable>{
+    /**
+     * @param key the pair whose first element selects the partition
+     * @param value not used
+     * @param numPartitions the number of partitions
+     * @return a partition index that is never negative and is smaller than
+     *         numPartitions
+     */
+    @Override
+    public int getPartition(IntPair key, IntWritable value, 
+                            int numPartitions) {
+      // Take the remainder first and the absolute value second.
+      // Math.abs(Integer.MIN_VALUE) is still Integer.MIN_VALUE, and the
+      // product below is exactly that when the first element is
+      // Integer.MIN_VALUE, so abs-then-remainder gave a negative partition.
+      // The remainder is always smaller in magnitude than numPartitions,
+      // so its absolute value is safe; every other input maps to the same
+      // partition as before.
+      return Math.abs((key.getFirst() * 127) % numPartitions);
+    }
+  }
+
+  /**
+   * Grouping comparator that looks at the first part of the pair only, so
+   * that reduce is called once for each value of the first part.
+   */
+  public static class FirstGroupingComparator
+                implements RawComparator<IntPair> {
+    /**
+     * Compare the serialized pairs using only the bytes of the leading int.
+     */
+    @Override
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+      final int firstLength = Integer.SIZE / 8;
+      return WritableComparator.compareBytes(b1, s1, firstLength,
+                                             b2, s2, firstLength);
+    }
+
+    /**
+     * Compare the deserialized pairs by their first element.
+     */
+    @Override
+    public int compare(IntPair o1, IntPair o2) {
+      final int left = o1.getFirst();
+      final int right = o2.getFirst();
+      if (left == right) {
+        return 0;
+      }
+      return left < right ? -1 : 1;
+    }
+  }
+
+  /**
+   * Parse up to two integers from each line and emit the pair
+   * ((left, right), right).
+   */
+  public static class MapClass
+         extends Mapper<LongWritable, Text, IntPair, IntWritable> {
+
+    // Reused for every record instead of allocating new objects.
+    private final IntPair key = new IntPair();
+    private final IntWritable value = new IntWritable();
+
+    @Override
+    public void map(LongWritable inKey, Text inValue,
+                    Context context) throws IOException, InterruptedException {
+      final StringTokenizer tokens = new StringTokenizer(inValue.toString());
+      if (!tokens.hasMoreTokens()) {
+        // A line without any token produces no output.
+        return;
+      }
+      final int left = Integer.parseInt(tokens.nextToken());
+      // A missing second number is taken as 0.
+      final int right =
+          tokens.hasMoreTokens() ? Integer.parseInt(tokens.nextToken()) : 0;
+      key.set(left, right);
+      value.set(right);
+      context.write(key, value);
+    }
+  }
+  
+  /**
+   * A reducer class that, for each group, writes the separator key with a
+   * null value and then one (first, value) record per value of the group.
+   */
+  public static class Reduce
+         extends Reducer<IntPair, IntWritable, Text, IntWritable> {
+    private static final Text SEPARATOR =
+      new Text("------------------------------------------------");
+    // Reused output key holding the text form of the group's first element.
+    private final Text first = new Text();
+
+    @Override
+    public void reduce(IntPair key, Iterable<IntWritable> values,
+                       Context context
+                       ) throws IOException, InterruptedException {
+      context.write(SEPARATOR, null);
+      final String firstAsText = String.valueOf(key.getFirst());
+      first.set(firstAsText);
+      for (IntWritable groupValue : values) {
+        context.write(first, groupValue);
+      }
+    }
+  }
+  
+  @Override
+  public int run(String[] args) throws Exception {
+    Configuration conf = getConf();
+    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
+    if (otherArgs.length != 2) {
+      System.err.println("Usage: secondarysort <in> <out>");
+      ToolRunner.printGenericCommandUsage(System.out);
+      System.exit(2);
+    }
+    Job job = new Job(conf, "secondary sort");
+    job.setJarByClass(SecondarySort.class);
+    job.setMapperClass(MapClass.class);
+    job.setReducerClass(Reduce.class);
+
+    // group and partition by the first int in the pair
+    job.setPartitionerClass(FirstPartitioner.class);
+    job.setGroupingComparatorClass(FirstGroupingComparator.class);
+
+    // the map output is IntPair, IntWritable
+    job.setMapOutputKeyClass(IntPair.class);
+    job.setMapOutputValueClass(IntWritable.class);
+
+    // the reduce output is Text, IntWritable
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(IntWritable.class);
+    
+    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
+    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
+    return job.waitForCompletion(true) ? 0 : 1;
+  }
+
+  /**
+   * Command-line entry point: hands the arguments to {@link ToolRunner} and
+   * exits the JVM with the status that the run returns.
+   */
+  public static void main(String[] args) throws Exception {
+    final int exitCode =
+        ToolRunner.run(new Configuration(), new SecondarySort(), args);
+    System.exit(exitCode);
+  }
+}

http://git-wip-us.apache.org/repos/asf/tez/blob/41f5cd8a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/Sort.java
----------------------------------------------------------------------
diff --git a/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/Sort.java b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/Sort.java
new file mode 100644
index 0000000..13e1bbc
--- /dev/null
+++ b/tez-tests/src/main/java/org/apache/tez/mapreduce/examples/Sort.java
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tez.mapreduce.examples;
+
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.mapreduce.filecache.DistributedCache;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapreduce.*;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
+import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+/**
+ * This is the trivial map/reduce program that does absolutely nothing
+ * other than use the framework to fragment and sort the input values.
+ *
+ * To run: bin/hadoop jar build/hadoop-examples.jar sort
+ *            [-r <i>reduces</i>]
+ *            [-inFormat <i>input format class</i>] 
+ *            [-outFormat <i>output format class</i>] 
+ *            [-outKey <i>output key class</i>] 
+ *            [-outValue <i>output value class</i>] 
+ *            [-totalOrder <i>pcnt</i> <i>num samples</i> <i>max splits</i>]
+ *            <i>in-dir</i> <i>out-dir</i> 
+ */
+public class Sort<K,V> extends Configured implements Tool {
+  public static final String REDUCES_PER_HOST = 
+    "mapreduce.sort.reducesperhost";
+  private Job job = null;
+
+  static int printUsage() {
+    System.out.println("sort [-r <reduces>] " +
+                       "[-inFormat <input format class>] " +
+                       "[-outFormat <output format class>] " + 
+                       "[-outKey <output key class>] " +
+                       "[-outValue <output value class>] " +
+                       "[-totalOrder <pcnt> <num samples> <max splits>] " +
+                       "<input> <output>");
+    ToolRunner.printGenericCommandUsage(System.out);
+    return 2;
+  }
+
+  /**
+   * The main driver for sort program.
+   * Invoke this method to submit the map/reduce job.
+   * @throws java.lang.Exception When there is communication problems with the
+   *                     job tracker.
+   */
+  public int run(String[] args) throws Exception {
+
+    Configuration conf = getConf();
+    JobClient client = new JobClient(conf);
+    ClusterStatus cluster = client.getClusterStatus();
+    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
+    String sort_reduces = conf.get(REDUCES_PER_HOST);
+    if (sort_reduces != null) {
+       num_reduces = cluster.getTaskTrackers() * 
+                       Integer.parseInt(sort_reduces);
+    }
+    Class<? extends InputFormat> inputFormatClass = 
+      SequenceFileInputFormat.class;
+    Class<? extends OutputFormat> outputFormatClass = 
+      SequenceFileOutputFormat.class;
+    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
+    Class<? extends Writable> outputValueClass = BytesWritable.class;
+    List<String> otherArgs = new ArrayList<String>();
+    InputSampler.Sampler<K,V> sampler = null;
+    for(int i=0; i < args.length; ++i) {
+      try {
+        if ("-r".equals(args[i])) {
+          num_reduces = Integer.parseInt(args[++i]);
+        } else if ("-inFormat".equals(args[i])) {
+          inputFormatClass = 
+            Class.forName(args[++i]).asSubclass(InputFormat.class);
+        } else if ("-outFormat".equals(args[i])) {
+          outputFormatClass = 
+            Class.forName(args[++i]).asSubclass(OutputFormat.class);
+        } else if ("-outKey".equals(args[i])) {
+          outputKeyClass = 
+            Class.forName(args[++i]).asSubclass(WritableComparable.class);
+        } else if ("-outValue".equals(args[i])) {
+          outputValueClass = 
+            Class.forName(args[++i]).asSubclass(Writable.class);
+        } else if ("-totalOrder".equals(args[i])) {
+          double pcnt = Double.parseDouble(args[++i]);
+          int numSamples = Integer.parseInt(args[++i]);
+          int maxSplits = Integer.parseInt(args[++i]);
+          if (0 >= maxSplits) maxSplits = Integer.MAX_VALUE;
+          sampler =
+            new InputSampler.RandomSampler<K,V>(pcnt, numSamples, maxSplits);
+        } else {
+          otherArgs.add(args[i]);
+        }
+      } catch (NumberFormatException except) {
+        System.out.println("ERROR: Integer expected instead of " + args[i]);
+        return printUsage();
+      } catch (ArrayIndexOutOfBoundsException except) {
+        System.out.println("ERROR: Required parameter missing from " +
+            args[i-1]);
+        return printUsage(); // exits
+      }
+    }
+    // Set user-supplied (possibly default) job configs
+    job = new Job(conf);
+    job.setJobName("sorter");
+    job.setJarByClass(Sort.class);
+
+    job.setMapperClass(Mapper.class);        
+    job.setReducerClass(Reducer.class);
+
+    job.setNumReduceTasks(num_reduces);
+
+    job.setInputFormatClass(inputFormatClass);
+    job.setOutputFormatClass(outputFormatClass);
+
+    job.setOutputKeyClass(outputKeyClass);
+    job.setOutputValueClass(outputValueClass);
+
+    // Make sure there are exactly 2 parameters left.
+    if (otherArgs.size() != 2) {
+      System.out.println("ERROR: Wrong number of parameters: " +
+          otherArgs.size() + " instead of 2.");
+      return printUsage();
+    }
+    FileInputFormat.setInputPaths(job, otherArgs.get(0));
+    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
+    
+    if (sampler != null) {
+      System.out.println("Sampling input to effect total-order sort...");
+      job.setPartitionerClass(TotalOrderPartitioner.class);
+      Path inputDir = FileInputFormat.getInputPaths(job)[0];
+      inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
+      Path partitionFile = new Path(inputDir, "_sortPartitioning");
+      TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
+      InputSampler.<K,V>writePartitionFile(job, sampler);
+      URI partitionUri = new URI(partitionFile.toString() +
+                                 "#" + "_sortPartitioning");
+      DistributedCache.addCacheFile(partitionUri, conf);
+    }
+
+    System.out.println("Running on " +
+        cluster.getTaskTrackers() +
+        " nodes to sort from " + 
+        FileInputFormat.getInputPaths(job)[0] + " into " +
+        FileOutputFormat.getOutputPath(job) +
+        " with " + num_reduces + " reduces.");
+    Date startTime = new Date();
+    System.out.println("Job started: " + startTime);
+    int ret = job.waitForCompletion(true) ? 0 : 1;
+    Date end_time = new Date();
+    System.out.println("Job ended: " + end_time);
+    System.out.println("The job took " + 
+        (end_time.getTime() - startTime.getTime()) /1000 + " seconds.");
+    return ret;
+  }
+
+
+
+  /**
+   * Command-line entry point: hands the arguments to {@link ToolRunner} and
+   * exits the JVM with the status that the run returns.
+   */
+  public static void main(String[] args) throws Exception {
+    final int exitCode = ToolRunner.run(new Configuration(), new Sort(), args);
+    System.exit(exitCode);
+  }
+
+  /**
+   * Get the job most recently created by a run of this instance.
+   * @return that job, or null if no run has created one yet
+   */
+  public Job getResult() {
+    return this.job;
+  }
+}


Mime
View raw message