hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r541786 - in /lucene/hadoop/trunk: CHANGES.txt src/examples/org/apache/hadoop/examples/RandomWriter.java
Date Fri, 25 May 2007 21:41:23 GMT
Author: cutting
Date: Fri May 25 14:41:22 2007
New Revision: 541786

URL: http://svn.apache.org/viewvc?view=rev&rev=541786
Log:
HADOOP-1376.  Modify RandomWriter example so that it can generate data for the Terasort benchmark.
 Contributed by Devaraj.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=541786&r1=541785&r2=541786
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri May 25 14:41:22 2007
@@ -69,6 +69,9 @@
  22. HADOOP-1408.  Fix a compiler warning by adding a class to replace
      a generic.  (omalley via cutting)
 
+ 23. HADOOP-1376.  Modify RandomWriter example so that it can generate
+     data for the Terasort benchmark.  (Devaraj Das via cutting)
+
 
 Branch 0.13 (unreleased changes)
 

Modified: lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java?view=diff&rev=541786&r1=541785&r2=541786
==============================================================================
--- lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java (original)
+++ lucene/hadoop/trunk/src/examples/org/apache/hadoop/examples/RandomWriter.java Fri May 25 14:41:22 2007
@@ -34,8 +34,33 @@
  * This program uses map/reduce to just run a distributed job where there is
  * no interaction between the tasks and each task write a large unsorted
  * random binary sequence file of BytesWritable.
- * 
- * @author Owen O'Malley
+ * In order for this program to generate data for terasort with 10-byte keys
+ * and 90-byte values, have the following config:
+ * <xmp>
+ * <?xml version="1.0"?>
+ * <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+ * <configuration>
+ *   <property>
+ *     <name>test.randomwrite.min_key</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.max_key</name>
+ *     <value>10</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.min_value</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.max_value</name>
+ *     <value>90</value>
+ *   </property>
+ *   <property>
+ *     <name>test.randomwrite.total_bytes</name>
+ *     <value>1099511627776</value>
+ *   </property>
+ * </configuration></xmp>
  */
 public class RandomWriter {
   
@@ -220,8 +245,21 @@
     
     JobClient client = new JobClient(job);
     ClusterStatus cluster = client.getClusterStatus();
-    int numMaps = cluster.getTaskTrackers() * 
-      job.getInt("test.randomwriter.maps_per_host", 10);
+    int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
+    long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
+                                             1*1024*1024*1024);
+    if (numBytesToWritePerMap == 0) {
+      System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
+      System.exit(-1);
+    }
+    long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes", 
+         numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
+    int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
+    if (numMaps == 0 && totalBytesToWrite > 0) {
+      numMaps = 1;
+      job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
+    }
+    
     job.setNumMapTasks(numMaps);
     System.out.println("Running " + numMaps + " maps.");
     job.setNumReduceTasks(1);



Mime
View raw message