hadoop-mapreduce-commits mailing list archives

From sha...@apache.org
Subject svn commit: r788608 [1/2] - in /hadoop/mapreduce/trunk: ./ src/examples/org/apache/hadoop/examples/ src/java/org/apache/hadoop/mapred/lib/aggregate/ src/java/org/apache/hadoop/mapreduce/lib/aggregate/ src/test/ src/test/mapred/org/apache/hadoop/mapred/...
Date Fri, 26 Jun 2009 06:43:34 GMT
Author: sharad
Date: Fri Jun 26 06:43:33 2009
New Revision: 788608

URL: http://svn.apache.org/viewvc?rev=788608&view=rev
Log:
MAPREDUCE-358. Change org.apache.hadoop.examples.AggregateWordCount and org.apache.hadoop.examples.AggregateWordHistogram to use the new mapreduce API. Contributed by Amareshwari Sriramadasu.
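
The change replaces the old JobConf/JobClient submission path with the new org.apache.hadoop.mapreduce.Job API, as the diffs below show. For reference, a minimal driver sketch against the new API; the plug-in class here is illustrative, and without an overridden generateKeyValPairs the base descriptor only emits a "record_count" aggregation:

    import java.io.IOException;

    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;
    import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob;

    public class AggregateDriverSketch {

      // Illustrative plug-in: inherits the default behaviour of the base
      // descriptor, which emits a LongValueSum entry under "record_count".
      public static class CountRecordsPlugIn extends ValueAggregatorBaseDescriptor {
      }

      @SuppressWarnings("unchecked")
      public static void main(String[] args)
          throws IOException, InterruptedException, ClassNotFoundException {
        Job job = ValueAggregatorJob.createValueAggregatorJob(
            args, new Class[] {CountRecordsPlugIn.class});
        job.setJarByClass(AggregateDriverSketch.class);
        // waitForCompletion replaces the old blocking JobClient.runJob(conf) call.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }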

Added:
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/DoubleValueSum.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMax.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMin.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueSum.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMax.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMin.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UniqValueCount.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UserDefinedValueAggregatorDescriptor.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregator.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorBaseDescriptor.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorCombiner.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorDescriptor.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJobBase.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorMapper.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorReducer.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueHistogram.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/package.html
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapreduce/lib/aggregate/
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapreduce/lib/aggregate/AggregatorTests.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapreduce/lib/aggregate/TestMapReduceAggregates.java
Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java
    hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/DoubleValueSum.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMax.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMin.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueSum.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMax.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMin.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UniqValueCount.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UserDefinedValueAggregatorDescriptor.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregator.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorBaseDescriptor.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorDescriptor.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueHistogram.java
    hadoop/mapreduce/trunk/src/test/findbugsExcludeFile.xml
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/mapred/lib/aggregate/AggregatorTests.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Fri Jun 26 06:43:33 2009
@@ -7,6 +7,10 @@
     MAPREDUCE-516. Fix the starvation problem in the Capacity Scheduler 
     when running High RAM Jobs. (Arun Murthy via yhemanth)
 
+    MAPREDUCE-358. Change org.apache.hadoop.examples. AggregateWordCount 
+    and org.apache.hadoop.examples.AggregateWordHistogram to use new 
+    mapreduce api. (Amareshwari Sriramadasu via sharad)
+
   NEW FEATURES
 
     HADOOP-5887. Sqoop should create tables in Hive metastore after importing

Modified: hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java (original)
+++ hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordCount.java Fri Jun 26 06:43:33 2009
@@ -24,18 +24,17 @@
 import java.util.Map.Entry;
 
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;
-import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;
+import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob;
 
 /**
  * This is an example Aggregated Hadoop Map/Reduce application. It reads the
  * text input files, breaks each line into words and counts them. The output is
  * a locally sorted list of words and the count of how often they occurred.
  * 
- * To run: bin/hadoop jar hadoop-*-examples.jar aggregatewordcount <i>in-dir</i>
- * <i>out-dir</i> <i>numOfReducers</i> textinputformat
+ * To run: bin/hadoop jar hadoop-*-examples.jar aggregatewordcount 
+ * <i>in-dir</i> <i>out-dir</i> <i>numOfReducers</i> textinputformat
  * 
  */
 public class AggregateWordCount {
@@ -67,11 +66,13 @@
    *           When there is communication problems with the job tracker.
    */
   @SuppressWarnings("unchecked")
-  public static void main(String[] args) throws IOException {
-    JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
+  public static void main(String[] args) 
+    throws IOException, InterruptedException, ClassNotFoundException  {
+    Job job = ValueAggregatorJob.createValueAggregatorJob(args
         , new Class[] {WordCountPlugInClass.class});
-   
-    JobClient.runJob(conf);
+    job.setJarByClass(AggregateWordCount.class);
+    int ret = job.waitForCompletion(true) ? 0 : 1;
+    System.exit(ret);
   }
 
 }

Modified: hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java (original)
+++ hadoop/mapreduce/trunk/src/examples/org/apache/hadoop/examples/AggregateWordHistogram.java Fri Jun 26 06:43:33 2009
@@ -23,10 +23,9 @@
 import java.util.Map.Entry;
 
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor;
-import org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;
+import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob;
 
 /**
  * This is an example Aggregated Hadoop Map/Reduce application. Computes the
@@ -49,7 +48,8 @@
      * @return a list of the generated pairs.
      */
     @Override
-    public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key, Object val) {
+    public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
+                                          Object val) {
       String words[] = val.toString().split(" |\t");
       ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
       for (int i = 0; i < words.length; i++) {
@@ -71,11 +71,12 @@
    *           When there is communication problems with the job tracker.
    */
   @SuppressWarnings("unchecked")
-  public static void main(String[] args) throws IOException {
-    JobConf conf = ValueAggregatorJob.createValueAggregatorJob(args
+  public static void main(String[] args) 
+    throws IOException, InterruptedException, ClassNotFoundException  {
+    Job job = ValueAggregatorJob.createValueAggregatorJob(args
         , new Class[] {AggregateWordHistogramPlugin.class});
-    
-    JobClient.runJob(conf);
+    job.setJarByClass(AggregateWordCount.class);
+    int ret = job.waitForCompletion(true) ? 0 : 1;
+    System.exit(ret);
   }
-  
 }
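
A descriptor plug-in like AggregateWordHistogramPlugin above turns each input record into aggregation-id/value pairs, and the id's type prefix tells the framework which aggregator to apply. A small sketch against the new package; the class name and aggregation id are illustrative:

    import java.util.ArrayList;
    import java.util.Map.Entry;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;

    public class FirstWordHistogramPlugIn extends ValueAggregatorBaseDescriptor {
      @Override
      public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
                                                              Object val) {
        ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
        String[] words = val.toString().split(" |\t");
        if (words.length > 0) {
          // VALUE_HISTOGRAM selects ValueHistogram as the aggregator for this id.
          Entry<Text, Text> e =
              generateEntry(VALUE_HISTOGRAM, "first_word", new Text(words[0]));
          if (e != null) {
            retv.add(e);
          }
        }
        return retv;
      }
    }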

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/DoubleValueSum.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/DoubleValueSum.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/DoubleValueSum.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/DoubleValueSum.java Fri Jun 26 06:43:33 2009
@@ -18,78 +18,16 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-
-
 /**
  * This class implements a value aggregator that sums up a sequence of double
  * values.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum} instead 
  */
-public class DoubleValueSum implements ValueAggregator {
-
-  double sum = 0;
-
-  /**
-   * The default constructor
-   * 
-   */
-  public DoubleValueSum() {
-    reset();
-  }
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          an object whose string representation represents a double value.
-   * 
-   */
-  public void addNextValue(Object val) {
-    this.sum += Double.parseDouble(val.toString());
-  }
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          a double value.
-   * 
-   */
-  public void addNextValue(double val) {
-    this.sum += val;
-  }
-
-  /**
-   * @return the string representation of the aggregated value
-   */
-  public String getReport() {
-    return "" + sum;
-  }
-
-  /**
-   * @return the aggregated value
-   */
-  public double getSum() {
-    return this.sum;
-  }
-
-  /**
-   * reset the aggregator
-   */
-  public void reset() {
-    sum = 0;
-  }
-
-  /**
-   * @return return an array of one element. The element is a string
-   *         representation of the aggregated value. The return value is
-   *         expected to be used by the a combiner.
-   */
-  public ArrayList<String> getCombinerOutput() {
-    ArrayList<String> retv = new ArrayList<String>(1);
-    retv.add("" + sum);
-    return retv;
-  }
+@Deprecated
+public class DoubleValueSum 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum
+    implements ValueAggregator<String> {
 
 }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMax.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMax.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMax.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMax.java Fri Jun 26 06:43:33 2009
@@ -18,81 +18,16 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
 
 /**
  * This class implements a value aggregator that maintain the maximum of 
  * a sequence of long values.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax} instead
  */
-public class LongValueMax implements ValueAggregator {
-
-  long maxVal = Long.MIN_VALUE;
-    
-  /**
-   *  the default constructor
-   *
-   */
-  public LongValueMax() {
-    reset();
-  }
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          an object whose string representation represents a long value.
-   * 
-   */
-  public void addNextValue(Object val) {
-    long newVal = Long.parseLong(val.toString());
-    if (this.maxVal < newVal) {
-      this.maxVal = newVal;
-    }
-  }
-    
-  /**
-   * add a value to the aggregator
-   * 
-   * @param newVal
-   *          a long value.
-   * 
-   */
-  public void addNextValue(long newVal) {
-    if (this.maxVal < newVal) {
-      this.maxVal = newVal;
-    };
-  }
-    
-  /**
-   * @return the aggregated value
-   */
-  public long getVal() {
-    return this.maxVal;
-  }
-    
-  /**
-   * @return the string representation of the aggregated value
-   */
-  public String getReport() {
-    return ""+maxVal;
-  }
-
-  /**
-   * reset the aggregator
-   */
-  public void reset() {
-    maxVal = Long.MIN_VALUE;
-  }
-
-  /**
-   * @return return an array of one element. The element is a string
-   *         representation of the aggregated value. The return value is
-   *         expected to be used by the a combiner.
-   */
-  public ArrayList<String> getCombinerOutput() {
-    ArrayList<String> retv = new ArrayList<String>(1);;
-    retv.add(""+maxVal);
-    return retv;
-  }
+@Deprecated
+public class LongValueMax 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax
+    implements ValueAggregator<String> {
 }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMin.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMin.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMin.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueMin.java Fri Jun 26 06:43:33 2009
@@ -18,81 +18,15 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-
 /**
  * This class implements a value aggregator that maintain the minimum of 
  * a sequence of long values.
  * 
+ *@deprecated Use 
+ *{@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin} instead
  */
-public class LongValueMin implements ValueAggregator {
-
-  long minVal = Long.MAX_VALUE;
-    
-  /**
-   *  the default constructor
-   *
-   */
-  public LongValueMin() {
-    reset();
-  }
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          an object whose string representation represents a long value.
-   * 
-   */
-  public void addNextValue(Object val) {
-    long newVal = Long.parseLong(val.toString());
-    if (this.minVal > newVal) {
-      this.minVal = newVal;
-    }
-  }
-    
-  /**
-   * add a value to the aggregator
-   * 
-   * @param newVal
-   *          a long value.
-   * 
-   */
-  public void addNextValue(long newVal) {
-    if (this.minVal > newVal) {
-      this.minVal = newVal;
-    };
-  }
-    
-  /**
-   * @return the aggregated value
-   */
-  public long getVal() {
-    return this.minVal;
-  }
-    
-  /**
-   * @return the string representation of the aggregated value
-   */
-  public String getReport() {
-    return ""+minVal;
-  }
-
-  /**
-   * reset the aggregator
-   */
-  public void reset() {
-    minVal = Long.MAX_VALUE;
-  }
-
-  /**
-   * @return return an array of one element. The element is a string
-   *         representation of the aggregated value. The return value is
-   *         expected to be used by the a combiner.
-   */
-  public ArrayList<String> getCombinerOutput() {
-    ArrayList<String> retv = new ArrayList<String>(1);
-    retv.add(""+minVal);
-    return retv;
-  }
+@Deprecated
+public class LongValueMin 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin 
+    implements ValueAggregator<String> {
 }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueSum.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueSum.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueSum.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/LongValueSum.java Fri Jun 26 06:43:33 2009
@@ -18,78 +18,17 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-
 /**
  * This class implements a value aggregator that sums up 
  * a sequence of long values.
  * 
+ *@deprecated Use 
+ *{@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum} instead 
  */
-public class LongValueSum implements ValueAggregator {
-
-  long sum = 0;
-    
-  /**
-   *  the default constructor
-   *
-   */
-  public LongValueSum() {
-    reset();
-  }
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          an object whose string representation represents a long value.
-   * 
-   */
-  public void addNextValue(Object val) {
-    this.sum += Long.parseLong(val.toString());
-  }
-    
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          a long value.
-   * 
-   */
-  public void addNextValue(long val) {
-    this.sum += val;
-  }
-    
-  /**
-   * @return the aggregated value
-   */
-  public long getSum() {
-    return this.sum;
-  }
-    
-  /**
-   * @return the string representation of the aggregated value
-   */
-  public String getReport() {
-    return ""+sum;
-  }
-
-  /**
-   * reset the aggregator
-   */
-  public void reset() {
-    sum = 0;
-  }
-
-  /**
-   * @return return an array of one element. The element is a string
-   *         representation of the aggregated value. The return value is
-   *         expected to be used by the a combiner.
-   */
-  public ArrayList<String> getCombinerOutput() {
-    ArrayList<String> retv = new ArrayList<String>(1);
-    retv.add(""+sum);
-    return retv;
-  }
+@Deprecated
+public class LongValueSum 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum 
+    implements ValueAggregator<String> {
 }
 
 

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMax.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMax.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMax.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMax.java Fri Jun 26 06:43:33 2009
@@ -18,69 +18,15 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-
 /**
  * This class implements a value aggregator that maintain the biggest of 
  * a sequence of strings.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax} instead
  */
-public class StringValueMax implements ValueAggregator {
-
-  String maxVal = null;
-    
-  /**
-   *  the default constructor
-   *
-   */
-  public StringValueMax() {
-    reset();
-  }
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          a string.
-   * 
-   */
-  public void addNextValue(Object val) {
-    String newVal = val.toString();
-    if (this.maxVal == null || this.maxVal.compareTo(newVal) < 0) {
-      this.maxVal = newVal;
-    }
-  }
-    
-    
-  /**
-   * @return the aggregated value
-   */
-  public String getVal() {
-    return this.maxVal;
-  }
-    
-  /**
-   * @return the string representation of the aggregated value
-   */
-  public String getReport() {
-    return maxVal;
-  }
-
-  /**
-   * reset the aggregator
-   */
-  public void reset() {
-    maxVal = null;
-  }
-
-  /**
-   * @return return an array of one element. The element is a string
-   *         representation of the aggregated value. The return value is
-   *         expected to be used by the a combiner.
-   */
-  public ArrayList<String> getCombinerOutput() {
-    ArrayList<String> retv = new ArrayList<String>(1);
-    retv.add(maxVal);
-    return retv;
-  }
+@Deprecated
+public class StringValueMax 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax 
+    implements ValueAggregator<String> {
 }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMin.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMin.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMin.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/StringValueMin.java Fri Jun 26 06:43:33 2009
@@ -18,69 +18,15 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-
 /**
  * This class implements a value aggregator that maintain the smallest of 
  * a sequence of strings.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin} instead 
  */
-public class StringValueMin implements ValueAggregator {
-
-  String minVal = null;
-    
-  /**
-   *  the default constructor
-   *
-   */
-  public StringValueMin() {
-    reset();
-  }
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          a string.
-   * 
-   */
-  public void addNextValue(Object val) {
-    String newVal = val.toString();
-    if (this.minVal == null || this.minVal.compareTo(newVal) > 0) {
-      this.minVal = newVal;
-    }
-  }
-    
-    
-  /**
-   * @return the aggregated value
-   */
-  public String getVal() {
-    return this.minVal;
-  }
-    
-  /**
-   * @return the string representation of the aggregated value
-   */
-  public String getReport() {
-    return minVal;
-  }
-
-  /**
-   * reset the aggregator
-   */
-  public void reset() {
-    minVal = null;
-  }
-
-  /**
-   * @return return an array of one element. The element is a string
-   *         representation of the aggregated value. The return value is
-   *         expected to be used by the a combiner.
-   */
-  public ArrayList<String> getCombinerOutput() {
-    ArrayList<String> retv = new ArrayList<String>(1);
-    retv.add(minVal);
-    return retv;
-  }
+@Deprecated
+public class StringValueMin 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin 
+    implements ValueAggregator<String> {
 }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UniqValueCount.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UniqValueCount.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UniqValueCount.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UniqValueCount.java Fri Jun 26 06:43:33 2009
@@ -18,29 +18,22 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.Set;
-import java.util.TreeMap;
-
 /**
  * This class implements a value aggregator that dedupes a sequence of objects.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount} instead 
  */
-public class UniqValueCount implements ValueAggregator {
-
-  private TreeMap<Object, Object> uniqItems = null;
-
-  private long numItems = 0;
-  
-  private long maxNumItems = Long.MAX_VALUE;
-
+@Deprecated
+public class UniqValueCount 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount 
+    implements ValueAggregator<Object> {
   /**
    * the default constructor
    * 
    */
   public UniqValueCount() {
-    this(Long.MAX_VALUE);
+    super();
   }
   
   /**
@@ -49,77 +42,6 @@
    *  
    */
   public UniqValueCount(long maxNum) {
-    uniqItems = new TreeMap<Object, Object>();
-    this.numItems = 0;
-    maxNumItems = Long.MAX_VALUE;
-    if (maxNum > 0 ) {
-      this.maxNumItems = maxNum;
-    }
-  }
-
-  /**
-   * Set the limit on the number of unique values
-   * @param n the desired limit on the number of unique values
-   * @return the new limit on the number of unique values
-   */
-  public long setMaxItems(long n) {
-    if (n >= numItems) {
-      this.maxNumItems = n;
-    } else if (this.maxNumItems >= this.numItems) {
-      this.maxNumItems = this.numItems;
-    }
-    return this.maxNumItems;
-  }
-  
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val
-   *          an object.
-   * 
-   */
-  public void addNextValue(Object val) {
-    if (this.numItems <= this.maxNumItems) {
-      uniqItems.put(val.toString(), "1");
-      this.numItems = this.uniqItems.size();
-    }
-  }
-
-  /**
-   * @return return the number of unique objects aggregated
-   */
-  public String getReport() {
-    return "" + uniqItems.size();
-  }
-
-  /**
-   * 
-   * @return the set of the unique objects
-   */
-  public Set getUniqueItems() {
-    return uniqItems.keySet();
-  }
-
-  /**
-   * reset the aggregator
-   */
-  public void reset() {
-    uniqItems = new TreeMap<Object, Object>();
-  }
-
-  /**
-   * @return return an array of the unique objects. The return value is
-   *         expected to be used by the a combiner.
-   */
-  public ArrayList getCombinerOutput() {
-    Object key = null;
-    Iterator iter = uniqItems.keySet().iterator();
-    ArrayList<Object> retv = new ArrayList<Object>();
-
-    while (iter.hasNext()) {
-      key = iter.next();
-      retv.add(key);
-    }
-    return retv;
+    super(maxNum);
   }
 }
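
All of these aggregators share the same small contract: addNextValue, getReport, getCombinerOutput and reset. A standalone sketch with the new UniqValueCount, assuming it mirrors the mapred behaviour shown above:

    import org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount;

    public class UniqValueCountSketch {
      public static void main(String[] args) {
        UniqValueCount uniq = new UniqValueCount(); // no cap on unique items
        for (String v : new String[] {"a", "b", "a", "c"}) {
          uniq.addNextValue(v);
        }
        // getReport returns the number of distinct values seen: "3".
        System.out.println(uniq.getReport());
      }
    }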

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UserDefinedValueAggregatorDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UserDefinedValueAggregatorDescriptor.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UserDefinedValueAggregatorDescriptor.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/UserDefinedValueAggregatorDescriptor.java Fri Jun 26 06:43:33 2009
@@ -18,11 +18,6 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.lang.reflect.Constructor;
-import java.util.ArrayList;
-import java.util.Map.Entry;
-
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 
 /**
@@ -31,14 +26,14 @@
  * name of a user defined class that may be dynamically loaded. The other is to
  * deligate inviokations of generateKeyValPairs function to the created object.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor}
+ * instead
  */
-public class UserDefinedValueAggregatorDescriptor implements
-    ValueAggregatorDescriptor {
-  private String className;
-
-  private ValueAggregatorDescriptor theAggregatorDescriptor = null;
-
-  private static final Class[] argArray = new Class[] {};
+@Deprecated
+public class UserDefinedValueAggregatorDescriptor extends org.apache.hadoop.
+    mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor
+    implements ValueAggregatorDescriptor {
 
   /**
    * Create an instance of the given class
@@ -46,24 +41,8 @@
    * @return a dynamically created instance of the given class 
    */
   public static Object createInstance(String className) {
-    Object retv = null;
-    try {
-      ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
-      Class<?> theFilterClass = Class.forName(className, true, classLoader);
-      Constructor meth = theFilterClass.getDeclaredConstructor(argArray);
-      meth.setAccessible(true);
-      retv = meth.newInstance();
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
-    return retv;
-  }
-
-  private void createAggregator(JobConf job) {
-    if (theAggregatorDescriptor == null) {
-      theAggregatorDescriptor = (ValueAggregatorDescriptor) createInstance(this.className);
-      theAggregatorDescriptor.configure(job);
-    }
+    return org.apache.hadoop.mapreduce.lib.aggregate.
+      UserDefinedValueAggregatorDescriptor.createInstance(className);
   }
 
   /**
@@ -72,37 +51,8 @@
    * @param job a configure object used for decriptor configuration
    */
   public UserDefinedValueAggregatorDescriptor(String className, JobConf job) {
-    this.className = className;
-    this.createAggregator(job);
-  }
-
-  /**
-   *   Generate a list of aggregation-id/value pairs for the given key/value pairs
-   *   by delegating the invocation to the real object.
-   *   
-   * @param key
-   *          input key
-   * @param val
-   *          input value
-   * @return a list of aggregation id/value pairs. An aggregation id encodes an
-   *         aggregation type which is used to guide the way to aggregate the
-   *         value in the reduce/combiner phrase of an Aggregate based job.
-   */
-  public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
-                                                          Object val) {
-    ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
-    if (this.theAggregatorDescriptor != null) {
-      retv = this.theAggregatorDescriptor.generateKeyValPairs(key, val);
-    }
-    return retv;
-  }
-
-  /**
-   * @return the string representation of this object.
-   */
-  public String toString() {
-    return "UserDefinedValueAggregatorDescriptor with class name:" + "\t"
-      + this.className;
+    super(className, job);
+    ((ValueAggregatorDescriptor)theAggregatorDescriptor).configure(job);
   }
 
   /**
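
createInstance is the hook that lets a job name a descriptor class in its configuration and have it loaded reflectively at runtime. A sketch of a direct call, with a hypothetical plug-in class name:

    import org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor;

    public class CreateInstanceSketch {
      public static void main(String[] args) {
        // Loads the named class through the thread context class loader and
        // invokes its no-arg constructor; failures surface as RuntimeException.
        Object descriptor = UserDefinedValueAggregatorDescriptor.createInstance(
            "my.plugins.MyDescriptor"); // hypothetical class name
        System.out.println(descriptor.getClass().getName());
      }
    }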

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregator.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregator.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregator.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregator.java Fri Jun 26 06:43:33 2009
@@ -18,36 +18,13 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-
 /**
  * This interface defines the minimal protocol for value aggregators.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator} instead
  */
-public interface ValueAggregator {
-
-  /**
-   * add a value to the aggregator
-   * 
-   * @param val the value to be added
-   */
-  public void addNextValue(Object val);
-
-  /**
-   * reset the aggregator
-   *
-   */
-  public void reset();
-
-  /**
-   * @return the string representation of the agregator
-   */
-  public String getReport();
-
-  /**
-   * 
-   * @return an array of values as the outputs of the combiner.
-   */
-  public ArrayList getCombinerOutput();
-
+@Deprecated
+public interface ValueAggregator<E> extends 
+    org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator<E> {
 }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorBaseDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorBaseDescriptor.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorBaseDescriptor.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorBaseDescriptor.java Fri Jun 26 06:43:33 2009
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
 import java.util.Map.Entry;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
@@ -26,54 +25,43 @@
 /** 
  * This class implements the common functionalities of 
  * the subclasses of ValueAggregatorDescriptor class.
+ * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor}
+ * instead
  */
-public class ValueAggregatorBaseDescriptor implements ValueAggregatorDescriptor {
+@Deprecated
+public class ValueAggregatorBaseDescriptor extends org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor 
+    implements ValueAggregatorDescriptor {
 
-  static public final String UNIQ_VALUE_COUNT = "UniqValueCount";
+  static public final String UNIQ_VALUE_COUNT = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.UNIQ_VALUE_COUNT;
 
-  static public final String LONG_VALUE_SUM = "LongValueSum";
+  static public final String LONG_VALUE_SUM = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.LONG_VALUE_SUM;
 
-  static public final String DOUBLE_VALUE_SUM = "DoubleValueSum";
+  static public final String DOUBLE_VALUE_SUM = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.DOUBLE_VALUE_SUM;
 
-  static public final String VALUE_HISTOGRAM = "ValueHistogram";
+  static public final String VALUE_HISTOGRAM = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.VALUE_HISTOGRAM;
   
-  static public final String LONG_VALUE_MAX = "LongValueMax";
+  static public final String LONG_VALUE_MAX = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.LONG_VALUE_MAX;
   
-  static public final String LONG_VALUE_MIN = "LongValueMin";
+  static public final String LONG_VALUE_MIN = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.LONG_VALUE_MIN;
   
-  static public final String STRING_VALUE_MAX = "StringValueMax";
+  static public final String STRING_VALUE_MAX = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.STRING_VALUE_MAX;
   
-  static public final String STRING_VALUE_MIN = "StringValueMin";
-  
-  private static long maxNumItems = Long.MAX_VALUE;
-  
-  public String inputFile = null;
-
-  private static class MyEntry implements Entry<Text, Text> {
-    Text key;
-
-    Text val;
-
-    public Text getKey() {
-      return key;
-    }
+  static public final String STRING_VALUE_MIN = org.apache.hadoop.mapreduce.
+    lib.aggregate.ValueAggregatorBaseDescriptor.STRING_VALUE_MIN;
 
-    public Text getValue() {
-      return val;
-    }
-
-    public Text setValue(Text val) {
-      this.val = val;
-      return val;
-    }
-
-    public MyEntry(Text key, Text val) {
-      this.key = key;
-      this.val = val;
-    }
-  }
-
-  /**
+  private static long maxNumItems = Long.MAX_VALUE; 
+  
+ /**
    * 
    * @param type the aggregation type
    * @param id the aggregation id
@@ -82,8 +70,8 @@
    * the aggregation type.
    */
   public static Entry<Text, Text> generateEntry(String type, String id, Text val) {
-    Text key = new Text(type + TYPE_SEPARATOR + id);
-    return new MyEntry(key, val);
+    return org.apache.hadoop.mapreduce.lib.aggregate.
+      ValueAggregatorBaseDescriptor.generateEntry(type, id, val);
   }
 
   /**
@@ -114,46 +102,12 @@
   }
 
   /**
-   * Generate 1 or 2 aggregation-id/value pairs for the given key/value pair.
-   * The first id will be of type LONG_VALUE_SUM, with "record_count" as
-   * its aggregation id. If the input is a file split,
-   * the second id of the same type will be generated too, with the file name 
-   * as its aggregation id. This achieves the behavior of counting the total number
-   * of records in the input data, and the number of records in each input file.
-   * 
-   * @param key
-   *          input key
-   * @param val
-   *          input value
-   * @return a list of aggregation id/value pairs. An aggregation id encodes an
-   *         aggregation type which is used to guide the way to aggregate the
-   *         value in the reduce/combiner phrase of an Aggregate based job.
-   */
-  public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
-                                                          Object val) {
-    ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
-    String countType = LONG_VALUE_SUM;
-    String id = "record_count";
-    Entry<Text, Text> e = generateEntry(countType, id, ONE);
-    if (e != null) {
-      retv.add(e);
-    }
-    if (this.inputFile != null) {
-      e = generateEntry(countType, this.inputFile, ONE);
-      if (e != null) {
-        retv.add(e);
-      }
-    }
-    return retv;
-  }
-
-  /**
    * get the input file name.
    * 
    * @param job a job configuration object
    */
   public void configure(JobConf job) {
-    this.inputFile = job.get("map.input.file");
+    super.configure(job);
     maxNumItems = job.getLong("aggregate.max.num.unique.values",
                               Long.MAX_VALUE);
   }

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorCombiner.java Fri Jun 26 06:43:33 2009
@@ -30,7 +30,11 @@
 
 /**
  * This class implements the generic combiner of Aggregate.
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner}
+ * instead
  */
+@Deprecated
 public class ValueAggregatorCombiner<K1 extends WritableComparable,
                                      V1 extends Writable>
   extends ValueAggregatorJobBase<K1, V1> {

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorDescriptor.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorDescriptor.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorDescriptor.java Fri Jun 26 06:43:33 2009
@@ -18,9 +18,6 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-import java.util.Map.Entry;
-
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 
@@ -35,27 +32,19 @@
  * key/value pair, the mapper will use those objects to create aggregation
  * id/value pairs.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor}
+ * instead 
  */
-public interface ValueAggregatorDescriptor {
-
-  public static final String TYPE_SEPARATOR = ":";
+@Deprecated
+public interface ValueAggregatorDescriptor extends 
+    org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor {
 
-  public static final Text ONE = new Text("1");
+  public static final String TYPE_SEPARATOR = org.apache.hadoop.mapreduce.
+      lib.aggregate.ValueAggregatorDescriptor.TYPE_SEPARATOR;
 
-  /**
-   * Generate a list of aggregation-id/value pairs for the given key/value pair.
-   * This function is usually called by the mapper of an Aggregate based job.
-   * 
-   * @param key
-   *          input key
-   * @param val
-   *          input value
-   * @return a list of aggregation id/value pairs. An aggregation id encodes an
-   *         aggregation type which is used to guide the way to aggregate the
-   *         value in the reduce/combiner phrase of an Aggregate based job.
-   */
-  public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
-                                                          Object val);
+  public static final Text ONE = org.apache.hadoop.mapreduce.
+      lib.aggregate.ValueAggregatorDescriptor.ONE;
 
   /**
    * Configure the object

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java Fri Jun 26 06:43:33 2009
@@ -77,7 +77,10 @@
  * input format (text or sequence file) output directory a file specifying the
  * user plugin class
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob} instead
  */
+@Deprecated
 public class ValueAggregatorJob {
 
   public static JobControl createValueAggregatorJobs(String args[]

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJobBase.java Fri Jun 26 06:43:33 2009
@@ -31,7 +31,11 @@
 /**
  * This abstract class implements some common functionalities of the
  * the generic mapper, reducer and combiner classes of Aggregate.
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase}
+ * instead
  */
+@Deprecated
 public abstract class ValueAggregatorJobBase<K1 extends WritableComparable,
                                              V1 extends Writable>
   implements Mapper<K1, V1, Text, Text>, Reducer<Text, Text, Text, Text> {

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorMapper.java Fri Jun 26 06:43:33 2009
@@ -30,7 +30,11 @@
 
 /**
  * This class implements the generic mapper of Aggregate.
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper}
+ * instead
  */
+@Deprecated
 public class ValueAggregatorMapper<K1 extends WritableComparable,
                                    V1 extends Writable>
   extends ValueAggregatorJobBase<K1, V1> {

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java Fri Jun 26 06:43:33 2009
@@ -30,8 +30,11 @@
 /**
  * This class implements the generic reducer of Aggregate.
  * 
- * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer}
+ * instead
  */
+@Deprecated
 public class ValueAggregatorReducer<K1 extends WritableComparable,
                                     V1 extends Writable>
   extends ValueAggregatorJobBase<K1, V1> {

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueHistogram.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueHistogram.java?rev=788608&r1=788607&r2=788608&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueHistogram.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate/ValueHistogram.java Fri Jun 26 06:43:33 2009
@@ -18,162 +18,15 @@
 
 package org.apache.hadoop.mapred.lib.aggregate;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.TreeMap;
-import java.util.Map.Entry;
-import java.util.Arrays;
-
-
 /**
  * This class implements a value aggregator that computes the 
  * histogram of a sequence of strings.
  * 
+ * @deprecated Use 
+ * {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram} instead
  */
-public class ValueHistogram implements ValueAggregator {
-
-  TreeMap<Object, Object> items = null;
-
-  public ValueHistogram() {
-    items = new TreeMap<Object, Object>();
-  }
-
-  /**
-   * add the given val to the aggregator.
-   * 
-   * @param val the value to be added. It is expected to be a string
-   * in the form of xxxx\tnum, meaning xxxx has num occurrences.
-   */
-  public void addNextValue(Object val) {
-    String valCountStr = val.toString();
-    int pos = valCountStr.lastIndexOf("\t");
-    String valStr = valCountStr;
-    String countStr = "1";
-    if (pos >= 0) {
-      valStr = valCountStr.substring(0, pos);
-      countStr = valCountStr.substring(pos + 1);
-    }
-    
-    Long count = (Long) this.items.get(valStr);
-    long inc = Long.parseLong(countStr);
-
-    if (count == null) {
-      count = inc;
-    } else {
-      count = count.longValue() + inc;
-    }
-    items.put(valStr, count);
-  }
-
-  /**
-   * @return the string representation of this aggregator.
-   * It includes the following basic statistics of the histogram:
-   *    the number of unique values
-   *    the minimum value
-   *    the media value
-   *    the maximum value
-   *    the average value
-   *    the standard deviation
-   */
-  public String getReport() {
-    long[] counts = new long[items.size()];
-
-    StringBuffer sb = new StringBuffer();
-    Iterator iter = items.values().iterator();
-    int i = 0;
-    while (iter.hasNext()) {
-      Long count = (Long) iter.next();
-      counts[i] = count.longValue();
-      i += 1;
-    }
-    Arrays.sort(counts);
-    sb.append(counts.length);
-    i = 0;
-    long acc = 0;
-    while (i < counts.length) {
-      long nextVal = counts[i];
-      int j = i + 1;
-      while (j < counts.length && counts[j] == nextVal) {
-        j++;
-      }
-      acc += nextVal * (j - i);
-      //sbVal.append("\t").append(nextVal).append("\t").append(j - i)
-      //.append("\n");
-      i = j;
-    }
-    double average = 0.0;
-    double sd = 0.0;
-    if (counts.length > 0) {
-      sb.append("\t").append(counts[0]);
-      sb.append("\t").append(counts[counts.length / 2]);
-      sb.append("\t").append(counts[counts.length - 1]);
-
-      average = acc * 1.0 / counts.length;
-      sb.append("\t").append(average);
-
-      i = 0;
-      while (i < counts.length) {
-        double nextDiff = counts[i] - average;
-        sd += nextDiff * nextDiff;
-        i += 1;
-      }
-      sd = Math.sqrt(sd / counts.length);
-
-      sb.append("\t").append(sd);
-
-    }
-    //sb.append("\n").append(sbVal.toString());
-    return sb.toString();
-  }
-
-  /** 
-   * 
-   * @return a string representation of the list of value/frequence pairs of 
-   * the histogram
-   */
-  public String getReportDetails() {
-    StringBuffer sb = new StringBuffer();
-    Iterator iter = items.entrySet().iterator();
-    while (iter.hasNext()) {
-      Entry en = (Entry) iter.next();
-      Object val = en.getKey();
-      Long count = (Long) en.getValue();
-      sb.append("\t").append(val.toString()).append("\t").append(
-                                                                 count.longValue()).append("\n");
-    }
-    return sb.toString();
-  }
-
-  /**
-   *  @return a list value/frequence pairs.
-   *  The return value is expected to be used by the reducer.
-   */
-  public ArrayList getCombinerOutput() {
-    ArrayList<String> retv = new ArrayList<String>();
-    Iterator iter = items.entrySet().iterator();
-
-    while (iter.hasNext()) {
-      Entry en = (Entry) iter.next();
-      Object val = en.getKey();
-      Long count = (Long) en.getValue();
-      retv.add(val.toString() + "\t" + count.longValue());
-    }
-    return retv;
-  }
-
-  /** 
-   * 
-   * @return a TreeMap representation of the histogram
-   */
-  public TreeMap getReportItems() {
-    return items;
-  }
-
-  /** 
-   * reset the aggregator
-   */
-  public void reset() {
-    items = new TreeMap<Object, Object>();
-  }
-
+@Deprecated
+public class ValueHistogram 
+    extends org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram 
+    implements ValueAggregator<String> {
 }
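
Usage-wise, ValueHistogram accepts values of the form "item" or "item\tcount" and reports statistics over the per-item counts. A sketch against the new class, assuming it keeps the behaviour of the mapred version above:

    import org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram;

    public class ValueHistogramSketch {
      public static void main(String[] args) {
        ValueHistogram hist = new ValueHistogram();
        hist.addNextValue("apple\t3");  // "apple" occurred 3 times
        hist.addNextValue("banana");    // no tab means a count of 1
        hist.addNextValue("apple\t2");  // counts for the same item accumulate
        // Report: number of distinct items, then min, median, max, average and
        // standard deviation of their counts, tab-separated.
        System.out.println(hist.getReport());
      }
    }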

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/DoubleValueSum.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/DoubleValueSum.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/DoubleValueSum.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/DoubleValueSum.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+
+
+/**
+ * This class implements a value aggregator that sums up a sequence of double
+ * values.
+ * 
+ */
+public class DoubleValueSum implements ValueAggregator<String> {
+
+  double sum = 0;
+
+  /**
+   * The default constructor
+   * 
+   */
+  public DoubleValueSum() {
+    reset();
+  }
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          an object whose string representation represents a double value.
+   * 
+   */
+  public void addNextValue(Object val) {
+    this.sum += Double.parseDouble(val.toString());
+  }
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          a double value.
+   * 
+   */
+  public void addNextValue(double val) {
+    this.sum += val;
+  }
+
+  /**
+   * @return the string representation of the aggregated value
+   */
+  public String getReport() {
+    return "" + sum;
+  }
+
+  /**
+   * @return the aggregated value
+   */
+  public double getSum() {
+    return this.sum;
+  }
+
+  /**
+   * reset the aggregator
+   */
+  public void reset() {
+    sum = 0;
+  }
+
+  /**
+   * @return return an array of one element. The element is a string
+   *         representation of the aggregated value. The return value is
+   *         expected to be used by the a combiner.
+   */
+  public ArrayList<String> getCombinerOutput() {
+    ArrayList<String> retv = new ArrayList<String>(1);
+    retv.add("" + sum);
+    return retv;
+  }
+
+}
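
A quick sketch of the new DoubleValueSum on its own, outside a job:

    import org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum;

    public class DoubleValueSumSketch {
      public static void main(String[] args) {
        DoubleValueSum sum = new DoubleValueSum();
        sum.addNextValue(1.5);       // typed overload
        sum.addNextValue("2.25");    // string form, parsed with Double.parseDouble
        System.out.println(sum.getSum());            // 3.75
        System.out.println(sum.getCombinerOutput()); // single-element list [3.75]
      }
    }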

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMax.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMax.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMax.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMax.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+
+/**
+ * This class implements a value aggregator that maintain the maximum of 
+ * a sequence of long values.
+ * 
+ */
+public class LongValueMax implements ValueAggregator<String> {
+
+  long maxVal = Long.MIN_VALUE;
+    
+  /**
+   *  the default constructor
+   *
+   */
+  public LongValueMax() {
+    reset();
+  }
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          an object whose string representation represents a long value.
+   * 
+   */
+  public void addNextValue(Object val) {
+    long newVal = Long.parseLong(val.toString());
+    if (this.maxVal < newVal) {
+      this.maxVal = newVal;
+    }
+  }
+    
+  /**
+   * add a value to the aggregator
+   * 
+   * @param newVal
+   *          a long value.
+   * 
+   */
+  public void addNextValue(long newVal) {
+    if (this.maxVal < newVal) {
+      this.maxVal = newVal;
+    }
+  }
+    
+  /**
+   * @return the aggregated value
+   */
+  public long getVal() {
+    return this.maxVal;
+  }
+    
+  /**
+   * @return the string representation of the aggregated value
+   */
+  public String getReport() {
+    return ""+maxVal;
+  }
+
+  /**
+   * reset the aggregator
+   */
+  public void reset() {
+    maxVal = Long.MIN_VALUE;
+  }
+
+  /**
+   * @return an array of one element. The element is a string
+   *         representation of the aggregated value. The return value is
+   *         expected to be used by a combiner.
+   */
+  public ArrayList<String> getCombinerOutput() {
+    ArrayList<String> retv = new ArrayList<String>(1);
+    retv.add("" + maxVal);
+    return retv;
+  }
+}

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMin.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMin.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMin.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueMin.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+
+/**
+ * This class implements a value aggregator that maintains the minimum of
+ * a sequence of long values.
+ * 
+ */
+public class LongValueMin implements ValueAggregator<String> {
+
+  long minVal = Long.MAX_VALUE;
+    
+  /**
+   *  the default constructor
+   *
+   */
+  public LongValueMin() {
+    reset();
+  }
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          an object whose string representation represents a long value.
+   * 
+   */
+  public void addNextValue(Object val) {
+    long newVal = Long.parseLong(val.toString());
+    if (this.minVal > newVal) {
+      this.minVal = newVal;
+    }
+  }
+    
+  /**
+   * add a value to the aggregator
+   * 
+   * @param newVal
+   *          a long value.
+   * 
+   */
+  public void addNextValue(long newVal) {
+    if (this.minVal > newVal) {
+      this.minVal = newVal;
+    }
+  }
+    
+  /**
+   * @return the aggregated value
+   */
+  public long getVal() {
+    return this.minVal;
+  }
+    
+  /**
+   * @return the string representation of the aggregated value
+   */
+  public String getReport() {
+    return ""+minVal;
+  }
+
+  /**
+   * reset the aggregator
+   */
+  public void reset() {
+    minVal = Long.MAX_VALUE;
+  }
+
+  /**
+   * @return an array of one element. The element is a string
+   *         representation of the aggregated value. The return value is
+   *         expected to be used by a combiner.
+   */
+  public ArrayList<String> getCombinerOutput() {
+    ArrayList<String> retv = new ArrayList<String>(1);
+    retv.add(""+minVal);
+    return retv;
+  }
+}

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueSum.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueSum.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueSum.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/LongValueSum.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,95 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+
+/**
+ * This class implements a value aggregator that sums up 
+ * a sequence of long values.
+ * 
+ */
+public class LongValueSum implements ValueAggregator<String> {
+
+  long sum = 0;
+    
+  /**
+   *  the default constructor
+   *
+   */
+  public LongValueSum() {
+    reset();
+  }
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          an object whose string representation represents a long value.
+   * 
+   */
+  public void addNextValue(Object val) {
+    this.sum += Long.parseLong(val.toString());
+  }
+    
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          a long value.
+   * 
+   */
+  public void addNextValue(long val) {
+    this.sum += val;
+  }
+    
+  /**
+   * @return the aggregated value
+   */
+  public long getSum() {
+    return this.sum;
+  }
+    
+  /**
+   * @return the string representation of the aggregated value
+   */
+  public String getReport() {
+    return ""+sum;
+  }
+
+  /**
+   * reset the aggregator
+   */
+  public void reset() {
+    sum = 0;
+  }
+
+  /**
+   * @return an array of one element. The element is a string
+   *         representation of the aggregated value. The return value is
+   *         expected to be used by a combiner.
+   */
+  public ArrayList<String> getCombinerOutput() {
+    ArrayList<String> retv = new ArrayList<String>(1);
+    retv.add(""+sum);
+    return retv;
+  }
+}
+
+
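
The three long-valued aggregators above (LongValueMax, LongValueMin, LongValueSum) share the same calling pattern; a short sketch, again not part of the patch and with an illustrative demo class name:

import org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax;
import org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin;
import org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum;

public class LongAggregatorsDemo {
  public static void main(String[] args) {
    LongValueMax max = new LongValueMax();
    LongValueMin min = new LongValueMin();
    LongValueSum sum = new LongValueSum();
    for (long v : new long[] {7, -3, 12}) {
      max.addNextValue(v);
      min.addNextValue(v);
      sum.addNextValue(v);
    }
    System.out.println(max.getVal());  // 12
    System.out.println(min.getVal());  // -3
    System.out.println(sum.getSum());  // 16
    // getReport() and getCombinerOutput() expose the same values as strings,
    // which is the form shuffled between the combiner and the reducer.
  }
}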

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMax.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMax.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMax.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMax.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+
+/**
+ * This class implements a value aggregator that maintains the largest of
+ * a sequence of strings.
+ * 
+ */
+public class StringValueMax implements ValueAggregator<String> {
+
+  String maxVal = null;
+    
+  /**
+   *  the default constructor
+   *
+   */
+  public StringValueMax() {
+    reset();
+  }
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          a string.
+   * 
+   */
+  public void addNextValue(Object val) {
+    String newVal = val.toString();
+    if (this.maxVal == null || this.maxVal.compareTo(newVal) < 0) {
+      this.maxVal = newVal;
+    }
+  }
+    
+    
+  /**
+   * @return the aggregated value
+   */
+  public String getVal() {
+    return this.maxVal;
+  }
+    
+  /**
+   * @return the string representation of the aggregated value
+   */
+  public String getReport() {
+    return maxVal;
+  }
+
+  /**
+   * reset the aggregator
+   */
+  public void reset() {
+    maxVal = null;
+  }
+
+  /**
+   * @return an array of one element. The element is a string
+   *         representation of the aggregated value. The return value is
+   *         expected to be used by a combiner.
+   */
+  public ArrayList<String> getCombinerOutput() {
+    ArrayList<String> retv = new ArrayList<String>(1);
+    retv.add(maxVal);
+    return retv;
+  }
+}

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMin.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMin.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMin.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/StringValueMin.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+
+/**
+ * This class implements a value aggregator that maintains the smallest of
+ * a sequence of strings.
+ * 
+ */
+public class StringValueMin implements ValueAggregator<String> {
+
+  String minVal = null;
+    
+  /**
+   *  the default constructor
+   *
+   */
+  public StringValueMin() {
+    reset();
+  }
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          a string.
+   * 
+   */
+  public void addNextValue(Object val) {
+    String newVal = val.toString();
+    if (this.minVal == null || this.minVal.compareTo(newVal) > 0) {
+      this.minVal = newVal;
+    }
+  }
+    
+    
+  /**
+   * @return the aggregated value
+   */
+  public String getVal() {
+    return this.minVal;
+  }
+    
+  /**
+   * @return the string representation of the aggregated value
+   */
+  public String getReport() {
+    return minVal;
+  }
+
+  /**
+   * reset the aggregator
+   */
+  public void reset() {
+    minVal = null;
+  }
+
+  /**
+   * @return an array of one element. The element is a string
+   *         representation of the aggregated value. The return value is
+   *         expected to be used by a combiner.
+   */
+  public ArrayList<String> getCombinerOutput() {
+    ArrayList<String> retv = new ArrayList<String>(1);
+    retv.add(minVal);
+    return retv;
+  }
+}

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UniqValueCount.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UniqValueCount.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UniqValueCount.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UniqValueCount.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeMap;
+
+/**
+ * This class implements a value aggregator that dedupes a sequence of objects.
+ * 
+ */
+public class UniqValueCount implements ValueAggregator<Object> {
+
+  private TreeMap<Object, Object> uniqItems = null;
+
+  private long numItems = 0;
+  
+  private long maxNumItems = Long.MAX_VALUE;
+
+  /**
+   * the default constructor
+   * 
+   */
+  public UniqValueCount() {
+    this(Long.MAX_VALUE);
+  }
+  
+  /**
+   * Constructor
+   * @param maxNum the limit on the number of unique values to keep.
+   *  
+   */
+  public UniqValueCount(long maxNum) {
+    uniqItems = new TreeMap<Object, Object>();
+    this.numItems = 0;
+    maxNumItems = Long.MAX_VALUE;
+    if (maxNum > 0 ) {
+      this.maxNumItems = maxNum;
+    }
+  }
+
+  /**
+   * Set the limit on the number of unique values
+   * @param n the desired limit on the number of unique values
+   * @return the new limit on the number of unique values
+   */
+  public long setMaxItems(long n) {
+    if (n >= numItems) {
+      this.maxNumItems = n;
+    } else if (this.maxNumItems >= this.numItems) {
+      this.maxNumItems = this.numItems;
+    }
+    return this.maxNumItems;
+  }
+  
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val
+   *          an object.
+   * 
+   */
+  public void addNextValue(Object val) {
+    if (this.numItems <= this.maxNumItems) {
+      uniqItems.put(val.toString(), "1");
+      this.numItems = this.uniqItems.size();
+    }
+  }
+
+  /**
+   * @return the number of unique objects aggregated
+   */
+  public String getReport() {
+    return "" + uniqItems.size();
+  }
+
+  /**
+   * 
+   * @return the set of the unique objects
+   */
+  public Set<Object> getUniqueItems() {
+    return uniqItems.keySet();
+  }
+
+  /**
+   * reset the aggregator
+   */
+  public void reset() {
+    uniqItems = new TreeMap<Object, Object>();
+  }
+
+  /**
+   * @return an array of the unique objects. The return value is
+   *         expected to be used by a combiner.
+   */
+  public ArrayList<Object> getCombinerOutput() {
+    Object key = null;
+    Iterator<Object> iter = uniqItems.keySet().iterator();
+    ArrayList<Object> retv = new ArrayList<Object>();
+
+    while (iter.hasNext()) {
+      key = iter.next();
+      retv.add(key);
+    }
+    return retv;
+  }
+}
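
UniqValueCount differs from the numeric aggregators in that getReport() returns a count while getCombinerOutput() returns the distinct values themselves; a hedged sketch (demo class name illustrative):

import org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount;

public class UniqValueCountDemo {
  public static void main(String[] args) {
    UniqValueCount uniq = new UniqValueCount();    // default: effectively no limit
    uniq.addNextValue("apple");
    uniq.addNextValue("banana");
    uniq.addNextValue("apple");                    // duplicate, absorbed by the TreeMap
    System.out.println(uniq.getReport());          // "2"
    System.out.println(uniq.getUniqueItems());     // [apple, banana]
    System.out.println(uniq.getCombinerOutput());  // [apple, banana] -- values, not the count
  }
}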

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UserDefinedValueAggregatorDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UserDefinedValueAggregatorDescriptor.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UserDefinedValueAggregatorDescriptor.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/UserDefinedValueAggregatorDescriptor.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,119 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.lang.reflect.Constructor;
+import java.util.ArrayList;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+
+/**
+ * This class implements a wrapper for a user defined value 
+ * aggregator descriptor.
+ * It serves two functions: One is to create an object of 
+ * ValueAggregatorDescriptor from the name of a user defined class
+ * that may be dynamically loaded. The other is to
+ * delegate invocations of generateKeyValPairs function to the created object.
+ * 
+ */
+public class UserDefinedValueAggregatorDescriptor implements
+    ValueAggregatorDescriptor {
+  private String className;
+
+  protected ValueAggregatorDescriptor theAggregatorDescriptor = null;
+
+  private static final Class<?>[] argArray = new Class[] {};
+
+  /**
+   * Create an instance of the given class
+   * @param className the name of the class
+   * @return a dynamically created instance of the given class 
+   */
+  public static Object createInstance(String className) {
+    Object retv = null;
+    try {
+      ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
+      Class<?> theFilterClass = Class.forName(className, true, classLoader);
+      Constructor<?> meth = theFilterClass.getDeclaredConstructor(argArray);
+      meth.setAccessible(true);
+      retv = meth.newInstance();
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+    return retv;
+  }
+
+  private void createAggregator(Configuration conf) {
+    if (theAggregatorDescriptor == null) {
+      theAggregatorDescriptor = (ValueAggregatorDescriptor)
+                                  createInstance(this.className);
+      theAggregatorDescriptor.configure(conf);
+    }
+  }
+
+  /**
+   * 
+   * @param className the class name of the user defined descriptor class
+   * @param conf a configuration object used for descriptor configuration
+   */
+  public UserDefinedValueAggregatorDescriptor(String className, 
+      Configuration conf) {
+    this.className = className;
+    this.createAggregator(conf);
+  }
+
+  /**
+   *   Generate a list of aggregation-id/value pairs for the given 
+   *   key/value pairs by delegating the invocation to the real object.
+   *   
+   * @param key
+   *          input key
+   * @param val
+   *          input value
+   * @return a list of aggregation id/value pairs. An aggregation id encodes an
+   *         aggregation type which is used to guide the way to aggregate the
+   *         value in the reduce/combiner phase of an Aggregate based job.
+   */
+  public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
+                                                          Object val) {
+    ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
+    if (this.theAggregatorDescriptor != null) {
+      retv = this.theAggregatorDescriptor.generateKeyValPairs(key, val);
+    }
+    return retv;
+  }
+
+  /**
+   * @return the string representation of this object.
+   */
+  public String toString() {
+    return "UserDefinedValueAggregatorDescriptor with class name:" + "\t"
+      + this.className;
+  }
+
+  /**
+   *  Do nothing.
+   */
+  public void configure(Configuration conf) {
+
+  }
+
+}
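
The wrapper above loads the named class reflectively via the thread context class loader and then delegates to it. A minimal sketch of the loading step alone; LongValueSum is used only because it is a class from this patch that is known to have a no-argument constructor:

import org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum;
import org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor;

public class CreateInstanceDemo {
  public static void main(String[] args) {
    // createInstance() resolves the class by name and invokes its no-arg constructor.
    Object o = UserDefinedValueAggregatorDescriptor.createInstance(
        "org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum");
    System.out.println(o instanceof LongValueSum);  // true
  }
}

In a real job the class name would instead be that of a user-supplied ValueAggregatorDescriptor implementation, and generateKeyValPairs() calls on the wrapper would be forwarded to that instance.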

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregator.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregator.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregator.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregator.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+
+/**
+ * This interface defines the minimal protocol for value aggregators.
+ * 
+ */
+public interface ValueAggregator<E> {
+
+  /**
+   * add a value to the aggregator
+   * 
+   * @param val the value to be added
+   */
+  public void addNextValue(Object val);
+
+  /**
+   * reset the aggregator
+   *
+   */
+  public void reset();
+
+  /**
+   * @return the string representation of the aggregator
+   */
+  public String getReport();
+
+  /**
+   * 
+   * @return an array of values as the outputs of the combiner.
+   */
+  public ArrayList<E> getCombinerOutput();
+
+}
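
The interface is small enough that a custom aggregator fits in a few lines; a hedged sketch of one that merely counts the values it has seen (not part of the patch):

import java.util.ArrayList;

import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator;

// Hypothetical aggregator: reports how many values were added since the last reset.
public class CountAggregator implements ValueAggregator<String> {
  private long count = 0;

  public void addNextValue(Object val) {
    count++;
  }

  public void reset() {
    count = 0;
  }

  public String getReport() {
    return Long.toString(count);
  }

  public ArrayList<String> getCombinerOutput() {
    ArrayList<String> out = new ArrayList<String>(1);
    out.add(Long.toString(count));
    return out;
  }
}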

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorBaseDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorBaseDescriptor.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorBaseDescriptor.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorBaseDescriptor.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.util.ArrayList;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+
+/** 
+ * This class implements the common functionalities of 
+ * the subclasses of ValueAggregatorDescriptor class.
+ */
+public class ValueAggregatorBaseDescriptor 
+    implements ValueAggregatorDescriptor {
+
+  static public final String UNIQ_VALUE_COUNT = "UniqValueCount";
+
+  static public final String LONG_VALUE_SUM = "LongValueSum";
+
+  static public final String DOUBLE_VALUE_SUM = "DoubleValueSum";
+
+  static public final String VALUE_HISTOGRAM = "ValueHistogram";
+  
+  static public final String LONG_VALUE_MAX = "LongValueMax";
+  
+  static public final String LONG_VALUE_MIN = "LongValueMin";
+  
+  static public final String STRING_VALUE_MAX = "StringValueMax";
+  
+  static public final String STRING_VALUE_MIN = "StringValueMin";
+  
+  public String inputFile = null;
+
+  private static class MyEntry implements Entry<Text, Text> {
+    Text key;
+
+    Text val;
+
+    public Text getKey() {
+      return key;
+    }
+
+    public Text getValue() {
+      return val;
+    }
+
+    public Text setValue(Text val) {
+      this.val = val;
+      return val;
+    }
+
+    public MyEntry(Text key, Text val) {
+      this.key = key;
+      this.val = val;
+    }
+  }
+
+  /**
+   * 
+   * @param type the aggregation type
+   * @param id the aggregation id
+   * @param val the val associated with the id to be aggregated
+   * @return an Entry whose key is the aggregation id prefixed with 
+   * the aggregation type.
+   */
+  public static Entry<Text, Text> generateEntry(String type, 
+      String id, Text val) {
+    Text key = new Text(type + TYPE_SEPARATOR + id);
+    return new MyEntry(key, val);
+  }
+
+  /**
+   * 
+   * @param type the aggregation type
+   * @param uniqCount the limit on the number of unique values to keep,
+   *                  if type is UNIQ_VALUE_COUNT 
+   * @return a value aggregator of the given type.
+   */
+  static public ValueAggregator generateValueAggregator(String type, long uniqCount) {
+    if (type.compareToIgnoreCase(LONG_VALUE_SUM) == 0) {
+      return new LongValueSum();
+    } else if (type.compareToIgnoreCase(LONG_VALUE_MAX) == 0) {
+      return new LongValueMax();
+    } else if (type.compareToIgnoreCase(LONG_VALUE_MIN) == 0) {
+      return new LongValueMin();
+    } else if (type.compareToIgnoreCase(STRING_VALUE_MAX) == 0) {
+      return new StringValueMax();
+    } else if (type.compareToIgnoreCase(STRING_VALUE_MIN) == 0) {
+      return new StringValueMin();
+    } else if (type.compareToIgnoreCase(DOUBLE_VALUE_SUM) == 0) {
+      return new DoubleValueSum();
+    } else if (type.compareToIgnoreCase(UNIQ_VALUE_COUNT) == 0) {
+      return new UniqValueCount(uniqCount);
+    } else if (type.compareToIgnoreCase(VALUE_HISTOGRAM) == 0) {
+      return new ValueHistogram();
+    }
+    return null;
+  }
+
+  /**
+   * Generate 1 or 2 aggregation-id/value pairs for the given key/value pair.
+   * The first id will be of type LONG_VALUE_SUM, with "record_count" as
+   * its aggregation id. If the input is a file split,
+   * the second id of the same type will be generated too, with the file name 
+   * as its aggregation id. This achieves the behavior of counting the total 
+   * number of records in the input data, and the number of records 
+   * in each input file.
+   * 
+   * @param key
+   *          input key
+   * @param val
+   *          input value
+   * @return a list of aggregation id/value pairs. An aggregation id encodes an
+   *         aggregation type which is used to guide the way to aggregate the
+   *         value in the reduce/combiner phase of an Aggregate based job.
+   */
+  public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key,
+                                                          Object val) {
+    ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
+    String countType = LONG_VALUE_SUM;
+    String id = "record_count";
+    Entry<Text, Text> e = generateEntry(countType, id, ONE);
+    if (e != null) {
+      retv.add(e);
+    }
+    if (this.inputFile != null) {
+      e = generateEntry(countType, this.inputFile, ONE);
+      if (e != null) {
+        retv.add(e);
+      }
+    }
+    return retv;
+  }
+
+  /**
+   * Read the input file name from the configuration.
+   * 
+   * @param conf a configuration object
+   */
+  public void configure(Configuration conf) {
+    this.inputFile = conf.get("map.input.file");
+  }
+}
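
A job-specific descriptor typically extends this base class and overrides generateKeyValPairs(); a hedged sketch that emits one LongValueSum entry per whitespace-separated word, in the spirit of the AggregateWordCount example (the class name is illustrative):

import java.util.ArrayList;
import java.util.Map.Entry;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;

// Hypothetical descriptor: one "LongValueSum"-typed entry per word in the input value.
public class WordCountDescriptor extends ValueAggregatorBaseDescriptor {
  public ArrayList<Entry<Text, Text>> generateKeyValPairs(Object key, Object val) {
    ArrayList<Entry<Text, Text>> retv = new ArrayList<Entry<Text, Text>>();
    for (String word : val.toString().split("\\s+")) {
      if (word.length() > 0) {
        // The key becomes the type prefix plus the word; ONE is the constant
        // Text value declared on the ValueAggregatorDescriptor interface.
        retv.add(generateEntry(LONG_VALUE_SUM, word, ONE));
      }
    }
    return retv;
  }
}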

Added: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorCombiner.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorCombiner.java?rev=788608&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorCombiner.java (added)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorCombiner.java Fri Jun 26 06:43:33 2009
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.lib.aggregate;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * This class implements the generic combiner of Aggregate.
+ */
+public class ValueAggregatorCombiner<K1 extends WritableComparable<?>,
+                                     V1 extends Writable>
+  extends Reducer<Text, Text, Text, Text> {
+
+  /** Combines values for a given key.  
+   * @param key the key is expected to be a Text object whose prefix indicates
+   * the type of aggregation to apply to the values.
+   * @param values the values to combine
+   * @param context to collect combined values
+   */
+  public void reduce(Text key, Iterable<Text> values, Context context) 
+      throws IOException, InterruptedException {
+    String keyStr = key.toString();
+    int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
+    String type = keyStr.substring(0, pos);
+    long uniqCount = context.getConfiguration().
+      getLong("aggregate.max.num.unique.values", Long.MAX_VALUE);
+    ValueAggregator aggregator = ValueAggregatorBaseDescriptor
+      .generateValueAggregator(type, uniqCount);
+    for (Text val : values) {
+      aggregator.addNextValue(val);
+    }
+    Iterator<?> outputs = aggregator.getCombinerOutput().iterator();
+
+    while (outputs.hasNext()) {
+      Object v = outputs.next();
+      if (v instanceof Text) {
+        context.write(key, (Text)v);
+      } else {
+        context.write(key, new Text(v.toString()));
+      }
+    }
+  }
+}
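
The keys arriving at this combiner are the ones produced by generateEntry(): an aggregation type, the type separator declared on ValueAggregatorDescriptor, and an aggregation id. A small sketch of the dispatch that the reduce() method above performs (demo class name illustrative):

import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator;
import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor;
import org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor;

public class CombinerKeyDemo {
  public static void main(String[] args) {
    // Build a key the same way generateEntry() does, e.g. for the implicit
    // "record_count" id that ValueAggregatorBaseDescriptor always emits.
    String keyStr = "LongValueSum" + ValueAggregatorDescriptor.TYPE_SEPARATOR + "record_count";
    // The prefix before the separator selects the aggregator implementation.
    String type = keyStr.substring(0,
        keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR));
    ValueAggregator aggregator =
        ValueAggregatorBaseDescriptor.generateValueAggregator(type, Long.MAX_VALUE);
    aggregator.addNextValue("3");
    aggregator.addNextValue("4");
    System.out.println(aggregator.getCombinerOutput());  // [7]
  }
}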


