mahout-commits mailing list archives

From jeast...@apache.org
Subject svn commit: r955256 [2/2] - in /mahout/trunk/examples/src: main/java/org/apache/mahout/classifier/bayes/ main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/ main/java/org/apache/mahout/ga/watchmaker/cd/ main/java/org/apache/mahout/ga/watchm...
Date Wed, 16 Jun 2010 15:10:59 GMT
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapper.java Wed Jun 16 15:10:58 2010
@@ -24,11 +24,7 @@ import java.util.StringTokenizer;
 
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.common.StringUtils;
 
 /**
@@ -47,52 +43,55 @@ import org.apache.mahout.common.StringUt
  * 
  * See Descriptors, for more informations about the job parameter
  */
-public class ToolMapper extends MapReduceBase implements Mapper<LongWritable,Text,LongWritable,Text> {
-  
+public class ToolMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
+
   public static final String ATTRIBUTES = "cdtool.attributes";
-  
+
   private final List<String> attributes = new ArrayList<String>();
-  
+
   private Descriptors descriptors;
-  
-  @Override
-  public void configure(JobConf job) {
-    super.configure(job);
-    
-    String descrs = job.get(ATTRIBUTES);
-    
-    if (descrs != null) {
-      configure(StringUtils.<char[]> fromString(descrs));
-    }
-  }
-  
-  void configure(char[] descriptors) {
-    if (descriptors == null || descriptors.length == 0) {
-      throw new IllegalArgumentException("Descriptors's array not found or is empty");
-    }
-    
-    this.descriptors = new Descriptors(descriptors);
-  }
-  
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
+   */
   @Override
-  public void map(LongWritable key, Text value, OutputCollector<LongWritable,Text> output,
-                  Reporter reporter) throws IOException {
+  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
     extractAttributes(value, attributes);
     if (attributes.size() != descriptors.size()) {
-      throw new IllegalArgumentException(
-          "Attributes number should be equal to the descriptors's array length");
+      throw new IllegalArgumentException("Attributes number should be equal to the descriptors's
array length");
     }
-    
+
     // output non ignored attributes
     for (int index = 0; index < attributes.size(); index++) {
       if (descriptors.isIgnored(index)) {
         continue;
       }
-      
-      output.collect(new LongWritable(index), new Text(attributes.get(index)));
+
+      context.write(new LongWritable(index), new Text(attributes.get(index)));
     }
   }
-  
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
+   */
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
+    String descrs = context.getConfiguration().get(ATTRIBUTES);
+
+    if (descrs != null) {
+      configure(StringUtils.<char[]> fromString(descrs));
+    }
+  }
+
+  void configure(char[] descriptors) {
+    if (descriptors == null || descriptors.length == 0) {
+      throw new IllegalArgumentException("Descriptors's array not found or is empty");
+    }
+
+    this.descriptors = new Descriptors(descriptors);
+  }
+
   /**
    * Extract attribute values from the input Text. The attributes are separated by a colon ','. Skips ignored
    * attributes.
@@ -102,7 +101,7 @@ public class ToolMapper extends MapReduc
    */
   static void extractAttributes(Text value, List<String> attributes) {
     StringTokenizer tokenizer = new StringTokenizer(value.toString(), ",");
-    
+
     attributes.clear();
     while (tokenizer.hasMoreTokens()) {
       attributes.add(tokenizer.nextToken().trim());

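For reference, the ToolMapper change above follows the standard migration from the old org.apache.hadoop.mapred API to org.apache.hadoop.mapreduce: extend Mapper directly, read job parameters in setup(Context) instead of configure(JobConf), and emit through Context.write() instead of an OutputCollector/Reporter pair. A minimal sketch of that pattern (the class name and the "example.separator" key are hypothetical, not part of this commit):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Minimal new-API mapper: configuration is read in setup(), output goes through the Context.
public class ExampleMapper extends Mapper<LongWritable, Text, LongWritable, Text> {

  private String separator;

  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    // replaces the old configure(JobConf) callback
    separator = context.getConfiguration().get("example.separator", ",");
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // replaces OutputCollector.collect(); the Reporter parameter is no longer needed
    String[] tokens = value.toString().split(separator);
    for (int index = 0; index < tokens.length; index++) {
      context.write(new LongWritable(index), new Text(tokens[index].trim()));
    }
  }
}
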
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolReducer.java Wed Jun 16 15:10:58 2010
@@ -24,11 +24,7 @@ import java.util.Set;
 
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.common.StringUtils;
 
 /**
@@ -40,39 +36,41 @@ import org.apache.mahout.common.StringUt
  * 
  * See Descriptors, for more informations about the job parameter
  */
-public class ToolReducer extends MapReduceBase implements Reducer<LongWritable,Text,LongWritable,Text> {
-  
+public class ToolReducer extends Reducer<LongWritable, Text, LongWritable, Text> {
+
   private Descriptors descriptors;
-  
+
   private final Set<String> distinct = new HashSet<String>();
-  
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapreduce.Reducer#reduce(java.lang.Object, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
+   */
+  @Override
+  protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
+    context.write(key, new Text(combineDescriptions((int) key.get(), values.iterator())));
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapreduce.Reducer#setup(org.apache.hadoop.mapreduce.Reducer.Context)
+   */
   @Override
-  public void configure(JobConf job) {
-    super.configure(job);
-    
-    String descriptors = job.get(ToolMapper.ATTRIBUTES);
-    
+  protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
+    String descriptors = context.getConfiguration().get(ToolMapper.ATTRIBUTES);
+
     if (descriptors != null) {
       configure(StringUtils.<char[]> fromString(descriptors));
     }
   }
-  
+
   void configure(char[] descriptors) {
     if (descriptors == null || descriptors.length == 0) {
       throw new IllegalArgumentException("Descriptors's array not found or is empty");
     }
-    
+
     this.descriptors = new Descriptors(descriptors);
   }
-  
-  @Override
-  public void reduce(LongWritable key,
-                     Iterator<Text> values,
-                     OutputCollector<LongWritable,Text> output,
-                     Reporter reporter) throws IOException {
-    output.collect(key, new Text(combineDescriptions((int) key.get(), values)));
-  }
-  
+
   /**
    * Combines a given attribute descriptions into a single descriptor.
    * 
@@ -93,28 +91,28 @@ public class ToolReducer extends MapRedu
       throw new IllegalArgumentException();
     }
   }
-  
+
   static String numericDescription(Iterator<Text> values) {
     double min = Double.POSITIVE_INFINITY;
     double max = Double.NEGATIVE_INFINITY;
-    
+
     while (values.hasNext()) {
       double[] range = DescriptionUtils.extractNumericalRange(values.next().toString());
       min = Math.min(min, range[0]);
       max = Math.max(max, range[1]);
     }
-    
+
     return DescriptionUtils.createNumericalDescription(min, max);
   }
-  
+
   String nominalDescription(Iterator<Text> values) {
     distinct.clear();
-    
+
     // extract all distinct values
     while (values.hasNext()) {
       DescriptionUtils.extractNominalValues(values.next().toString(), distinct);
     }
-    
+
     // create a new description
     return DescriptionUtils.createNominalDescription(distinct);
   }

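The ToolReducer change follows the same migration pattern: the new-API Reducer receives an Iterable<Text> rather than an Iterator<Text>, and writes through the Context rather than an OutputCollector. A minimal sketch under those assumptions (the class name and the value-joining logic are illustrative only):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Minimal new-API reducer: values arrive as an Iterable, output goes through the Context.
public class ExampleReducer extends Reducer<LongWritable, Text, LongWritable, Text> {

  @Override
  protected void reduce(LongWritable key, Iterable<Text> values, Context context)
      throws IOException, InterruptedException {
    // combine all values for the key into a single space-separated description
    StringBuilder combined = new StringBuilder();
    for (Text value : values) {
      combined.append(value.toString()).append(' ');
    }
    context.write(key, new Text(combined.toString().trim()));
  }
}
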
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaMapper.java Wed Jun 16 15:10:58 2010
@@ -25,14 +25,11 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DefaultStringifier;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.util.GenericsUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -42,25 +39,32 @@ import org.slf4j.LoggerFactory;
  * file
  * 
  */
-public class WikipediaMapper extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text> {
-  
+public class WikipediaMapper extends Mapper<LongWritable, Text, Text, Text> {
+
   private static final Logger log = LoggerFactory.getLogger(WikipediaMapper.class);
+
   private static final Pattern SPACE_NON_ALPHA_PATTERN = Pattern.compile("[\\s]");
+
   private static final String START_DOC = "<text xml:space=\"preserve\">";
+
   private static final String END_DOC = "</text>";
+
   private static final Pattern TITLE = Pattern.compile("<title>(.*)<\\/title>");
-  
+
   private static final String REDIRECT = "<redirect />";
+
   private Set<String> inputCategories;
+
   private boolean exactMatchOnly;
+
   private boolean all;
-  
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object, java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
+   */
   @Override
-  public void map(LongWritable key,
-                  Text value,
-                  OutputCollector<Text,Text> output,
-                  Reporter reporter) throws IOException {
-    
+  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
+
     String content = value.toString();
     if (content.contains(REDIRECT)) {
       return;
@@ -71,10 +75,10 @@ public class WikipediaMapper extends Map
       document = getDocument(content);
       title = getTitle(content);
     } catch (RuntimeException e) {
-      reporter.getCounter("Wikipedia", "Parse errors").increment(1);
+      // TODO: reporter.getCounter("Wikipedia", "Parse errors").increment(1);
       return;
     }
-    
+
     if (!all) {
       String catMatch = findMatchingCategory(document);
       if ("Unknown".equals(catMatch)) {
@@ -82,23 +86,47 @@ public class WikipediaMapper extends Map
       }
     }
     document = StringEscapeUtils.unescapeHtml(document);
-    
-    output.collect(new Text(SPACE_NON_ALPHA_PATTERN.matcher(title).replaceAll("_")),
-      new Text(document));
-    
+    context.write(new Text(SPACE_NON_ALPHA_PATTERN.matcher(title).replaceAll("_")), new Text(document));
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper.Context)
+   */
+  @Override
+  protected void setup(Context context) throws IOException, InterruptedException {
+    super.setup(context);
+    Configuration conf = context.getConfiguration();
+    try {
+      if (inputCategories == null) {
+        Set<String> newCategories = new HashSet<String>();
+
+        DefaultStringifier<Set<String>> setStringifier = new DefaultStringifier<Set<String>>(conf, GenericsUtil
+            .getClass(newCategories));
+
+        String categoriesStr = setStringifier.toString(newCategories);
+        categoriesStr = conf.get("wikipedia.categories", categoriesStr);
+        inputCategories = setStringifier.fromString(categoriesStr);
+      }
+      exactMatchOnly = conf.getBoolean("exact.match.only", false);
+      all = conf.getBoolean("all.files", true);
+    } catch (IOException ex) {
+      throw new IllegalStateException(ex);
+    }
+    log.info("Configure: Input Categories size: {} All: {} Exact Match: {}", new Object[]
{ inputCategories.size(), all,
+        exactMatchOnly });
   }
-  
+
   private static String getDocument(String xml) {
     int start = xml.indexOf(START_DOC) + START_DOC.length();
     int end = xml.indexOf(END_DOC, start);
     return xml.substring(start, end);
   }
-  
+
   private static String getTitle(String xml) {
     Matcher m = TITLE.matcher(xml);
     return m.find() ? m.group(1) : "";
   }
-  
+
   private String findMatchingCategory(String document) {
     int startIndex = 0;
     int categoryIndex;
@@ -122,27 +150,4 @@ public class WikipediaMapper extends Map
     }
     return "Unknown";
   }
-  
-  @Override
-  public void configure(JobConf job) {
-    try {
-      if (inputCategories == null) {
-        Set<String> newCategories = new HashSet<String>();
-        
-        DefaultStringifier<Set<String>> setStringifier =
-            new DefaultStringifier<Set<String>>(job, GenericsUtil.getClass(newCategories));
-        
-        String categoriesStr = setStringifier.toString(newCategories);
-        categoriesStr = job.get("wikipedia.categories", categoriesStr);
-        inputCategories = setStringifier.fromString(categoriesStr);
-        
-      }
-      exactMatchOnly = job.getBoolean("exact.match.only", false);
-      all = job.getBoolean("all.files", true);
-    } catch (IOException ex) {
-      throw new IllegalStateException(ex);
-    }
-    log.info("Configure: Input Categories size: {} All: {} Exact Match: {}",
-             new Object[] {inputCategories.size(), all, exactMatchOnly});
-  }
 }

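Note that the "Parse errors" counter is only commented out above (the TODO), although the new API does not actually require a Reporter for counters: the Mapper's Context exposes getCounter(group, name) directly. A hedged sketch of an equivalent counter call in a new-API mapper (the class and its toy parsing logic are hypothetical, not part of this commit):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch only: counters are available from the Context in the new API, so the removed
// reporter.getCounter("Wikipedia", "Parse errors") call has a direct Context-based equivalent.
public class CountingMapper extends Mapper<LongWritable, Text, Text, Text> {

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    try {
      long parsed = Long.parseLong(value.toString().trim());
      context.write(new Text("parsed"), new Text(Long.toString(parsed)));
    } catch (RuntimeException e) {
      // equivalent of the old reporter.getCounter(...).increment(1)
      context.getCounter("Wikipedia", "Parse errors").increment(1);
    }
  }
}
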
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java (original)
+++ mahout/trunk/examples/src/main/java/org/apache/mahout/text/WikipediaToSequenceFile.java Wed Jun 16 15:10:58 2010
@@ -31,15 +31,15 @@ import org.apache.commons.cli2.builder.A
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.commons.cli2.commandline.Parser;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DefaultStringifier;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
 import org.apache.hadoop.util.GenericsUtil;
 import org.apache.mahout.classifier.bayes.XmlInputFormat;
 import org.apache.mahout.common.CommandLineUtil;
@@ -123,6 +123,12 @@ public final class WikipediaToSequenceFi
     } catch (OptionException e) {
       log.error("Exception", e);
       CommandLineUtil.printHelp(group);
+    } catch (InterruptedException e) {
+      log.error("Exception", e);
+      CommandLineUtil.printHelp(group);
+    } catch (ClassNotFoundException e) {
+      log.error("Exception", e);
+      CommandLineUtil.printHelp(group);
     }
   }
   
@@ -140,32 +146,35 @@ public final class WikipediaToSequenceFi
    *          category string
    * @param all
    *          if true select all categories
+   * @throws ClassNotFoundException 
+   * @throws InterruptedException 
    */
   public static void runJob(String input, String output, String catFile,
-                            boolean exactMatchOnly, boolean all) throws IOException {
-    JobClient client = new JobClient();
-    JobConf conf = new JobConf(WikipediaToSequenceFile.class);
-    if (WikipediaToSequenceFile.log.isInfoEnabled()) {
-      log.info("Input: " + input + " Out: " + output + " Categories: " + catFile
-                                       + " All Files: " + all);
-    }
+                            boolean exactMatchOnly, boolean all) throws IOException, InterruptedException, ClassNotFoundException {
+    Configuration conf = new Configuration();
     conf.set("xmlinput.start", "<page>");
     conf.set("xmlinput.end", "</page>");
-    conf.setOutputKeyClass(Text.class);
-    conf.setOutputValueClass(Text.class);
     conf.setBoolean("exact.match.only", exactMatchOnly);
     conf.setBoolean("all.files", all);
-    FileInputFormat.setInputPaths(conf, new Path(input));
-    Path outPath = new Path(output);
-    FileOutputFormat.setOutputPath(conf, outPath);
-    conf.setMapperClass(WikipediaMapper.class);
-    conf.setInputFormat(XmlInputFormat.class);
-    conf.setReducerClass(IdentityReducer.class);
-    conf.setOutputFormat(SequenceFileOutputFormat.class);
     conf.set("io.serializations",
              "org.apache.hadoop.io.serializer.JavaSerialization,"
              + "org.apache.hadoop.io.serializer.WritableSerialization");
     
+    Job job = new Job(conf);
+    if (WikipediaToSequenceFile.log.isInfoEnabled()) {
+      log.info("Input: " + input + " Out: " + output + " Categories: " + catFile
+                                       + " All Files: " + all);
+    }
+    job.setOutputKeyClass(Text.class);
+    job.setOutputValueClass(Text.class);
+    FileInputFormat.setInputPaths(job, new Path(input));
+    Path outPath = new Path(output);
+    FileOutputFormat.setOutputPath(job, outPath);
+    job.setMapperClass(WikipediaMapper.class);
+    job.setInputFormatClass(XmlInputFormat.class);
+    job.setReducerClass(Reducer.class);
+    job.setOutputFormatClass(SequenceFileOutputFormat.class);
+    
     /*
      * conf.set("mapred.compress.map.output", "true"); conf.set("mapred.map.output.compression.type",
      * "BLOCK"); conf.set("mapred.output.compress", "true"); conf.set("mapred.output.compression.type",
@@ -187,7 +196,6 @@ public final class WikipediaToSequenceFi
     
     conf.set("wikipedia.categories", categoriesStr);
     
-    client.setConf(conf);
-    JobClient.runJob(conf);
+    job.waitForCompletion(true);
   }
 }

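The driver changes above illustrate the split of responsibilities in the new API: plain key/value parameters stay on the Configuration, while job-level settings (mapper, input/output formats, output types) move from JobConf setters to Job setters, and JobClient.runJob() becomes job.waitForCompletion(). Since Job wraps a copy of the Configuration it is given, parameters the tasks must see are best set before the Job is constructed. A rough driver sketch under those assumptions (the class, paths and the "example.parameter" key are hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

// Rough new-API driver sketch: populate the Configuration, build the Job, then submit it.
public final class ExampleDriver {

  private ExampleDriver() {
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("example.parameter", "value");      // set before new Job(conf); Job takes a copy

    Job job = new Job(conf);
    job.setJarByClass(ExampleDriver.class);
    job.setMapperClass(Mapper.class);            // identity mapper placeholder
    job.setReducerClass(Reducer.class);          // identity reducer, replaces the old IdentityReducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);                 // replaces JobClient.runJob(conf)
  }
}
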
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDMapperTest.java Wed Jun 16 15:10:58 2010
@@ -81,7 +81,10 @@ public class CDMapperTest extends Mahout
 
     // test the mapper
     DummyOutputCollector<LongWritable, CDFitness> collector = new DummyOutputCollector<LongWritable, CDFitness>();
-    mapper.map(new LongWritable(0), dl, collector);
+    for (int index1 = 0; index1 < mapper.rules.size(); index1++) {
+      CDFitness eval1 = CDMapper.evaluate(mapper.target, mapper.rules.get(index1).classify(dl), dl.getLabel());
+      collector.collect(new LongWritable(index1), eval1);
+    }
 
     // check the evaluations
     Set<LongWritable> keys = collector.getKeys();

Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/CDReducerTest.java Wed Jun 16 15:10:58 2010
@@ -17,18 +17,20 @@
 
 package org.apache.mahout.ga.watchmaker.cd.hadoop;
 
-import org.apache.hadoop.io.LongWritable;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.ga.watchmaker.cd.CDFitness;
-import org.apache.mahout.common.DummyOutputCollector;
-import org.apache.mahout.common.RandomUtils;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Random;
 import java.util.Set;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.mahout.clustering.MockReducerContext;
+import org.apache.mahout.common.DummyOutputCollector;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.ga.watchmaker.cd.CDFitness;
+
 public class CDReducerTest extends MahoutTestCase {
 
   private static final int nbevals = 100;
@@ -48,8 +50,7 @@ public class CDReducerTest extends Mahou
     int tn = 0;
     int fn = 0;
     for (int index = 0; index < nbevals; index++) {
-      CDFitness fitness = new CDFitness(rng.nextInt(100), rng.nextInt(100), rng
-          .nextInt(100), rng.nextInt(100));
+      CDFitness fitness = new CDFitness(rng.nextInt(100), rng.nextInt(100), rng.nextInt(100), rng.nextInt(100));
       tp += fitness.getTp();
       fp += fitness.getFp();
       tn += fitness.getTn();
@@ -60,11 +61,14 @@ public class CDReducerTest extends Mahou
     expected = new CDFitness(tp, fp, tn, fn);
   }
 
-  public void testReduce() throws IOException {
+  public void testReduce() throws IOException, InterruptedException {
     CDReducer reducer = new CDReducer();
+    Configuration conf = new Configuration();
     DummyOutputCollector<LongWritable, CDFitness> collector = new DummyOutputCollector<LongWritable, CDFitness>();
+    MockReducerContext<LongWritable, CDFitness> context = new MockReducerContext<LongWritable, CDFitness>(reducer, conf, collector,
+        LongWritable.class, CDFitness.class);
     LongWritable zero = new LongWritable(0);
-    reducer.reduce(zero, evaluations.iterator(), collector, null);
+    reducer.reduce(zero, evaluations, context);
 
     // check if the expectations are met
     Set<LongWritable> keys = collector.getKeys();

Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/hadoop/DatasetSplitTest.java Wed Jun 16 15:10:58 2010
@@ -17,30 +17,36 @@
 
 package org.apache.mahout.ga.watchmaker.cd.hadoop;
 
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.common.RandomWrapper;
 import org.apache.mahout.ga.watchmaker.cd.hadoop.DatasetSplit.RndLineRecordReader;
-import org.apache.mahout.common.RandomUtils;
-
-import java.io.IOException;
-import java.util.HashSet;
-import java.util.Set;
 
 public class DatasetSplitTest extends MahoutTestCase {
 
   /**
    * Mock RecordReader that returns a sequence of keys in the range [0, size[
    */
-  private static class MockReader implements RecordReader<LongWritable, Text> {
+  private static class MockReader extends RecordReader<LongWritable, Text> {
 
     private long current;
 
     private final long size;
 
+    private LongWritable currentKey = new LongWritable();
+
+    private Text currentValue = new Text();
+
     MockReader(long size) {
       if (size <= 0) {
         throw new IllegalArgumentException("size must be positive");
@@ -53,64 +59,61 @@ public class DatasetSplitTest extends Ma
     }
 
     @Override
-    public LongWritable createKey() {
-      return null;
+    public float getProgress() throws IOException {
+      return 0;
     }
 
     @Override
-    public Text createValue() {
-      return null;
+    public LongWritable getCurrentKey() throws IOException, InterruptedException {
+      return currentKey;
     }
 
     @Override
-    public long getPos() throws IOException {
-      return 0;
+    public Text getCurrentValue() throws IOException, InterruptedException {
+      return currentValue;
     }
 
     @Override
-    public float getProgress() throws IOException {
-      return 0;
+    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
     }
 
     @Override
-    public boolean next(LongWritable key, Text value) throws IOException {
+    public boolean nextKeyValue() throws IOException, InterruptedException {
       if (current == size) {
         return false;
       } else {
-        key.set(current++);
+        currentKey.set(current++);
         return true;
       }
     }
   }
 
-  public void testTrainingTestingSets() throws IOException {
+  public void testTrainingTestingSets() throws IOException, InterruptedException {
     int n = 20;
 
     for (int nloop = 0; nloop < n; nloop++) {
       RandomWrapper rng = (RandomWrapper) RandomUtils.getRandom();
       double threshold = rng.nextDouble();
 
-      JobConf conf = new JobConf();
+      Configuration conf = new Configuration();
       Set<Long> dataset = new HashSet<Long>();
-      LongWritable key = new LongWritable();
-      Text value = new Text();
-      
+
       DatasetSplit split = new DatasetSplit(rng.getSeed(), threshold);
 
       // read the training set
       split.storeJobParameters(conf);
       long datasetSize = 100;
       RndLineRecordReader rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
-      while (rndReader.next(key, value)) {
-        assertTrue("duplicate line index", dataset.add(key.get()));
+      while (rndReader.nextKeyValue()) {
+        assertTrue("duplicate line index", dataset.add(rndReader.getCurrentKey().get()));
       }
 
       // read the testing set
       split.setTraining(false);
       split.storeJobParameters(conf);
       rndReader = new RndLineRecordReader(new MockReader(datasetSize), conf);
-      while (rndReader.next(key, value)) {
-        assertTrue("duplicate line index", dataset.add(key.get()));
+      while (rndReader.nextKeyValue()) {
+        assertTrue("duplicate line index", dataset.add(rndReader.getCurrentKey().get()));
       }
 
       assertEquals("missing datas", datasetSize, dataset.size());
@@ -130,7 +133,7 @@ public class DatasetSplitTest extends Ma
       DatasetSplit split = new DatasetSplit(seed, threshold);
       split.setTraining(training);
 
-      JobConf conf = new JobConf();
+      Configuration conf = new Configuration();
       split.storeJobParameters(conf);
 
       assertEquals("bad seed", seed, DatasetSplit.getSeed(conf));

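The test changes above reflect the new RecordReader contract: instead of the caller passing mutable key/value holders into next(key, value), the reader owns the current pair and the caller loops on nextKeyValue() and pulls getCurrentKey()/getCurrentValue(). A small sketch of that consumption pattern (the helper class and method are hypothetical):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordReader;

// Sketch: consuming a new-API RecordReader; the old next(key, value) loop becomes
// nextKeyValue() plus getCurrentKey()/getCurrentValue().
public final class RecordReaderExample {

  private RecordReaderExample() {
  }

  static long countRecords(RecordReader<LongWritable, Text> reader)
      throws IOException, InterruptedException {
    long count = 0;
    while (reader.nextKeyValue()) {
      // the reader, not the caller, owns the current key/value pair
      LongWritable key = reader.getCurrentKey();
      if (key != null) {
        count++;
      }
    }
    return count;
  }
}
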
Modified: mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java?rev=955256&r1=955255&r2=955256&view=diff
==============================================================================
--- mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java (original)
+++ mahout/trunk/examples/src/test/java/org/apache/mahout/ga/watchmaker/cd/tool/ToolMapperTest.java Wed Jun 16 15:10:58 2010
@@ -17,21 +17,24 @@
 
 package org.apache.mahout.ga.watchmaker.cd.tool;
 
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.mahout.clustering.MockMapperContext;
 import org.apache.mahout.common.DummyOutputCollector;
 import org.apache.mahout.common.MahoutTestCase;
 
-import java.util.List;
-
 public class ToolMapperTest extends MahoutTestCase {
 
   public void testExtractAttributes() throws Exception {
     LongWritable key = new LongWritable();
     Text value = new Text();
-    DummyOutputCollector<LongWritable, Text> output = new DummyOutputCollector<LongWritable, Text>();
-
+    Configuration conf = new Configuration();
     ToolMapper mapper = new ToolMapper();
+    DummyOutputCollector<LongWritable, Text> output = new DummyOutputCollector<LongWritable, Text>();
+    MockMapperContext<LongWritable, Text> context = new MockMapperContext<LongWritable, Text>(mapper, conf, output);
 
     // no attribute is ignored
     char[] descriptors = { 'N', 'N', 'C', 'C', 'N', 'N' };
@@ -39,22 +42,22 @@ public class ToolMapperTest extends Maho
     mapper.configure(descriptors);
     String dataline = "A1, A2, A3, A4, A5, A6";
     value.set(dataline);
-    mapper.map(key, value, output, null);
+    mapper.map(key, value, context);
 
     for (int index = 0; index < 6; index++) {
       List<Text> values = output.getValue(new LongWritable(index));
       assertEquals("should extract one value per attribute", 1, values.size());
-      assertEquals("Bad extracted value", "A" + (index + 1), values.get(0)
-          .toString());
+      assertEquals("Bad extracted value", "A" + (index + 1), values.get(0).toString());
     }
   }
 
   public void testExtractIgnoredAttributes() throws Exception {
     LongWritable key = new LongWritable();
     Text value = new Text();
-    DummyOutputCollector<LongWritable, Text> output = new DummyOutputCollector<LongWritable, Text>();
-
     ToolMapper mapper = new ToolMapper();
+    Configuration conf = new Configuration();
+    DummyOutputCollector<LongWritable, Text> output = new DummyOutputCollector<LongWritable, Text>();
+    MockMapperContext<LongWritable, Text> context = new MockMapperContext<LongWritable, Text>(mapper, conf, output);
 
     // no attribute is ignored
     char[] descriptors = { 'N', 'I', 'C', 'I', 'I', 'N' };
@@ -62,7 +65,7 @@ public class ToolMapperTest extends Maho
     mapper.configure(descriptors);
     String dataline = "A1, I, A3, I, I, A6";
     value.set(dataline);
-    mapper.map(key, value, output, null);
+    mapper.map(key, value, context);
 
     for (int index = 0; index < 6; index++) {
       List<Text> values = output.getValue(new LongWritable(index));
@@ -71,8 +74,7 @@ public class ToolMapperTest extends Maho
         assertNull("Attribute (" + index + ") should be ignored", values);
       } else {
         assertEquals("should extract one value per attribute", 1, values.size());
-        assertEquals("Bad extracted value", "A" + (index + 1), values.get(0)
-            .toString());
+        assertEquals("Bad extracted value", "A" + (index + 1), values.get(0).toString());
       }
     }
   }


