mahout-commits mailing list archives

From: sro...@apache.org
Subject: svn commit: r887727 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop: ./ item/ pseudo/ slopeone/
Date: Sun, 06 Dec 2009 18:06:47 GMT
Author: srowen
Date: Sun Dec  6 18:06:46 2009
New Revision: 887727

URL: http://svn.apache.org/viewvc?rev=887727&view=rev
Log:
More Hadoop CF progress: buildOption() now takes a boolean 'required' flag, parseArguments() adds an optional --tempDir argument for intermediate output, AbstractJob no longer deletes a pre-existing output path, the item-based mappers and reducers are typed against SparseVector rather than Vector, and the item and slope-one jobs write their intermediate data under --tempDir instead of under the output path.
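
Not part of this commit, but as a usage illustration of the revised AbstractJob API (first diff below): the sketch shows how a job might declare its options with the new boolean 'required' parameter of buildOption() and read the new --tempDir argument from parseArguments(). The ExampleJob class and its --maxPrefs option are hypothetical, invented only for this sketch.

package org.apache.mahout.cf.taste.hadoop;

import org.apache.commons.cli2.Option;

import java.io.IOException;
import java.util.Map;

// Hypothetical subclass; mirrors how item/RecommenderJob and the other jobs in
// this commit consume the parsed argument map.
public final class ExampleJob extends AbstractJob {

  @Override
  public int run(String[] args) throws IOException {
    // Extra job-specific option; the new 'required' flag is false, so parsing
    // still succeeds when --maxPrefs is omitted.
    Option maxPrefsOpt = buildOption("maxPrefs", "x", "Max preferences per user", false);

    Map<String,Object> parsedArgs = parseArguments(args, maxPrefsOpt);
    String inputPath = parsedArgs.get("--input").toString();
    String tempDirPath = parsedArgs.get("--tempDir").toString();   // intermediate output
    String outputPath = parsedArgs.get("--output").toString();
    String jarFile = parsedArgs.get("--jarFile").toString();

    // ... build JobConfs with prepareJobConf(...) and run them, as the jobs in
    // this commit do, writing intermediate data under tempDirPath ...
    return 0;
  }
}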

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java Sun Dec  6 18:06:46 2009
@@ -61,27 +61,26 @@
     this.configuration = configuration;
   }
 
-  protected static Option buildOption(String name, String shortName, String description) {
-    return new DefaultOptionBuilder().withLongName(name).withRequired(true)
+  protected static Option buildOption(String name, String shortName, String description, boolean required) {
+    return new DefaultOptionBuilder().withLongName(name).withRequired(required)
       .withShortName(shortName).withArgument(new ArgumentBuilder().withName(name).withMinimum(1)
       .withMaximum(1).create()).withDescription(description).create();
   }
 
   protected static Map<String,Object> parseArguments(String[] args, Option... extraOpts) {
 
-    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
-    ArgumentBuilder abuilder = new ArgumentBuilder();
-
-    Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create();
-    Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create();
-    Option helpOpt = DefaultOptionCreator.helpOption(obuilder);
-    Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar");
-
-    GroupBuilder gBuilder = new GroupBuilder().withName("Options");
-    gBuilder = gBuilder.withOption(inputOpt);
-    gBuilder = gBuilder.withOption(outputOpt);
-    gBuilder = gBuilder.withOption(helpOpt);
-    gBuilder = gBuilder.withOption(jarFileOpt);
+    Option inputOpt = DefaultOptionCreator.inputOption().create();
+    Option tempDirOpt = buildOption("tempDir", "t", "Intermediate output directory", false);
+    Option outputOpt = DefaultOptionCreator.outputOption().create();
+    Option helpOpt = DefaultOptionCreator.helpOption();
+    Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar", true);
+
+    GroupBuilder gBuilder = new GroupBuilder().withName("Options")
+      .withOption(inputOpt)
+      .withOption(tempDirOpt)
+      .withOption(outputOpt)
+      .withOption(helpOpt)
+      .withOption(jarFileOpt);
 
     for (Option opt : extraOpts) {
       gBuilder = gBuilder.withOption(opt);
@@ -107,6 +106,7 @@
 
     Map<String,Object> result = new HashMap<String,Object>();
     result.put(inputOpt.getPreferredName(), cmdLine.getValue(inputOpt));
+    result.put(tempDirOpt.getPreferredName(), cmdLine.getValue(tempDirOpt));
     result.put(outputOpt.getPreferredName(), cmdLine.getValue(outputOpt));
     result.put(helpOpt.getPreferredName(), cmdLine.getValue(helpOpt));
     result.put(jarFileOpt.getPreferredName(), cmdLine.getValue(jarFileOpt));
@@ -135,10 +135,6 @@
     Path inputPathPath = new Path(inputPath).makeQualified(fs);
     Path outputPathPath = new Path(outputPath).makeQualified(fs);
 
-    if (fs.exists(outputPathPath)) {
-      fs.delete(outputPathPath, true);
-    }
-
     jobConf.set("mapred.jar", jarFile);
     jobConf.setJar(jarFile);
 

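A behavioral note on the hunk above: AbstractJob no longer deletes a pre-existing output path before submitting a job, so a caller re-running a job against the same output directory would presumably have to perform that cleanup itself. The helper below is not part of this commit; it simply mirrors the removed lines using the same Hadoop FileSystem calls.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

// Hypothetical helper mirroring the cleanup removed from AbstractJob above.
final class OutputCleanup {

  private OutputCleanup() {
  }

  static void deleteIfExists(Configuration conf, String outputPath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    Path outputPathPath = new Path(outputPath).makeQualified(fs);
    if (fs.exists(outputPathPath)) {
      // Recursive delete, exactly as the removed code did
      fs.delete(outputPathPath, true);
    }
  }
}
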
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sun Dec  6 18:06:46 2009
@@ -32,7 +32,7 @@
 import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
-import org.apache.mahout.matrix.Vector;
+import org.apache.mahout.matrix.SparseVector;
 
 import java.io.IOException;
 import java.util.Map;
@@ -42,19 +42,20 @@
   @Override
   public int run(String[] args) throws IOException {
 
-    Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user");
+    Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", true);
 
     Map<String,Object> parsedArgs = parseArguments(args, numReccomendationsOpt);
 
-    String prefsFile = parsedArgs.get("--input").toString();
+    String inputPath = parsedArgs.get("--input").toString();
+    String tempDirPath = parsedArgs.get("--tempDir").toString();
     String outputPath = parsedArgs.get("--output").toString();
     String jarFile = parsedArgs.get("--jarFile").toString();
     int recommendationsPerUser = Integer.parseInt((String) parsedArgs.get("--numRecommendations"));
-    String userVectorPath = outputPath + "/userVectors";
-    String itemIDIndexPath = outputPath + "/itemIDIndex";
-    String cooccurrencePath = outputPath + "/cooccurrence";
+    String userVectorPath = tempDirPath + "/userVectors";
+    String itemIDIndexPath = tempDirPath + "/itemIDIndex";
+    String cooccurrencePath = tempDirPath + "/cooccurrence";
 
-    JobConf itemIDIndexConf = prepareJobConf(prefsFile,
+    JobConf itemIDIndexConf = prepareJobConf(inputPath,
                                              itemIDIndexPath,
                                              jarFile,
                                              TextInputFormat.class,
@@ -67,7 +68,7 @@
                                              SequenceFileOutputFormat.class);
     JobClient.runJob(itemIDIndexConf);
 
-    JobConf toUserVectorConf = prepareJobConf(prefsFile,
+    JobConf toUserVectorConf = prepareJobConf(inputPath,
                                               userVectorPath,
                                               jarFile,
                                               TextInputFormat.class,
@@ -76,7 +77,7 @@
                                               ItemPrefWritable.class,
                                               ToUserVectorReducer.class,
                                               LongWritable.class,
-                                              Vector.class,
+                                              SparseVector.class,
                                               SequenceFileOutputFormat.class);
     JobClient.runJob(toUserVectorConf);
 
@@ -89,7 +90,7 @@
                                                 IntWritable.class,
                                                 UserVectorToCooccurrenceReducer.class,
                                                 IntWritable.class,
-                                                Vector.class,
+                                                SparseVector.class,
                                                 SequenceFileOutputFormat.class);
     JobClient.runJob(toCooccurrenceConf);
 

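For context on the new --tempDir flow in the job above, a hypothetical driver is sketched below. It is not part of this commit and assumes RecommenderJob can be run through Hadoop's Tool/ToolRunner machinery, which the run(String[]) override suggests but this diff does not show; the input file, directory names, and jar name are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob;

// Hypothetical driver; all paths and the jar name are placeholders.
public final class RunItemRecommenderJob {

  private RunItemRecommenderJob() {
  }

  public static void main(String[] unused) throws Exception {
    String[] jobArgs = {
        "--input", "prefs.csv",           // preference data file (placeholder)
        "--output", "recommendations",    // final recommendations per user
        "--tempDir", "temp",              // holds userVectors/, itemIDIndex/, cooccurrence/
        "--jarFile", "mahout-core.jar",   // placeholder implementation jar
        "--numRecommendations", "10",
    };
    ToolRunner.run(new Configuration(), new RecommenderJob(), jobArgs);
  }
}
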
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java Sun Dec  6 18:06:46 2009
@@ -45,7 +45,7 @@
 
 public final class RecommenderMapper
     extends MapReduceBase
-    implements Mapper<LongWritable, Vector, LongWritable, RecommendedItemsWritable> {
+    implements Mapper<LongWritable, SparseVector, LongWritable, RecommendedItemsWritable> {
 
   static final String COOCCURRENCE_PATH = "cooccurrencePath";
   static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
@@ -82,7 +82,7 @@
 
   @Override
   public void map(LongWritable userID,
-                  Vector userVector,
+                  SparseVector userVector,
                   OutputCollector<LongWritable, RecommendedItemsWritable> output,
                   Reporter reporter) throws IOException {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Sun Dec  6 18:06:46 2009
@@ -24,22 +24,21 @@
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable;
 import org.apache.mahout.matrix.SparseVector;
-import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
 import java.util.Iterator;
 
 public final class ToUserVectorReducer
     extends MapReduceBase
-    implements Reducer<LongWritable, ItemPrefWritable, LongWritable, Vector> {
+    implements Reducer<LongWritable, ItemPrefWritable, LongWritable, SparseVector> {
 
   @Override
   public void reduce(LongWritable userID,
                      Iterator<ItemPrefWritable> itemPrefs,
-                     OutputCollector<LongWritable, Vector> output,
+                     OutputCollector<LongWritable, SparseVector> output,
                      Reporter reporter) throws IOException {
     if (itemPrefs.hasNext()) {
-      Vector userVector = new SparseVector(Integer.MAX_VALUE, 100);
+      SparseVector userVector = new SparseVector(Integer.MAX_VALUE, 100);
       while (itemPrefs.hasNext()) {
         ItemPrefWritable itemPref = itemPrefs.next();
         int index = ItemIDIndexMapper.itemIDToIndex(itemPref.getItemID());

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java Sun Dec  6 18:06:46 2009
@@ -23,6 +23,7 @@
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.matrix.SparseVector;
 import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
@@ -30,11 +31,11 @@
 
 public final class UserVectorToCooccurrenceMapper
     extends MapReduceBase
-    implements Mapper<LongWritable, Vector, IntWritable, IntWritable> {
+    implements Mapper<LongWritable, SparseVector, IntWritable, IntWritable> {
 
   @Override
   public void map(LongWritable userID,
-                  Vector userVector,
+                  SparseVector userVector,
                   OutputCollector<IntWritable, IntWritable> output,
                   Reporter reporter) throws IOException {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java Sun Dec  6 18:06:46 2009
@@ -23,22 +23,21 @@
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.matrix.SparseVector;
-import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
 import java.util.Iterator;
 
 public final class UserVectorToCooccurrenceReducer
     extends MapReduceBase
-    implements Reducer<IntWritable, IntWritable, IntWritable, Vector> {
+    implements Reducer<IntWritable, IntWritable, IntWritable, SparseVector> {
 
   @Override
   public void reduce(IntWritable index1,
                      Iterator<IntWritable> index2s,
-                     OutputCollector<IntWritable, Vector> output,
+                     OutputCollector<IntWritable, SparseVector> output,
                      Reporter reporter) throws IOException {
     if (index2s.hasNext()) {
-      Vector cooccurrenceRow = new SparseVector(Integer.MAX_VALUE, 1000);
+      SparseVector cooccurrenceRow = new SparseVector(Integer.MAX_VALUE, 1000);
       while (index2s.hasNext()) {
         int index2 = index2s.next().get();
         cooccurrenceRow.set(index2, cooccurrenceRow.get(index2) + 1.0);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Sun Dec  6 18:06:46 2009
@@ -105,9 +105,9 @@
   @Override
   public int run(String[] args) throws IOException {
 
-    Option recommendClassOpt = buildOption("recommenderClassName", "r", "Name of recommender class to instantiate");
-    Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user");
-    Option dataModelFileOpt = buildOption("dataModelFile", "m", "File containing preference data");
+    Option recommendClassOpt = buildOption("recommenderClassName", "r", "Name of recommender class to instantiate", true);
+    Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", true);
+    Option dataModelFileOpt = buildOption("dataModelFile", "m", "File containing preference data", true);
 
     Map<String,Object> parsedArgs = parseArguments(args, recommendClassOpt, numReccomendationsOpt, dataModelFileOpt);
     String userIDFile = parsedArgs.get("--input").toString();

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=887727&r1=887726&r2=887727&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Sun Dec  6 18:06:46 2009
@@ -45,7 +45,7 @@
     String prefsFile = parsedArgs.get("--input").toString();
     String outputPath = parsedArgs.get("--output").toString();
     String jarFile = parsedArgs.get("--jarFile").toString();
-    String averagesOutputPath = outputPath + "/averages";
+    String averagesOutputPath = parsedArgs.get("--tempDir").toString();
 
     JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile,
                                                  averagesOutputPath,


