mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r887676 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop: ./ item/ pseudo/ slopeone/
Date Sun, 06 Dec 2009 13:05:18 GMT
Author: srowen
Date: Sun Dec  6 13:05:18 2009
New Revision: 887676

URL: http://svn.apache.org/viewvc?rev=887676&view=rev
Log:
More progress on CF Hadoop updates

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
      - copied, changed from r887528, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
      - copied, changed from r887324, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java
Sun Dec  6 13:05:18 2009
@@ -25,6 +25,7 @@
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.commons.cli2.builder.GroupBuilder;
 import org.apache.commons.cli2.commandline.Parser;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
@@ -34,6 +35,7 @@
 import org.apache.hadoop.mapred.OutputFormat;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.Tool;
 import org.apache.mahout.common.CommandLineUtil;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
@@ -43,10 +45,22 @@
 import java.util.HashMap;
 import java.util.Map;
 
-public abstract class AbstractJob {
+public abstract class AbstractJob implements Tool {
 
   private static final Logger log = LoggerFactory.getLogger(AbstractJob.class);
 
+  private Configuration configuration;
+
+  @Override
+  public Configuration getConf() {
+    return configuration;
+  }
+
+  @Override
+  public void setConf(Configuration configuration) {
+    this.configuration = configuration;
+  }
+
   protected static Option buildOption(String name, String shortName, String description) {
     return new DefaultOptionBuilder().withLongName(name).withRequired(true)
       .withShortName(shortName).withArgument(new ArgumentBuilder().withName(name).withMinimum(1)

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
(from r887528, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java&r1=887528&r2=887676&rev=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
Sun Dec  6 13:05:18 2009
@@ -17,88 +17,36 @@
 
 package org.apache.mahout.cf.taste.hadoop.item;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
-import org.apache.mahout.cf.taste.recommender.RecommendedItem;
-import org.apache.mahout.matrix.SparseVector;
-import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-import java.util.PriorityQueue;
-import java.util.Queue;
+import java.util.regex.Pattern;
 
-public final class RecommenderMapper
+public final class ItemIDIndexMapper
     extends MapReduceBase
-    implements Mapper<LongWritable, Vector, LongWritable, RecommendedItemsWritable> {
+    implements Mapper<LongWritable, Text, IntWritable, LongWritable> {
 
-  static final String COOCCURRENCE_PATH = "cooccurrencePath";
-  static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
-
-  private FileSystem fs;
-  private Path cooccurrencePath;
-  private int recommendationsPerUser;
+  private static final Pattern COMMA = Pattern.compile(",");
 
   @Override
-  public void configure(JobConf jobConf) {
-    try {
-      fs = FileSystem.get(jobConf);
-    } catch (IOException ioe) {
-      throw new IllegalStateException(ioe);
-    }
-    cooccurrencePath = new Path(jobConf.get(COOCCURRENCE_PATH)).makeQualified(fs);
-    recommendationsPerUser = jobConf.getInt(RECOMMENDATIONS_PER_USER, 10);
-  }
-
-  @Override
-  public void map(LongWritable userID,
-                  Vector userVector,
-                  OutputCollector<LongWritable, RecommendedItemsWritable> output,
+  public void map(LongWritable key,
+                  Text value,
+                  OutputCollector<IntWritable, LongWritable> output,
                   Reporter reporter) throws IOException {
-
-    SequenceFile.Reader reader = new SequenceFile.Reader(fs, cooccurrencePath, new Configuration());
-    LongWritable itemIDWritable = new LongWritable();
-    Vector cooccurrenceVector = new SparseVector();
-    Queue<RecommendedItem> topItems =
-        new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1, Collections.reverseOrder());
-    while (reader.next(itemIDWritable, cooccurrenceVector)) {
-      processOneRecommendation(userVector, itemIDWritable.get(), cooccurrenceVector, topItems);
-    }
-    List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size());
-    recommendations.addAll(topItems);
-    output.collect(userID, new RecommendedItemsWritable(recommendations));
+    String[] tokens = COMMA.split(value.toString());
+    long itemID = Long.parseLong(tokens[1]);
+    int index = itemIDToIndex(itemID);
+    output.collect(new IntWritable(index), new LongWritable(itemID));
   }
 
-  private void processOneRecommendation(Vector userVector,
-                                        long itemID,
-                                        Vector cooccurrenceVector,
-                                        Queue<RecommendedItem> topItems) {
-    double totalWeight = 0.0;
-    Iterator<Vector.Element> cooccurrences = cooccurrenceVector.iterateNonZero();
-    while (cooccurrences.hasNext()) {
-      Vector.Element cooccurrence = cooccurrences.next();
-      totalWeight += cooccurrence.get();
-    }
-    double score = userVector.dot(cooccurrenceVector) / totalWeight;
-    if (!Double.isNaN(score)) {
-      topItems.add(new GenericRecommendedItem(itemID, (float) score));
-      if (topItems.size() > recommendationsPerUser) {
-        topItems.poll();
-      }
-    }
+  static int itemIDToIndex(long itemID) {
+    return (int) (itemID) ^ (int) (itemID >>> 32);
   }
 
 }
\ No newline at end of file

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
(from r887324, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java&r1=887324&r2=887676&rev=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
Sun Dec  6 13:05:18 2009
@@ -17,35 +17,35 @@
 
 package org.apache.mahout.cf.taste.hadoop.item;
 
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable;
-import org.apache.mahout.matrix.SparseVector;
-import org.apache.mahout.matrix.Vector;
 
 import java.io.IOException;
 import java.util.Iterator;
 
-public final class ToUserVectorReducer
+public final class ItemIDIndexReducer
     extends MapReduceBase
-    implements Reducer<LongWritable, ItemPrefWritable, LongWritable, Vector> {
+    implements Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
 
   @Override
-  public void reduce(LongWritable userID,
-                     Iterator<ItemPrefWritable> itemPrefs,
-                     OutputCollector<LongWritable, Vector> output,
+  public void reduce(IntWritable index,
+                     Iterator<LongWritable> possibleItemIDs,
+                     OutputCollector<IntWritable, LongWritable> output,
                      Reporter reporter) throws IOException {
-    if (itemPrefs.hasNext()) {
-      Vector userVector = new SparseVector();
-      while (itemPrefs.hasNext()) {
-        ItemPrefWritable itemPref = itemPrefs.next();
-        userVector.set(String.valueOf(itemPref.getItemID()), itemPref.getPrefValue());
+    if (possibleItemIDs.hasNext()) {
+      long minimumItemID = Long.MAX_VALUE;
+      while (possibleItemIDs.hasNext()) {
+        long itemID = possibleItemIDs.next().get();
+        if (itemID < minimumItemID) {
+          minimumItemID = itemID;
+        }
       }
-      output.collect(userID, userVector);
+      output.collect(index, new LongWritable(minimumItemID));
     }
   }
 
-}
+}
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
Sun Dec  6 13:05:18 2009
@@ -18,6 +18,7 @@
 package org.apache.mahout.cf.taste.hadoop.item;
 
 import org.apache.commons.cli2.Option;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
@@ -26,17 +27,20 @@
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.TextOutputFormat;
 import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.cf.taste.hadoop.AbstractJob;
 import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
 import org.apache.mahout.matrix.Vector;
 
+import java.io.IOException;
 import java.util.Map;
 
 public final class RecommenderJob extends AbstractJob {
 
-  public static void main(String[] args) throws Exception {
+  @Override
+  public int run(String[] args) throws IOException {
 
     Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user");
 
@@ -47,8 +51,22 @@
     String jarFile = parsedArgs.get("--jarFile").toString();
     int recommendationsPerUser = Integer.parseInt((String) parsedArgs.get("--numRecommendations"));
     String userVectorPath = outputPath + "/userVectors";
+    String itemIDIndexPath = outputPath + "/itemIDIndex";
     String cooccurrencePath = outputPath + "/cooccurrence";
 
+    JobConf itemIDIndexConf = prepareJobConf(prefsFile,
+                                             itemIDIndexPath,
+                                             jarFile,
+                                             TextInputFormat.class,
+                                             ItemIDIndexMapper.class,
+                                             IntWritable.class,
+                                             LongWritable.class,
+                                             ItemIDIndexReducer.class,
+                                             IntWritable.class,
+                                             LongWritable.class,
+                                             SequenceFileOutputFormat.class);
+    JobClient.runJob(itemIDIndexConf);
+
     JobConf toUserVectorConf = prepareJobConf(prefsFile,
                                               userVectorPath,
                                               jarFile,
@@ -67,10 +85,10 @@
                                                 jarFile,
                                                 SequenceFileInputFormat.class,
                                                 UserVectorToCooccurrenceMapper.class,
-                                                LongWritable.class,
-                                                LongWritable.class,
+                                                IntWritable.class,
+                                                IntWritable.class,
                                                 UserVectorToCooccurrenceReducer.class,
-                                                LongWritable.class,
+                                                IntWritable.class,
                                                 Vector.class,
                                                 SequenceFileOutputFormat.class);
     JobClient.runJob(toCooccurrenceConf);
@@ -87,8 +105,14 @@
                                              RecommendedItemsWritable.class,
                                              TextOutputFormat.class);
     recommenderConf.set(RecommenderMapper.COOCCURRENCE_PATH, cooccurrencePath);
+    recommenderConf.set(RecommenderMapper.ITEMID_INDEX_PATH, itemIDIndexPath);    
     recommenderConf.setInt(RecommenderMapper.RECOMMENDATIONS_PER_USER, recommendationsPerUser);
     JobClient.runJob(recommenderConf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new RecommenderJob(), args);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java
Sun Dec  6 13:05:18 2009
@@ -20,6 +20,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.mapred.JobConf;
@@ -28,6 +29,7 @@
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
 import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.matrix.SparseVector;
@@ -46,11 +48,14 @@
     implements Mapper<LongWritable, Vector, LongWritable, RecommendedItemsWritable> {
 
   static final String COOCCURRENCE_PATH = "cooccurrencePath";
+  static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
   static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
 
   private FileSystem fs;
   private Path cooccurrencePath;
+  private Path itemIDIndexPath;
   private int recommendationsPerUser;
+  private FastByIDMap<Long> indexItemIDMap;
 
   @Override
   public void configure(JobConf jobConf) {
@@ -60,7 +65,19 @@
       throw new IllegalStateException(ioe);
     }
     cooccurrencePath = new Path(jobConf.get(COOCCURRENCE_PATH)).makeQualified(fs);
+    itemIDIndexPath = new Path(jobConf.get(ITEMID_INDEX_PATH)).makeQualified(fs);
     recommendationsPerUser = jobConf.getInt(RECOMMENDATIONS_PER_USER, 10);
+    indexItemIDMap = new FastByIDMap<Long>();
+    try {
+      SequenceFile.Reader reader = new SequenceFile.Reader(fs, itemIDIndexPath, new Configuration());
+      IntWritable index = new IntWritable();
+      LongWritable itemID = new LongWritable();
+      while (reader.next(index, itemID)) {
+        indexItemIDMap.put(index.get(), itemID.get());
+      }
+    } catch (IOException ioe) {
+      throw new IllegalStateException(ioe);
+    }
   }
 
   @Override
@@ -70,12 +87,17 @@
                   Reporter reporter) throws IOException {
 
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, cooccurrencePath, new Configuration());
-    LongWritable itemIDWritable = new LongWritable();
-    Vector cooccurrenceVector = new SparseVector();
+    IntWritable indexWritable = new IntWritable();
+    Vector cooccurrenceVector = new SparseVector(Integer.MAX_VALUE, 1000);
     Queue<RecommendedItem> topItems =
         new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1, Collections.reverseOrder());
-    while (reader.next(itemIDWritable, cooccurrenceVector)) {
-      processOneRecommendation(userVector, itemIDWritable.get(), cooccurrenceVector, topItems);
+    while (reader.next(indexWritable, cooccurrenceVector)) {
+      Long itemID = indexItemIDMap.get(indexWritable.get());
+      if (itemID != null) {
+        processOneRecommendation(userVector, itemID, cooccurrenceVector, topItems);
+      } else {
+        throw new IllegalStateException("Found index without item ID: " + indexWritable.get());
+      }
     }
     List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size());
     recommendations.addAll(topItems);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
Sun Dec  6 13:05:18 2009
@@ -39,10 +39,11 @@
                      OutputCollector<LongWritable, Vector> output,
                      Reporter reporter) throws IOException {
     if (itemPrefs.hasNext()) {
-      Vector userVector = new SparseVector();
+      Vector userVector = new SparseVector(Integer.MAX_VALUE, 100);
       while (itemPrefs.hasNext()) {
         ItemPrefWritable itemPref = itemPrefs.next();
-        userVector.set(String.valueOf(itemPref.getItemID()), itemPref.getPrefValue());
+        int index = ItemIDIndexMapper.itemIDToIndex(itemPref.getItemID());
+        userVector.set(index, itemPref.getPrefValue());
       }
       output.collect(userID, userVector);
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
Sun Dec  6 13:05:18 2009
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.hadoop.item;
 
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.Mapper;
@@ -29,23 +30,23 @@
 
 public final class UserVectorToCooccurrenceMapper
     extends MapReduceBase
-    implements Mapper<LongWritable, Vector, LongWritable, LongWritable> {
+    implements Mapper<LongWritable, Vector, IntWritable, IntWritable> {
 
   @Override
   public void map(LongWritable userID,
                   Vector userVector,
-                  OutputCollector<LongWritable, LongWritable> output,
+                  OutputCollector<IntWritable, IntWritable> output,
                   Reporter reporter) throws IOException {
 
     Iterator<Vector.Element> it = userVector.iterateNonZero();
     while (it.hasNext()) {
-      long itemID1 = it.next().index();
+      int index1 = it.next().index();
       Iterator<Vector.Element> it2 = userVector.iterateNonZero();
-      LongWritable itemWritable1 = new LongWritable(itemID1);
+      IntWritable itemWritable1 = new IntWritable(index1);
       while (it2.hasNext()) {
-        long itemID2 = it2.next().index();
-        if (itemID1 != itemID2) {
-          output.collect(itemWritable1, new LongWritable(itemID2));
+        int index2 = it2.next().index();
+        if (index1 != index2) {
+          output.collect(itemWritable1, new IntWritable(index2));
         }
       }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
Sun Dec  6 13:05:18 2009
@@ -17,7 +17,7 @@
 
 package org.apache.mahout.cf.taste.hadoop.item;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.mapred.MapReduceBase;
 import org.apache.hadoop.mapred.OutputCollector;
 import org.apache.hadoop.mapred.Reducer;
@@ -30,23 +30,21 @@
 
 public final class UserVectorToCooccurrenceReducer
     extends MapReduceBase
-    implements Reducer<LongWritable, LongWritable, LongWritable, Vector> {
+    implements Reducer<IntWritable, IntWritable, IntWritable, Vector> {
 
   @Override
-  public void reduce(LongWritable itemID1,
-                     Iterator<LongWritable> itemID2s,
-                     OutputCollector<LongWritable, Vector> output,
+  public void reduce(IntWritable index1,
+                     Iterator<IntWritable> index2s,
+                     OutputCollector<IntWritable, Vector> output,
                      Reporter reporter) throws IOException {
-
-    if (itemID2s.hasNext()) {
-      Vector cooccurrenceRow = new SparseVector();
-      while (itemID2s.hasNext()) {
-        String label = String.valueOf(itemID2s.next());
-        cooccurrenceRow.set(label, cooccurrenceRow.get(label) + 1.0);
+    if (index2s.hasNext()) {
+      Vector cooccurrenceRow = new SparseVector(Integer.MAX_VALUE, 1000);
+      while (index2s.hasNext()) {
+        int index2 = index2s.next().get();
+        cooccurrenceRow.set(index2, cooccurrenceRow.get(index2) + 1.0);
       }
-      output.collect(itemID1, cooccurrenceRow);
+      output.collect(index1, cooccurrenceRow);
     }
-
   }
 
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java
Sun Dec  6 13:05:18 2009
@@ -24,10 +24,12 @@
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.TextOutputFormat;
 import org.apache.hadoop.mapred.lib.IdentityReducer;
+import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.cf.taste.hadoop.AbstractJob;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
 import org.apache.mahout.cf.taste.recommender.Recommender;
 
+import java.io.IOException;
 import java.util.Map;
 
 /**
@@ -100,10 +102,8 @@
  */
 public final class RecommenderJob extends AbstractJob {
 
-  private RecommenderJob() {
-  }
-
-  public static void main(String[] args) throws Exception {
+  @Override
+  public int run(String[] args) throws IOException {
 
     Option recommendClassOpt = buildOption("recommenderClassName", "r", "Name of recommender class to instantiate");
     Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user");
@@ -135,7 +135,11 @@
     jobConf.set(RecommenderMapper.DATA_MODEL_FILE, dataModelFile);
 
     JobClient.runJob(jobConf);
+    return 0;
+  }
 
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new RecommenderJob(), args);
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=887676&r1=887675&r2=887676&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
Sun Dec  6 13:05:18 2009
@@ -26,19 +26,19 @@
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.mapred.TextOutputFormat;
 import org.apache.hadoop.mapred.lib.IdentityMapper;
+import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.cf.taste.hadoop.AbstractJob;
 import org.apache.mahout.cf.taste.hadoop.ItemItemWritable;
 import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
 
+import java.io.IOException;
 import java.util.Map;
 
 public final class SlopeOneAverageDiffsJob extends AbstractJob {
 
-  private SlopeOneAverageDiffsJob() {
-  }
-
-  public static void main(String[] args) throws Exception {
+  @Override
+  public int run(String[] args) throws IOException {
 
     Map<String,Object> parsedArgs = parseArguments(args);
 
@@ -72,6 +72,11 @@
                                                     FloatWritable.class,
                                                     TextOutputFormat.class);
     JobClient.runJob(diffsToAveragesJobConf);
+    return 0;
+  }
+
+  public static void main(String[] args) throws Exception {
+    ToolRunner.run(new SlopeOneAverageDiffsJob(), args);
   }
 
 }
\ No newline at end of file



Mime
View raw message