Return-Path: Delivered-To: apmail-lucene-mahout-commits-archive@minotaur.apache.org Received: (qmail 43248 invoked from network); 6 Dec 2009 18:07:11 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 6 Dec 2009 18:07:11 -0000 Received: (qmail 26688 invoked by uid 500); 6 Dec 2009 18:07:11 -0000 Delivered-To: apmail-lucene-mahout-commits-archive@lucene.apache.org Received: (qmail 26605 invoked by uid 500); 6 Dec 2009 18:07:10 -0000 Mailing-List: contact mahout-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mahout-dev@lucene.apache.org Delivered-To: mailing list mahout-commits@lucene.apache.org Received: (qmail 26596 invoked by uid 99); 6 Dec 2009 18:07:10 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 06 Dec 2009 18:07:10 +0000 X-ASF-Spam-Status: No, hits=-2.6 required=5.0 tests=AWL,BAYES_00 X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 06 Dec 2009 18:07:08 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 435FB238888F; Sun, 6 Dec 2009 18:06:48 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r887727 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop: ./ item/ pseudo/ slopeone/ Date: Sun, 06 Dec 2009 18:06:47 -0000 To: mahout-commits@lucene.apache.org From: srowen@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20091206180648.435FB238888F@eris.apache.org> Author: srowen Date: Sun Dec 6 18:06:46 2009 New Revision: 887727 URL: http://svn.apache.org/viewvc?rev=887727&view=rev Log: More Hadoop CF progress Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/AbstractJob.java Sun Dec 6 18:06:46 2009 @@ -61,27 +61,26 @@ this.configuration = configuration; } - protected static Option buildOption(String name, String shortName, String description) { - return new DefaultOptionBuilder().withLongName(name).withRequired(true) + protected static Option buildOption(String name, String shortName, String description, boolean required) { + return new DefaultOptionBuilder().withLongName(name).withRequired(required) .withShortName(shortName).withArgument(new ArgumentBuilder().withName(name).withMinimum(1) .withMaximum(1).create()).withDescription(description).create(); } protected static Map parseArguments(String[] args, Option... extraOpts) { - DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); - ArgumentBuilder abuilder = new ArgumentBuilder(); - - Option inputOpt = DefaultOptionCreator.inputOption(obuilder, abuilder).create(); - Option outputOpt = DefaultOptionCreator.outputOption(obuilder, abuilder).create(); - Option helpOpt = DefaultOptionCreator.helpOption(obuilder); - Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar"); - - GroupBuilder gBuilder = new GroupBuilder().withName("Options"); - gBuilder = gBuilder.withOption(inputOpt); - gBuilder = gBuilder.withOption(outputOpt); - gBuilder = gBuilder.withOption(helpOpt); - gBuilder = gBuilder.withOption(jarFileOpt); + Option inputOpt = DefaultOptionCreator.inputOption().create(); + Option tempDirOpt = buildOption("tempDir", "t", "Intermediate output directory", false); + Option outputOpt = DefaultOptionCreator.outputOption().create(); + Option helpOpt = DefaultOptionCreator.helpOption(); + Option jarFileOpt = buildOption("jarFile", "m", "Implementation jar", true); + + GroupBuilder gBuilder = new GroupBuilder().withName("Options") + .withOption(inputOpt) + .withOption(tempDirOpt) + .withOption(outputOpt) + .withOption(helpOpt) + .withOption(jarFileOpt); for (Option opt : extraOpts) { gBuilder = gBuilder.withOption(opt); @@ -107,6 +106,7 @@ Map result = new HashMap(); result.put(inputOpt.getPreferredName(), cmdLine.getValue(inputOpt)); + result.put(tempDirOpt.getPreferredName(), cmdLine.getValue(tempDirOpt)); result.put(outputOpt.getPreferredName(), cmdLine.getValue(outputOpt)); result.put(helpOpt.getPreferredName(), cmdLine.getValue(helpOpt)); result.put(jarFileOpt.getPreferredName(), cmdLine.getValue(jarFileOpt)); @@ -135,10 +135,6 @@ Path inputPathPath = new Path(inputPath).makeQualified(fs); Path outputPathPath = new Path(outputPath).makeQualified(fs); - if (fs.exists(outputPathPath)) { - fs.delete(outputPathPath, true); - } - jobConf.set("mapred.jar", jarFile); jobConf.setJar(jarFile); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sun Dec 6 18:06:46 2009 @@ -32,7 +32,7 @@ import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable; import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable; import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper; -import org.apache.mahout.matrix.Vector; +import org.apache.mahout.matrix.SparseVector; import java.io.IOException; import java.util.Map; @@ -42,19 +42,20 @@ @Override public int run(String[] args) throws IOException { - Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user"); + Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", true); Map parsedArgs = parseArguments(args, numReccomendationsOpt); - String prefsFile = parsedArgs.get("--input").toString(); + String inputPath = parsedArgs.get("--input").toString(); + String tempDirPath = parsedArgs.get("--tempDir").toString(); String outputPath = parsedArgs.get("--output").toString(); String jarFile = parsedArgs.get("--jarFile").toString(); int recommendationsPerUser = Integer.parseInt((String) parsedArgs.get("--numRecommendations")); - String userVectorPath = outputPath + "/userVectors"; - String itemIDIndexPath = outputPath + "/itemIDIndex"; - String cooccurrencePath = outputPath + "/cooccurrence"; + String userVectorPath = tempDirPath + "/userVectors"; + String itemIDIndexPath = tempDirPath + "/itemIDIndex"; + String cooccurrencePath = tempDirPath + "/cooccurrence"; - JobConf itemIDIndexConf = prepareJobConf(prefsFile, + JobConf itemIDIndexConf = prepareJobConf(inputPath, itemIDIndexPath, jarFile, TextInputFormat.class, @@ -67,7 +68,7 @@ SequenceFileOutputFormat.class); JobClient.runJob(itemIDIndexConf); - JobConf toUserVectorConf = prepareJobConf(prefsFile, + JobConf toUserVectorConf = prepareJobConf(inputPath, userVectorPath, jarFile, TextInputFormat.class, @@ -76,7 +77,7 @@ ItemPrefWritable.class, ToUserVectorReducer.class, LongWritable.class, - Vector.class, + SparseVector.class, SequenceFileOutputFormat.class); JobClient.runJob(toUserVectorConf); @@ -89,7 +90,7 @@ IntWritable.class, UserVectorToCooccurrenceReducer.class, IntWritable.class, - Vector.class, + SparseVector.class, SequenceFileOutputFormat.class); JobClient.runJob(toCooccurrenceConf); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderMapper.java Sun Dec 6 18:06:46 2009 @@ -45,7 +45,7 @@ public final class RecommenderMapper extends MapReduceBase - implements Mapper { + implements Mapper { static final String COOCCURRENCE_PATH = "cooccurrencePath"; static final String ITEMID_INDEX_PATH = "itemIDIndexPath"; @@ -82,7 +82,7 @@ @Override public void map(LongWritable userID, - Vector userVector, + SparseVector userVector, OutputCollector output, Reporter reporter) throws IOException { Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Sun Dec 6 18:06:46 2009 @@ -24,22 +24,21 @@ import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.ItemPrefWritable; import org.apache.mahout.matrix.SparseVector; -import org.apache.mahout.matrix.Vector; import java.io.IOException; import java.util.Iterator; public final class ToUserVectorReducer extends MapReduceBase - implements Reducer { + implements Reducer { @Override public void reduce(LongWritable userID, Iterator itemPrefs, - OutputCollector output, + OutputCollector output, Reporter reporter) throws IOException { if (itemPrefs.hasNext()) { - Vector userVector = new SparseVector(Integer.MAX_VALUE, 100); + SparseVector userVector = new SparseVector(Integer.MAX_VALUE, 100); while (itemPrefs.hasNext()) { ItemPrefWritable itemPref = itemPrefs.next(); int index = ItemIDIndexMapper.itemIDToIndex(itemPref.getItemID()); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java Sun Dec 6 18:06:46 2009 @@ -23,6 +23,7 @@ import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; +import org.apache.mahout.matrix.SparseVector; import org.apache.mahout.matrix.Vector; import java.io.IOException; @@ -30,11 +31,11 @@ public final class UserVectorToCooccurrenceMapper extends MapReduceBase - implements Mapper { + implements Mapper { @Override public void map(LongWritable userID, - Vector userVector, + SparseVector userVector, OutputCollector output, Reporter reporter) throws IOException { Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java Sun Dec 6 18:06:46 2009 @@ -23,22 +23,21 @@ import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.matrix.SparseVector; -import org.apache.mahout.matrix.Vector; import java.io.IOException; import java.util.Iterator; public final class UserVectorToCooccurrenceReducer extends MapReduceBase - implements Reducer { + implements Reducer { @Override public void reduce(IntWritable index1, Iterator index2s, - OutputCollector output, + OutputCollector output, Reporter reporter) throws IOException { if (index2s.hasNext()) { - Vector cooccurrenceRow = new SparseVector(Integer.MAX_VALUE, 1000); + SparseVector cooccurrenceRow = new SparseVector(Integer.MAX_VALUE, 1000); while (index2s.hasNext()) { int index2 = index2s.next().get(); cooccurrenceRow.set(index2, cooccurrenceRow.get(index2) + 1.0); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/RecommenderJob.java Sun Dec 6 18:06:46 2009 @@ -105,9 +105,9 @@ @Override public int run(String[] args) throws IOException { - Option recommendClassOpt = buildOption("recommenderClassName", "r", "Name of recommender class to instantiate"); - Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user"); - Option dataModelFileOpt = buildOption("dataModelFile", "m", "File containing preference data"); + Option recommendClassOpt = buildOption("recommenderClassName", "r", "Name of recommender class to instantiate", true); + Option numReccomendationsOpt = buildOption("numRecommendations", "n", "Number of recommendations per user", true); + Option dataModelFileOpt = buildOption("dataModelFile", "m", "File containing preference data", true); Map parsedArgs = parseArguments(args, recommendClassOpt, numReccomendationsOpt, dataModelFileOpt); String userIDFile = parsedArgs.get("--input").toString(); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=887727&r1=887726&r2=887727&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Sun Dec 6 18:06:46 2009 @@ -45,7 +45,7 @@ String prefsFile = parsedArgs.get("--input").toString(); String outputPath = parsedArgs.get("--output").toString(); String jarFile = parsedArgs.get("--jarFile").toString(); - String averagesOutputPath = outputPath + "/averages"; + String averagesOutputPath = parsedArgs.get("--tempDir").toString(); JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile, averagesOutputPath,