mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r1130443 [1/2] - in /mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/common/ main/java/org/apache/mahout/cf/taste/hadoop/ main/java/org/apache/mahout/cf/taste/hadoop/als/ main/java/org/apache/mahout/cf/taste/hadoop/item/ main/ja...
Date Thu, 02 Jun 2011 09:07:51 GMT
Author: ssc
Date: Thu Jun  2 09:07:49 2011
New Revision: 1130443

URL: http://svn.apache.org/viewvc?rev=1130443&view=rev
Log:
MAHOUT-718 Small refactoring to broaden the use of Google Guava

Added:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/common/TopKMinKTest.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ReloadFromJDBCDataModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/PolymorphicWritable.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java?rev=1130443&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/FixedSizePriorityQueue.java Thu Jun  2 09:07:49 2011
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Queue;
+
+/**
+ * Base class for bounded queues holding the top or min k elements of all elements they have been offered.
+ *
+ * <p>Elements are kept in a heap ordered by {@link #queueingComparator(Comparator)}, so that the head of the
+ * heap is always the retained element that is evicted next. Subclasses choose the direction by supplying the
+ * queueing and sorting comparators.</p>
+ *
+ * @param <T> type of the elements held
+ */
+abstract class FixedSizePriorityQueue<T> {
+
+  /** maximum number of elements retained */
+  private final int k;
+  /** ordering passed by the caller, handed to {@link #sortingComparator(Comparator)} on retrieval */
+  private final Comparator<? super T> comparator;
+  /** ordering of the heap, also decides whether an offered element displaces the head */
+  private final Comparator<? super T> queueOrder;
+  private final Queue<T> queue;
+
+  /**
+   * @param k maximum number of elements to retain, must be greater than 0
+   * @param comparator ordering of the elements, must not be null
+   * @throws IllegalArgumentException if k is not greater than 0
+   * @throws NullPointerException if comparator is null
+   */
+  FixedSizePriorityQueue(int k, Comparator<? super T> comparator) {
+    Preconditions.checkArgument(k > 0);
+    this.k = k;
+    this.comparator = Preconditions.checkNotNull(comparator);
+    // invoked before the subclass constructor body has run, implementations must not rely on subclass state
+    this.queueOrder = queueingComparator(comparator);
+    // room for k + 1 elements as offer() adds the new element before evicting the head
+    this.queue = new PriorityQueue<T>(k + 1, queueOrder);
+  }
+
+  /**
+   * @param stdComparator the ordering passed to the constructor
+   * @return ordering of the internal heap, its least element is the head and is evicted first
+   */
+  abstract Comparator<? super T> queueingComparator(Comparator<? super T> stdComparator);
+
+  /**
+   * @param stdComparator the ordering passed to the constructor
+   * @return ordering of the list returned by {@link #retrieve()}
+   */
+  abstract Comparator<? super T> sortingComparator(Comparator<? super T> stdComparator);
+
+  /**
+   * Offers an element. It is retained if fewer than k elements are held, or if it ranks above the current head
+   * in heap order, in which case the head is evicted.
+   *
+   * @param item the element to offer
+   */
+  public void offer(T item) {
+    if (queue.size() < k) {
+      queue.add(item);
+    } else if (queueOrder.compare(item, queue.peek()) > 0) {
+      // compared in heap order rather than the caller's order: with a reversed heap (as in MinK) the caller's
+      // order would turn away every element that belongs into the queue once k elements are held
+      queue.add(item);
+      queue.poll();
+    }
+  }
+
+  /** @return true if no element is held */
+  public boolean isEmpty() {
+    return queue.isEmpty();
+  }
+
+  /** @return a new list of the retained elements, sorted by {@link #sortingComparator(Comparator)} */
+  public List<T> retrieve() {
+    List<T> topItems = Lists.newArrayList(queue);
+    Collections.sort(topItems, sortingComparator(comparator));
+    return topItems;
+  }
+
+  /** @return the head of the heap, i.e. the retained element evicted next, or null if no element is held */
+  protected T peek() {
+    return queue.peek();
+  }
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java?rev=1130443&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/MinK.java Thu Jun  2 09:07:49 2011
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+import java.util.Collections;
+import java.util.Comparator;
+
+/**
+ * Queue meant to preserve the k minimum elements of all elements it has been offered. The bookkeeping lives
+ * in {@link FixedSizePriorityQueue}, this class only supplies the orderings.
+ *
+ * @param <T> type of the elements held
+ */
+public class MinK<T> extends FixedSizePriorityQueue<T> {
+
+  public MinK(int k, Comparator<? super T> comparator) {
+    super(k, comparator);
+  }
+
+  /** @return whatever {@link #peek()} yields */
+  public T greatestSmall() {
+    return peek();
+  }
+
+  /** @return stdComparator as is */
+  @Override
+  protected Comparator<? super T> sortingComparator(Comparator<? super T> stdComparator) {
+    return stdComparator;
+  }
+
+  /** @return the reverse of stdComparator */
+  @Override
+  protected Comparator<? super T> queueingComparator(Comparator<? super T> stdComparator) {
+    Comparator<? super T> reversed = Collections.reverseOrder(stdComparator);
+    return reversed;
+  }
+}

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java?rev=1130443&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/common/TopK.java Thu Jun  2 09:07:49 2011
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.common;
+
+import java.util.Collections;
+import java.util.Comparator;
+
+/**
+ * Queue meant to preserve the k maximum elements of all elements it has been offered. The bookkeeping lives
+ * in {@link FixedSizePriorityQueue}, this class only supplies the orderings.
+ *
+ * @param <T> type of the elements held
+ */
+public class TopK<T> extends FixedSizePriorityQueue<T> {
+
+  public TopK(int k, Comparator<? super T> comparator) {
+    super(k, comparator);
+  }
+
+  /** @return whatever {@link #peek()} yields */
+  public T smallestGreat() {
+    return peek();
+  }
+
+  /** @return the reverse of stdComparator */
+  @Override
+  protected Comparator<? super T> sortingComparator(Comparator<? super T> stdComparator) {
+    Comparator<? super T> reversed = Collections.reverseOrder(stdComparator);
+    return reversed;
+  }
+
+  /** @return stdComparator as is */
+  @Override
+  protected Comparator<? super T> queueingComparator(Comparator<? super T> stdComparator) {
+    return stdComparator;
+  }
+}
\ No newline at end of file

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java Thu Jun  2 09:07:49 2011
@@ -19,6 +19,7 @@ package org.apache.mahout.cf.taste.hadoo
 
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.common.MinK;
 import org.apache.mahout.math.VarLongWritable;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -26,9 +27,8 @@ import org.apache.mahout.math.hadoop.Dis
 import org.apache.mahout.math.map.OpenIntIntHashMap;
 
 import java.io.IOException;
-import java.util.Collections;
+import java.util.Comparator;
 import java.util.Iterator;
-import java.util.PriorityQueue;
 
 
 /**
@@ -93,19 +93,20 @@ public class MaybePruneRowsMapper
       return vector;
     }
 
-    PriorityQueue<Integer> smallCounts = new PriorityQueue<Integer>(maxCooccurrences + 1, Collections.reverseOrder());
+    MinK<Integer> smallCounts = new MinK<Integer>(maxCooccurrences, new Comparator<Integer>() {
+        @Override
+        public int compare(Integer one, Integer two) {
+          return one.compareTo(two);
+        }
+      });
+
     Iterator<Vector.Element> it = vector.iterateNonZero();
     while (it.hasNext()) {
       int count = indexCounts.get(it.next().index());
-      if (smallCounts.size() < maxCooccurrences) {
-        smallCounts.add(count);
-      } else if (count < smallCounts.peek()) {
-        smallCounts.add(count);
-        smallCounts.poll();
-      }
+      smallCounts.offer(count);
     }
 
-    int greatestSmallCount = smallCounts.peek();
+    int greatestSmallCount = smallCounts.greatestSmall();
     if (greatestSmallCount > 0) {
       Iterator<Vector.Element> it2 = vector.iterateNonZero();
       while (it2.hasNext()) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Thu Jun  2 09:07:49 2011
@@ -23,6 +23,8 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/ParallelALSFactorizationJob.java Thu Jun  2 09:07:49 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.hadoop.als;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.FloatWritable;
@@ -91,9 +92,9 @@ public class ParallelALSFactorizationJob
 
     addInputOption();
     addOutputOption();
-    addOption("lambda", "l", "", true);
-    addOption("numFeatures", "f", "", true);
-    addOption("numIterations", "i", "", true);
+    addOption("lambda", "l", "regularization parameter", true);
+    addOption("numFeatures", "f", "dimension of the feature space", true);
+    addOption("numIterations", "i", "number of iterations", true);
 
     Map<String,String> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
@@ -152,7 +153,7 @@ public class ParallelALSFactorizationJob
 
 
   private void iterate(int currentIteration, int numFeatures, double lambda)
-    throws IOException, ClassNotFoundException, InterruptedException {
+      throws IOException, ClassNotFoundException, InterruptedException {
     /* fix M, compute U */
     joinAndSolve(pathToM(currentIteration - 1), pathToItemRatings(), pathToU(currentIteration), numFeatures,
         lambda, currentIteration, STEP_ONE);
@@ -177,7 +178,6 @@ public class ParallelALSFactorizationJob
         IndexedVarIntWritable.class, FeatureVectorWithRatingWritable.class, SolvingReducer.class, VarIntWritable.class,
         FeatureVectorWithRatingWritable.class, SequenceFileOutputFormat.class);
     Configuration solveConf = solve.getConfiguration();
-    solve.setPartitionerClass(HashPartitioner.class);
     solve.setGroupingComparatorClass(IndexedVarIntWritable.GroupingComparator.class);
     solveConf.setInt(NUM_FEATURES, numFeatures);
     solveConf.set(LAMBDA, String.valueOf(lambda));
@@ -242,10 +242,9 @@ public class ParallelALSFactorizationJob
       super.setup(ctx);
       lambda = Double.parseDouble(ctx.getConfiguration().get(LAMBDA));
       numFeatures = ctx.getConfiguration().getInt(NUM_FEATURES, -1);
-      if (numFeatures < 1) {
-        throw new IllegalStateException("numFeatures was not set correctly!");
-      }
       solver = new AlternateLeastSquaresSolver();
+
+      Preconditions.checkArgument(numFeatures > 0, "numFeatures was not set correctly!");
     }
 
     @Override
@@ -281,14 +280,13 @@ public class ParallelALSFactorizationJob
     protected void setup(Context ctx) throws IOException, InterruptedException {
       super.setup(ctx);
       numFeatures = ctx.getConfiguration().getInt(NUM_FEATURES, -1);
-      if (numFeatures < 1) {
-        throw new IllegalStateException("numFeatures was not set correctly!");
-      }
+
+      Preconditions.checkArgument(numFeatures > 0, "numFeatures was not set correctly!");
     }
 
     @Override
     protected void reduce(VarLongWritable itemID, Iterable<FloatWritable> ratings, Context ctx) 
-      throws IOException, InterruptedException {
+        throws IOException, InterruptedException {
 
       RunningAverage averageRating = new FullRunningAverage();
       for (FloatWritable rating : ratings) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/PredictionJob.java Thu Jun  2 09:07:49 2011
@@ -82,6 +82,7 @@ public class PredictionJob extends Abstr
 
     Path pairsJoinedWithItemFeatures = new Path(tempDirPath, "pairsJoinedWithItemFeatures");
 
+    /* joins here could spare more than 50% of their M/R cycles when MultipleInputs is available again */
     Job convertPairs = prepareJob(pairs, convertedPairs, TextInputFormat.class, PairsMapper.class,
         TaggedVarIntWritable.class, VectorWithIndexWritable.class, Reducer.class, TaggedVarIntWritable.class,
         VectorWithIndexWritable.class, SequenceFileOutputFormat.class);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Thu Jun  2 09:07:49 2011
@@ -18,17 +18,15 @@
 package org.apache.mahout.cf.taste.hadoop.item;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.common.TopK;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
 import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
-import org.apache.mahout.cf.taste.impl.recommender.ByValueRecommendedItemComparator;
 import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.HadoopUtil;
 import org.apache.mahout.common.iterator.FileLineIterable;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.VarLongWritable;
@@ -37,12 +35,8 @@ import org.apache.mahout.math.function.D
 import org.apache.mahout.math.map.OpenIntLongHashMap;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
+import java.util.Comparator;
 import java.util.Iterator;
-import java.util.List;
-import java.util.PriorityQueue;
-import java.util.Queue;
 
 /**
  * <p>computes prediction values for each user</p>
@@ -70,31 +64,27 @@ public final class AggregateAndRecommend
   private OpenIntLongHashMap indexItemIDMap;
 
   private static final float BOOLEAN_PREF_VALUE = 1.0f;
+  private static final Comparator<RecommendedItem> BY_PREFERENCE_VALUE =
+      new Comparator<RecommendedItem>() {
+        @Override
+        public int compare(RecommendedItem one, RecommendedItem two) {
+          return one.getValue() == two.getValue() ? 0 : one.getValue() > two.getValue() ? 1 : -1;
+        }
+      };
 
   @Override
   protected void setup(Context context) throws IOException {
-    Configuration jobConf = context.getConfiguration();
-    recommendationsPerUser = jobConf.getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
-    booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
-    indexItemIDMap = TasteHadoopUtils.readItemIDIndexMap(jobConf.get(ITEMID_INDEX_PATH), jobConf);
-
-    FSDataInputStream in = null;
-    try {
-      String itemFilePathString = jobConf.get(ITEMS_FILE);
-      if (itemFilePathString == null) {
-        itemsToRecommendFor = null;
-      } else {
-        Path unqualifiedItemsFilePath = new Path(itemFilePathString);
-        FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
-        itemsToRecommendFor = new FastIDSet();
-        Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
-        in = fs.open(itemsFilePath);
-        for (String line : new FileLineIterable(in)) {
-          itemsToRecommendFor.add(Long.parseLong(line));
-        }
+    Configuration conf = context.getConfiguration();
+    recommendationsPerUser = conf.getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
+    booleanData = conf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
+    indexItemIDMap = TasteHadoopUtils.readItemIDIndexMap(conf.get(ITEMID_INDEX_PATH), conf);
+
+    String itemFilePathString = conf.get(ITEMS_FILE);
+    if (itemFilePathString != null) {
+      itemsToRecommendFor = new FastIDSet();
+      for (String line : new FileLineIterable(HadoopUtil.openStream(new Path(itemFilePathString), conf))) {
+        itemsToRecommendFor.add(Long.parseLong(line));
       }
-    } finally {
-      IOUtils.closeStream(in);
     }
   }
 
@@ -182,35 +172,25 @@ public final class AggregateAndRecommend
    * find the top entries in recommendationVector, map them to the real itemIDs and write back the result
    */
   private void writeRecommendedItems(VarLongWritable userID, Vector recommendationVector, Context context)
-    throws IOException, InterruptedException {
-    Queue<RecommendedItem> topItems =
-        new PriorityQueue<RecommendedItem>(recommendationsPerUser + 1,
-                                           Collections.reverseOrder(ByValueRecommendedItemComparator.getInstance()));
+      throws IOException, InterruptedException {
+
+    TopK<RecommendedItem> topKItems = new TopK<RecommendedItem>(recommendationsPerUser, BY_PREFERENCE_VALUE);
 
     Iterator<Vector.Element> recommendationVectorIterator = recommendationVector.iterateNonZero();
     while (recommendationVectorIterator.hasNext()) {
       Vector.Element element = recommendationVectorIterator.next();
       int index = element.index();
-
       long itemID = indexItemIDMap.get(index);
       if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) {
         float value = (float) element.get();
         if (!Float.isNaN(value)) {
-          if (topItems.size() < recommendationsPerUser) {
-            topItems.add(new GenericRecommendedItem(itemID, value));
-          } else if (value > topItems.peek().getValue()) {
-            topItems.add(new GenericRecommendedItem(itemID, value));
-            topItems.poll();
-          }
+          topKItems.offer(new GenericRecommendedItem(itemID, value));
         }
       }
     }
 
-    if (!topItems.isEmpty()) {
-      List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size());
-      recommendations.addAll(topItems);
-      Collections.sort(recommendations, ByValueRecommendedItemComparator.getInstance());
-      context.write(userID, new RecommendedItemsWritable(recommendations));
+    if (!topKItems.isEmpty()) {
+      context.write(userID, new RecommendedItemsWritable(topKItems.retrieve()));
     }
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Thu Jun  2 09:07:49 2011
@@ -18,7 +18,6 @@
 package org.apache.mahout.cf.taste.hadoop.item;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
@@ -26,7 +25,6 @@ import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
@@ -135,7 +133,6 @@ public final class RecommenderJob extend
 
     Path inputPath = getInputPath();
     Path outputPath = getOutputPath();
-    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
     int numRecommendations = Integer.parseInt(parsedArgs.get("--numRecommendations"));
     String usersFile = parsedArgs.get("--usersFile");
     String itemsFile = parsedArgs.get("--itemsFile");
@@ -147,15 +144,15 @@ public final class RecommenderJob extend
     int maxCooccurrencesPerItem = Integer.parseInt(parsedArgs.get("--maxCooccurrencesPerItem"));
     String similarityClassname = parsedArgs.get("--similarityClassname");
 
-    Path userVectorPath = new Path(tempDirPath, "userVectors");
-    Path itemIDIndexPath = new Path(tempDirPath, "itemIDIndex");
-    Path countUsersPath = new Path(tempDirPath, "countUsers");
-    Path itemUserMatrixPath = new Path(tempDirPath, "itemUserMatrix");
-    Path similarityMatrixPath = new Path(tempDirPath, "similarityMatrix");
-    Path prePartialMultiplyPath1 = new Path(tempDirPath, "prePartialMultiply1");
-    Path prePartialMultiplyPath2 = new Path(tempDirPath, "prePartialMultiply2");
-    Path explicitFilterPath = new Path(tempDirPath, "explicitFilterPath");
-    Path partialMultiplyPath = new Path(tempDirPath, "partialMultiply");
+    Path userVectorPath = getTempPath("userVectors");
+    Path itemIDIndexPath = getTempPath("itemIDIndex");
+    Path countUsersPath = getTempPath("countUsers");
+    Path itemUserMatrixPath = getTempPath("itemUserMatrix");
+    Path similarityMatrixPath = getTempPath("similarityMatrix");
+    Path prePartialMultiplyPath1 = getTempPath("prePartialMultiply1");
+    Path prePartialMultiplyPath2 = getTempPath("prePartialMultiply2");
+    Path explicitFilterPath = getTempPath("explicitFilterPath");
+    Path partialMultiplyPath = getTempPath("partialMultiply");
 
     AtomicInteger currentPhase = new AtomicInteger();
 
@@ -224,7 +221,7 @@ public final class RecommenderJob extend
           "--numberOfColumns", String.valueOf(numberOfUsers),
           "--similarityClassname", similarityClassname,
           "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem + 1),
-          "--tempDir", tempDirPath.toString() });
+          "--tempDir", getTempPath().toString() });
       } catch (Exception e) {
         throw new IllegalStateException("item-item-similarity computation failed", e);
       }
@@ -255,13 +252,7 @@ public final class RecommenderJob extend
           SequenceFileInputFormat.class, Mapper.class, VarIntWritable.class, VectorOrPrefWritable.class,
           ToVectorAndPrefReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
           SequenceFileOutputFormat.class);
-
-      /* necessary to make this job (having a combined input path) work on Amazon S3 */
-      Configuration partialMultiplyConf = partialMultiply.getConfiguration();
-      FileSystem fs = FileSystem.get(tempDirPath.toUri(), partialMultiplyConf);
-      prePartialMultiplyPath1 = prePartialMultiplyPath1.makeQualified(fs);
-      prePartialMultiplyPath2 = prePartialMultiplyPath2.makeQualified(fs);
-      FileInputFormat.setInputPaths(partialMultiply, prePartialMultiplyPath1, prePartialMultiplyPath2);
+      setS3SafeCombinedInputPath(partialMultiply, getTempPath(), prePartialMultiplyPath1, prePartialMultiplyPath2);
       partialMultiply.waitForCompletion(true);
     }
 
@@ -292,11 +283,7 @@ public final class RecommenderJob extend
       }
 
       if (filterFile != null) {
-        /* necessary to make this job (having a combined input path) work on Amazon S3 */
-        FileSystem fs = FileSystem.get(tempDirPath.toUri(), aggregateAndRecommendConf);
-        partialMultiplyPath = partialMultiplyPath.makeQualified(fs);
-        explicitFilterPath = explicitFilterPath.makeQualified(fs);
-        FileInputFormat.setInputPaths(aggregateAndRecommend, partialMultiplyPath, explicitFilterPath);
+        setS3SafeCombinedInputPath(aggregateAndRecommend, getTempPath(), partialMultiplyPath, explicitFilterPath);
       }
       setIOSort(aggregateAndRecommend);
       aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH, itemIDIndexPath.toString());

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java Thu Jun  2 09:07:49 2011
@@ -23,6 +23,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.common.TopK;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
 import org.apache.mahout.common.iterator.FileLineIterable;
 import org.apache.mahout.math.VarIntWritable;
@@ -31,6 +32,7 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 
 import java.io.IOException;
+import java.util.Comparator;
 import java.util.Iterator;
 import java.util.PriorityQueue;
 
@@ -109,20 +111,20 @@ public final class UserVectorSplitterMap
   }
 
   private float findSmallestLargeValue(Vector userVector) {
-    PriorityQueue<Float> topPrefValues = new PriorityQueue<Float>(maxPrefsPerUserConsidered + 1);
+
+    TopK<Float> topPrefValues = new TopK<Float>(maxPrefsPerUserConsidered, new Comparator<Float>() {
+      @Override
+      public int compare(Float one, Float two) {
+        return one.compareTo(two);
+      }
+    });
+
     Iterator<Vector.Element> it = userVector.iterateNonZero();
     while (it.hasNext()) {
       float absValue = Math.abs((float) it.next().get());
-      if (topPrefValues.size() < maxPrefsPerUserConsidered) {
-        topPrefValues.add(absValue);
-      } else {
-        if (absValue > topPrefValues.peek()) {
-          topPrefValues.add(absValue);
-          topPrefValues.poll();
-        }
-      }
+      topPrefValues.offer(absValue);
     }
-    return topPrefValues.peek();
+    return topPrefValues.smallestGreat();
   }
 
 }
\ No newline at end of file

Added: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java?rev=1130443&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java Thu Jun  2 09:07:49 2011
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import com.google.common.collect.Iterables;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.math.VarLongWritable;
+
+import java.io.IOException;
+
+public class CountUsersCombiner
+    extends Reducer<CountUsersKeyWritable,VarLongWritable,CountUsersKeyWritable,VarLongWritable> {
+
+  @Override
+  protected void reduce(CountUsersKeyWritable key, Iterable<VarLongWritable> values, Context ctx)
+      throws IOException, InterruptedException {
+    /* we only need to see one tuple per user */
+    ctx.write(key, Iterables.get(values, 0));
+  }
+}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Thu Jun  2 09:07:49 2011
@@ -123,13 +123,12 @@ public final class ItemSimilarityJob ext
 
     Path inputPath = getInputPath();
     Path outputPath = getOutputPath();
-    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
 
-    Path itemIDIndexPath = new Path(tempDirPath, "itemIDIndex");
-    Path countUsersPath = new Path(tempDirPath, "countUsers");
-    Path userVectorPath = new Path(tempDirPath, "userVectors");
-    Path itemUserMatrixPath = new Path(tempDirPath, "itemUserMatrix");
-    Path similarityMatrixPath = new Path(tempDirPath, "similarityMatrix");
+    Path itemIDIndexPath = getTempPath("itemIDIndex");
+    Path countUsersPath = getTempPath("countUsers");
+    Path userVectorPath = getTempPath("userVectors");
+    Path itemUserMatrixPath = getTempPath("itemUserMatrix");
+    Path similarityMatrixPath = getTempPath("similarityMatrix");
 
     AtomicInteger currentPhase = new AtomicInteger();
 
@@ -171,6 +170,7 @@ public final class ItemSimilarityJob ext
                                   VarIntWritable.class,
                                   NullWritable.class,
                                   TextOutputFormat.class);
+      countUsers.setCombinerClass(CountUsersCombiner.class);
       countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
       countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
       countUsers.waitForCompletion(true);
@@ -202,7 +202,7 @@ public final class ItemSimilarityJob ext
       "--numberOfColumns", String.valueOf(numberOfUsers),
       "--similarityClassname", similarityClassName,
       "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem + 1),
-      "--tempDir", tempDirPath.toString() });
+      "--tempDir", getTempPath().toString() });
 
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
       Job mostSimilarItems = prepareJob(similarityMatrixPath,

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsMapper.java Thu Jun  2 09:07:49 2011
@@ -17,10 +17,12 @@
 
 package org.apache.mahout.cf.taste.hadoop.similarity.item;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.mahout.cf.taste.common.TopK;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
 import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
 import org.apache.mahout.math.Vector;
@@ -28,12 +30,7 @@ import org.apache.mahout.math.VectorWrit
 import org.apache.mahout.math.map.OpenIntLongHashMap;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
 import java.util.Iterator;
-import java.util.List;
-import java.util.PriorityQueue;
-import java.util.Queue;
 
 public final class MostSimilarItemPairsMapper
     extends Mapper<IntWritable,VectorWritable,EntityEntityWritable,DoubleWritable> {
@@ -44,12 +41,10 @@ public final class MostSimilarItemPairsM
   @Override
   protected void setup(Context ctx) {
     Configuration conf = ctx.getConfiguration();
-    //String itemIDIndexPathStr = conf.get(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR);
     maxSimilarItemsPerItem = conf.getInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, -1);
-    if (maxSimilarItemsPerItem < 1) {
-      throw new IllegalStateException("maxSimilarItemsPerItem was not correctly set!");
-    }
     indexItemIDMap = TasteHadoopUtils.readItemIDIndexMap(conf.get(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR), conf);
+
+    Preconditions.checkArgument(maxSimilarItemsPerItem > 0, "maxSimilarItemsPerItem was not correctly set!");
   }
 
   @Override
@@ -58,40 +53,28 @@ public final class MostSimilarItemPairsM
 
     int itemIDIndex = itemIDIndexWritable.get();
 
-    Queue<SimilarItem> topMostSimilarItems = new PriorityQueue<SimilarItem>(maxSimilarItemsPerItem + 1,
-        Collections.reverseOrder(SimilarItem.COMPARE_BY_SIMILARITY));
+    TopK<SimilarItem> topKMostSimilarItems =
+        new TopK<SimilarItem>(maxSimilarItemsPerItem, SimilarItem.COMPARE_BY_SIMILARITY);
 
     Iterator<Vector.Element> similarityVectorIterator = similarityVector.get().iterateNonZero();
 
     while (similarityVectorIterator.hasNext()) {
       Vector.Element element = similarityVectorIterator.next();
-      int index = element.index();
-      double value = element.get();
       /* ignore self similarities */
-      if (index != itemIDIndex) {
-        if (topMostSimilarItems.size() < maxSimilarItemsPerItem) {
-          topMostSimilarItems.add(new SimilarItem(indexItemIDMap.get(index), value));
-        } else if (value > topMostSimilarItems.peek().getSimilarity()) {
-          topMostSimilarItems.add(new SimilarItem(indexItemIDMap.get(index), value));
-          topMostSimilarItems.poll();
-        }
+      if (element.index() != itemIDIndex) {
+        topKMostSimilarItems.offer(new SimilarItem(indexItemIDMap.get(element.index()), element.get()));
       }
     }
 
-    if (!topMostSimilarItems.isEmpty()) {
-      List<SimilarItem> mostSimilarItems = new ArrayList<SimilarItem>(topMostSimilarItems.size());
-      mostSimilarItems.addAll(topMostSimilarItems);
-      Collections.sort(mostSimilarItems, SimilarItem.COMPARE_BY_SIMILARITY);
-
-      long itemID = indexItemIDMap.get(itemIDIndex);
-      for (SimilarItem similarItem : mostSimilarItems) {
-        long otherItemID = similarItem.getItemID();
-        if (itemID < otherItemID) {
-          ctx.write(new EntityEntityWritable(itemID, otherItemID), new DoubleWritable(similarItem.getSimilarity()));
-        } else {
-          ctx.write(new EntityEntityWritable(otherItemID, itemID), new DoubleWritable(similarItem.getSimilarity()));
-        }
+    long itemID = indexItemIDMap.get(itemIDIndex);
+    for (SimilarItem similarItem : topKMostSimilarItems.retrieve()) {
+      long otherItemID = similarItem.getItemID();
+      if (itemID < otherItemID) {
+        ctx.write(new EntityEntityWritable(itemID, otherItemID), new DoubleWritable(similarItem.getSimilarity()));
+      } else {
+        ctx.write(new EntityEntityWritable(otherItemID, itemID), new DoubleWritable(similarItem.getSimilarity()));
       }
     }
+
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsReducer.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/MostSimilarItemPairsReducer.java Thu Jun  2 09:07:49 2011
@@ -28,7 +28,7 @@ public class MostSimilarItemPairsReducer
 
   @Override
   protected void reduce(EntityEntityWritable itemIDPair, Iterable<DoubleWritable> values, Context ctx)
-    throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
     ctx.write(itemIDPair, values.iterator().next());
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIterator.java Thu Jun  2 09:07:49 2011
@@ -39,8 +39,7 @@ public final class LongPrimitiveArrayIte
    *          array to iterate over
    */
   public LongPrimitiveArrayIterator(long[] array) {
-    Preconditions.checkArgument(array != null, "array is null");
-    this.array = array; // yeah, not going to copy the array here, for performance
+    this.array = Preconditions.checkNotNull(array); // yeah, not going to copy the array here, for performance
     this.position = 0;
     this.max = array.length;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Thu Jun  2 09:07:49 2011
@@ -155,7 +155,7 @@ public class FileDataModel extends Abstr
    * @see #FileDataModel(File)
    */
   public FileDataModel(File dataFile, boolean transpose, long minReloadIntervalMS) throws IOException {
-    Preconditions.checkArgument(dataFile != null, "dataFile is null");
+    this.dataFile = Preconditions.checkNotNull(dataFile.getAbsoluteFile());
     if (!dataFile.exists() || dataFile.isDirectory()) {
       throw new FileNotFoundException(dataFile.toString());
     }
@@ -164,7 +164,6 @@ public class FileDataModel extends Abstr
 
     log.info("Creating FileDataModel for file {}", dataFile);
 
-    this.dataFile = dataFile.getAbsoluteFile();
     this.lastModified = dataFile.lastModified();
     this.lastUpdateFileModified = readLastUpdateFileModified();
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java Thu Jun  2 09:07:49 2011
@@ -62,14 +62,13 @@ public class FileIDMigrator extends Abst
 
   public FileIDMigrator(File dataFile, long minReloadIntervalMS) throws FileNotFoundException {
     longToString = new FastByIDMap<String>(100);
-    Preconditions.checkArgument(dataFile != null, "dataFile is null");
+    this.dataFile = Preconditions.checkNotNull(dataFile);
     if (!dataFile.exists() || dataFile.isDirectory()) {
       throw new FileNotFoundException(dataFile.toString());
     }
 
     log.info("Creating FileReadonlyIDMigrator for file {}", dataFile);
 
-    this.dataFile = dataFile;
     this.reloadLock = new ReentrantLock();
     this.lastModified = dataFile.lastModified();
     this.minReloadIntervalMS = minReloadIntervalMS;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java Thu Jun  2 09:07:49 2011
@@ -42,7 +42,7 @@ public final class ConnectionPoolDataSou
   private final DataSource delegate;
   
   public ConnectionPoolDataSource(DataSource underlyingDataSource) {
-    Preconditions.checkArgument(underlyingDataSource != null, "underlyingDataSource is null");
+    Preconditions.checkNotNull(underlyingDataSource);
     ConnectionFactory connectionFactory = new ConfiguringConnectionFactory(underlyingDataSource);
     GenericObjectPool objectPool = new GenericObjectPool();
     objectPool.setTestOnBorrow(false);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ReloadFromJDBCDataModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ReloadFromJDBCDataModel.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ReloadFromJDBCDataModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ReloadFromJDBCDataModel.java Thu Jun  2 09:07:49 2011
@@ -48,8 +48,7 @@ public final class ReloadFromJDBCDataMod
   private final RefreshHelper refreshHelper;
 
   public ReloadFromJDBCDataModel(JDBCDataModel delegate) throws TasteException {
-    Preconditions.checkNotNull(delegate, "Delegate cannot be null");
-    this.delegate = delegate;
+    this.delegate = Preconditions.checkNotNull(delegate);
     refreshHelper = new RefreshHelper(new Callable<Void>() {
       @Override
       public Void call() {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java Thu Jun  2 09:07:49 2011
@@ -39,10 +39,8 @@ public abstract class AbstractRecommende
   private final CandidateItemsStrategy candidateItemsStrategy;
   
   protected AbstractRecommender(DataModel dataModel, CandidateItemsStrategy candidateItemsStrategy) {
-    Preconditions.checkArgument(dataModel != null, "dataModel is null");
-
-    this.dataModel = dataModel;
-    this.candidateItemsStrategy = candidateItemsStrategy;
+    this.dataModel = Preconditions.checkNotNull(dataModel);
+    this.candidateItemsStrategy = Preconditions.checkNotNull(candidateItemsStrategy);
   }
 
   protected AbstractRecommender(DataModel dataModel) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java Thu Jun  2 09:07:49 2011
@@ -32,10 +32,10 @@ public final class PreferredItemsNeighbo
   protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel) throws TasteException {
     FastIDSet possibleItemsIDs = new FastIDSet();
     for (long itemID : preferredItemIDs) {
-      PreferenceArray prefs2 = dataModel.getPreferencesForItem(itemID);
-      int size2 = prefs2.length();
-      for (int j = 0; j < size2; j++) {
-        possibleItemsIDs.addAll(dataModel.getItemIDsFromUser(prefs2.getUserID(j)));
+      PreferenceArray itemPreferences = dataModel.getPreferencesForItem(itemID);
+      int numUsersPreferringItem = itemPreferences.length();
+      for (int index = 0; index < numUsersPreferringItem; index++) {
+        possibleItemsIDs.addAll(dataModel.getItemIDsFromUser(itemPreferences.getUserID(index)));
       }
     }
     possibleItemsIDs.removeAll(preferredItemIDs);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java Thu Jun  2 09:07:49 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.impl.recommender;
 
+import com.google.common.base.Preconditions;
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
 import org.apache.mahout.cf.taste.model.DataModel;
@@ -60,6 +61,9 @@ public class SamplingCandidateItemsStrat
    * @param userItemCountMultiplier
    */
   public SamplingCandidateItemsStrategy(int defaultMaxPrefsPerItemConsidered, int userItemCountMultiplier) {
+    Preconditions.checkArgument(defaultMaxPrefsPerItemConsidered > 0, "defaultMaxPrefsPerItemConsidered must be " +
+        "greater zero");
+    Preconditions.checkArgument(userItemCountMultiplier > 0, "userItemCountMultiplier must be greater zero");
     this.defaultMaxPrefsPerItemConsidered = defaultMaxPrefsPerItemConsidered;
     this.userItemCountMultiplier = userItemCountMultiplier;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Thu Jun  2 09:07:49 2011
@@ -111,12 +111,11 @@ public final class TreeClusteringRecomme
                                    int numClusters,
                                    double samplingRate) throws TasteException {
     super(dataModel);
-    Preconditions.checkArgument(clusterSimilarity != null, "clusterSimilarity is null");
     Preconditions.checkArgument(numClusters >= 2, "numClusters must be at least 2");
     Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0,
       "samplingRate is invalid: %f", samplingRate);
     random = RandomUtils.getRandom();
-    this.clusterSimilarity = clusterSimilarity;
+    this.clusterSimilarity = Preconditions.checkNotNull(clusterSimilarity);
     this.numClusters = numClusters;
     this.clusteringThreshold = Double.NaN;
     this.clusteringByThreshold = false;
@@ -170,11 +169,10 @@ public final class TreeClusteringRecomme
                                    double clusteringThreshold,
                                    double samplingRate) throws TasteException {
     super(dataModel);
-    Preconditions.checkArgument(clusterSimilarity != null, "clusterSimilarity is null");
     Preconditions.checkArgument(!Double.isNaN(clusteringThreshold), "clusteringThreshold must not be NaN");
     Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0, "samplingRate is invalid: %f", samplingRate);
     random = RandomUtils.getRandom();
-    this.clusterSimilarity = clusterSimilarity;
+    this.clusterSimilarity = Preconditions.checkNotNull(clusterSimilarity);
     this.numClusters = Integer.MIN_VALUE;
     this.clusteringThreshold = clusteringThreshold;
     this.clusteringByThreshold = true;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender2.java Thu Jun  2 09:07:49 2011
@@ -96,9 +96,8 @@ public final class TreeClusteringRecomme
   public TreeClusteringRecommender2(DataModel dataModel, ClusterSimilarity clusterSimilarity, int numClusters)
     throws TasteException {
     super(dataModel);
-    Preconditions.checkArgument(clusterSimilarity != null, "clusterSimilarity is null");
     Preconditions.checkArgument(numClusters >= 2, "numClusters must be at least 2");
-    this.clusterSimilarity = clusterSimilarity;
+    this.clusterSimilarity = Preconditions.checkNotNull(clusterSimilarity);
     this.numClusters = numClusters;
     this.clusteringThreshold = Double.NaN;
     this.clusteringByThreshold = false;
@@ -130,9 +129,8 @@ public final class TreeClusteringRecomme
                                     ClusterSimilarity clusterSimilarity,
                                     double clusteringThreshold) throws TasteException {
     super(dataModel);
-    Preconditions.checkArgument(clusterSimilarity != null, "clusterSimilarity is null");
     Preconditions.checkArgument(!Double.isNaN(clusteringThreshold), "clusteringThreshold must not be NaN");
-    this.clusterSimilarity = clusterSimilarity;
+    this.clusterSimilarity = Preconditions.checkNotNull(clusterSimilarity);
     this.numClusters = Integer.MIN_VALUE;
     this.clusteringThreshold = clusteringThreshold;
     this.clusteringByThreshold = true;

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java Thu Jun  2 09:07:49 2011
@@ -41,8 +41,7 @@ public final class SpearmanCorrelationSi
   private final DataModel dataModel;
   
   public SpearmanCorrelationSimilarity(DataModel dataModel) {
-    Preconditions.checkArgument(dataModel != null, "dataModel is null");
-    this.dataModel = dataModel;
+    this.dataModel = Preconditions.checkNotNull(dataModel);
   }
   
   @Override

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/InverseUserFrequency.java Thu Jun  2 09:07:49 2011
@@ -71,9 +71,8 @@ public final class InverseUserFrequency 
    *           if dataModel is {@code null} or logBase is {@link Double#NaN} or &lt;= 1.0
    */
   public InverseUserFrequency(DataModel dataModel, double logBase) throws TasteException {
-    Preconditions.checkArgument(dataModel != null, "dataModel is null");
     Preconditions.checkArgument(logBase > 1.0, "logBase should be > 1.0");
-    this.dataModel = dataModel;
+    this.dataModel = Preconditions.checkNotNull(dataModel);
     this.logBase = logBase;
     this.iufFactors = new FastByIDMap<Double>();
     this.refreshHelper = new RefreshHelper(new Callable<Object>() {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/ZScore.java Thu Jun  2 09:07:49 2011
@@ -19,6 +19,7 @@ package org.apache.mahout.cf.taste.impl.
 
 import java.util.Collection;
 
+import com.google.common.base.Preconditions;
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.Cache;
@@ -50,7 +51,7 @@ public final class ZScore implements Pre
   private final Cache<Long,RunningAverageAndStdDev> meanAndStdevs;
   
   public ZScore(DataModel dataModel) {
-    this.dataModel = dataModel;
+    this.dataModel = Preconditions.checkNotNull(dataModel);
     this.meanAndStdevs = new Cache<Long,RunningAverageAndStdDev>(new MeanStdevRetriever());
     refresh(null);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/discriminative/WinnowTrainer.java Thu Jun  2 09:07:49 2011
@@ -32,8 +32,7 @@ public class WinnowTrainer extends Linea
   /** Promotion step to multiply weights with on update. */
   private final double promotionStep;
   
-  public WinnowTrainer(int dimension, double promotionStep,
-                       double threshold, double init, double initBias) {
+  public WinnowTrainer(int dimension, double promotionStep, double threshold, double init, double initBias) {
     super(dimension, threshold, init, initBias);
     this.promotionStep = promotionStep;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Thu Jun  2 09:07:49 2011
@@ -27,7 +27,8 @@ import org.apache.mahout.math.Vector.Ele
  * Class implementing the Naive Bayes Classifier Algorithm
  * 
  */
-public abstract class AbstractNaiveBayesClassifier extends AbstractVectorClassifier { 
+public abstract class AbstractNaiveBayesClassifier extends AbstractVectorClassifier {
+
   private final NaiveBayesModel model;
   
   protected AbstractNaiveBayesClassifier(NaiveBayesModel model) {
@@ -44,8 +45,7 @@ public abstract class AbstractNaiveBayes
     double result = 0.0;
     Iterator<Element> it = instance.iterateNonZero();
     while (it.hasNext()) {
-      Element e = it.next();
-      result +=  getScoreForLabelFeature(label, e.index());
+      result +=  getScoreForLabelFeature(label, it.next().index());
     }
     return result;
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/NaiveBayesModel.java Thu Jun  2 09:07:49 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.naivebayes;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
@@ -184,40 +185,15 @@ public class NaiveBayesModel {
       return; // empty models are valid
     }
 
-    if (model.getAlphaI() <= 0) {
-      throw new IllegalArgumentException(
-          "Error: AlphaI has to be greater than 0!");
-    }
-
-    if (model.getVocabCount() <= 0) {
-      throw new IllegalArgumentException(
-          "Error: The vocab count has to be greater than 0!");
-    }
-
-    if (model.getVocabCount() <= 0) {
-      throw new IllegalArgumentException(
-          "Error: The vocab count has to be greater than 0!");
-    }
-    
-    if (model.getTotalSum() <= 0) {
-      throw new IllegalArgumentException(
-          "Error: The vocab count has to be greater than 0!");
-    }    
-
-    if (model.getLabelSum() == null || model.getLabelSum().getNumNondefaultElements() <= 0) {
-      throw new IllegalArgumentException(
-          "Error: The number of labels has to be greater than 0 or defined!");
-    }  
-    
-    if (model.getPerlabelThetaNormalizer() == null
-        || model.getPerlabelThetaNormalizer().getNumNondefaultElements() <= 0) {
-      throw new IllegalArgumentException(
-          "Error: The number of theta normalizers has to be greater than 0 or defined!");
-    }
-    
-    if (model.getFeatureSum() == null || model.getFeatureSum().getNumNondefaultElements() <= 0) {
-      throw new IllegalArgumentException(
-          "Error: The number of features has to be greater than 0 or defined!");
-    }
+    Preconditions.checkArgument(model.getAlphaI() > 0, "Error: AlphaI has to be greater than 0!");
+    Preconditions.checkArgument(model.getVocabCount() > 0, "Error: The vocab count has to be greater than 0!");
+    Preconditions.checkArgument(model.getTotalSum() > 0, "Error: The vocab count has to be greater than 0!");
+    Preconditions.checkArgument(model.getLabelSum() != null && model.getLabelSum().getNumNondefaultElements() > 0,
+        "Error: The number of labels has to be greater than 0 and defined!");
+    Preconditions.checkArgument(model.getPerlabelThetaNormalizer() != null &&
+        model.getPerlabelThetaNormalizer().getNumNondefaultElements() > 0,
+        "Error: The number of theta normalizers has to be greater than 0 or defined!");
+    Preconditions.checkArgument(model.getFeatureSum() != null && model.getFeatureSum().getNumNondefaultElements() > 0,
+        "Error: The number of features has to be greater than 0 or defined!");
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesInstanceMapper.java Thu Jun  2 09:07:49 2011
@@ -20,6 +20,7 @@ package org.apache.mahout.classifier.nai
 import java.io.IOException;
 import java.net.URI;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.fs.Path;
@@ -51,9 +52,8 @@ public class NaiveBayesInstanceMapper ex
     super.setup(context);
     Configuration conf = context.getConfiguration();
     URI[] localFiles = DistributedCache.getCacheFiles(conf);
-    if (localFiles == null || localFiles.length < 1) {
-      throw new IllegalArgumentException("missing paths from the DistributedCache");
-    }
+    Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
+        "missing paths from the DistributedCache");
     Path labelMapFile = new Path(localFiles[0].getPath());
     // key is word value is id
     for (Pair<Writable,IntWritable> record

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesThetaMapper.java Thu Jun  2 09:07:49 2011
@@ -20,6 +20,7 @@ package org.apache.mahout.classifier.nai
 import java.io.IOException;
 import java.net.URI;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.filecache.DistributedCache;
 import org.apache.hadoop.fs.Path;
@@ -56,9 +57,9 @@ public class NaiveBayesThetaMapper exten
     super.setup(context);
     Configuration conf = context.getConfiguration();
     URI[] localFiles = DistributedCache.getCacheFiles(conf);
-    if (localFiles == null || localFiles.length < 2) {
-      throw new IllegalArgumentException("missing paths from the DistributedCache");
-    }
+    Preconditions.checkArgument(localFiles != null && localFiles.length >= 2,
+        "missing paths from the DistributedCache");
+
     alphaI = conf.getFloat(NaiveBayesTrainer.ALPHA_I, 1.0f);
     Path weightFile = new Path(localFiles[0].getPath());
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/trainer/NaiveBayesTrainer.java Thu Jun  2 09:07:49 2011
@@ -59,7 +59,7 @@ public final class NaiveBayesTrainer {
                                       int numReducers,
                                       float alphaI,
                                       boolean trainComplementary)
-    throws IOException, InterruptedException, ClassNotFoundException {
+      throws IOException, InterruptedException, ClassNotFoundException {
     conf.setFloat(ALPHA_I, alphaI);
     Path labelMapPath = createLabelMapFile(inputLabels, conf, new Path(output, LABEL_MAP));
     Path classVectorPath =  new Path(output, CLASS_VECTORS);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/ModelSerializer.java Thu Jun  2 09:07:49 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.classifier.sgd;
 
+import com.google.common.io.Closeables;
 import org.apache.hadoop.io.Writable;
 
 import java.io.DataInputStream;
@@ -39,7 +40,7 @@ public final class ModelSerializer {
     try {
       PolymorphicWritable.write(out, model);
     } finally {
-      out.close();
+      Closeables.close(out, false);
     }
   }
 
@@ -48,7 +49,7 @@ public final class ModelSerializer {
     try {
       PolymorphicWritable.write(out, model);
     } finally {
-      out.close();
+      Closeables.close(out, false);
     }
   }
 
@@ -57,7 +58,7 @@ public final class ModelSerializer {
     try {
       PolymorphicWritable.write(out, model);
     } finally {
-      out.close();
+      Closeables.close(out, false);
     }
   }
 
@@ -66,7 +67,7 @@ public final class ModelSerializer {
     try {
       return PolymorphicWritable.read(dataIn, clazz);
     } finally {
-      dataIn.close();
+      Closeables.close(in, false);
     }
   }
 

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/PolymorphicWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/PolymorphicWritable.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/PolymorphicWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/sgd/PolymorphicWritable.java Thu Jun  2 09:07:49 2011
@@ -38,7 +38,7 @@ public final class PolymorphicWritable<T
 
   public static <T extends Writable> T read(DataInput dataInput, Class<? extends T> clazz) throws IOException {
     String className = dataInput.readUTF();
-    T r = null;
+    T r;
     try {
       r = Class.forName(className).asSubclass(clazz).newInstance();
     } catch (InstantiationException e) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java Thu Jun  2 09:07:49 2011
@@ -19,6 +19,7 @@ package org.apache.mahout.clustering;
 import java.io.IOException;
 import java.util.Iterator;
 
+import com.google.common.io.Closeables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -124,11 +125,13 @@ public class ClusterIterator {
   private static void writeClassifier(ClusterClassifier classifier, Path outPath, String k) throws IOException {
     Configuration config = new Configuration();
     FileSystem fs = FileSystem.get(outPath.toUri(), config);
-    SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, outPath,
-        Text.class, ClusterClassifier.class);
-    Writable key = new Text(k);
-    writer.append(key, classifier);
-    writer.close();
+    SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, outPath, Text.class, ClusterClassifier.class);
+    try {
+      Writable key = new Text(k);
+      writer.append(key, classifier);
+    } finally {
+      Closeables.close(writer, false);
+    }
   }
   
   private static ClusterClassifier readClassifier(Path inPath) throws IOException {
@@ -137,8 +140,11 @@ public class ClusterIterator {
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, inPath, config);
     Writable key = new Text();
     ClusterClassifier classifierOut = new ClusterClassifier();
-    reader.next(key, classifierOut);
-    reader.close();
+    try {
+      reader.next(key, classifierOut);
+    } finally {
+      Closeables.close(reader, false);
+    }
     return classifierOut;
   }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/AbstractJob.java Thu Jun  2 09:07:49 2011
@@ -34,6 +34,7 @@ import org.apache.commons.cli2.builder.G
 import org.apache.commons.cli2.commandline.Parser;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.InputFormat;
@@ -42,6 +43,7 @@ import org.apache.hadoop.mapreduce.JobCo
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.util.Tool;
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.slf4j.Logger;
@@ -91,6 +93,9 @@ public abstract class AbstractJob extend
   /** output path, populated by {@link #parseArguments(String[]) */
   private Path outputPath;
 
+  /** temp path, populated by {@link #parseArguments(String[])} */
+  private Path tempPath;
+
   private Map<String, String> argMap;
 
   /** internal list of options that have been added */
@@ -118,6 +123,14 @@ public abstract class AbstractJob extend
     return outputPath;
   }
 
+  protected Path getTempPath() {
+    return tempPath;
+  }
+
+  protected Path getTempPath(String directory) {
+    return new Path(tempPath, directory);
+  }
+
   /** Add an option with no argument whose presence can be checked for using
    *  {@code containsKey} method on the map returned by {@link #parseArguments(String[])};
    */
@@ -284,6 +297,8 @@ public abstract class AbstractJob extend
     argMap = new TreeMap<String, String>();
     maybePut(argMap, cmdLine, this.options.toArray(new Option[this.options.size()]));
 
+    this.tempPath = new Path(argMap.get("--tempDir").toString());
+
     log.info("Command line arguments: {}", argMap);
     return argMap;
   }
@@ -433,4 +448,20 @@ public abstract class AbstractJob extend
     return name.toString();
   }
 
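+  /**
+   * Sets both input paths on the job, qualified against the file system of the reference path. Necessary to make
+   * a job with a combined input path work on Amazon S3; hopefully obsolete once MultipleInputs is available again.
+   *
+   * @param job the job whose input paths are set
+   * @param referencePath path whose URI selects the file system used to qualify the input paths
+   * @param inputPathOne first input path
+   * @param inputPathTwo second input path
+   * @throws IOException if the file system for the reference path cannot be obtained
+   */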
+  public void setS3SafeCombinedInputPath(Job job, Path referencePath, Path inputPathOne, Path inputPathTwo)
+      throws IOException {
+    FileSystem fs = FileSystem.get(referencePath.toUri(), job.getConfiguration());
+    FileInputFormat.setInputPaths(job, inputPathOne.makeQualified(fs), inputPathTwo.makeQualified(fs));
+  }
+
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Thu Jun  2 09:07:49 2011
@@ -18,6 +18,7 @@
 package org.apache.mahout.common;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.Arrays;
 import java.util.Iterator;
 
@@ -62,4 +63,9 @@ public final class HadoopUtil {
     return count;
   }
 
+  public static InputStream openStream(Path path, Configuration conf) throws IOException {
+    FileSystem fs = FileSystem.get(path.toUri(), conf);
+    return fs.open(path.makeQualified(fs));
+  }
+
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/IOUtils.java Thu Jun  2 09:07:49 2011
@@ -27,6 +27,7 @@ import java.sql.Statement;
 import java.util.Collection;
 
 import com.google.common.base.Preconditions;
+import com.google.common.io.Closeables;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,13 +45,7 @@ public final class IOUtils {
   public static void quietClose(Closeable... closeables) {
     Preconditions.checkNotNull(closeables, "Closables cannot be null");
     for (Closeable closeable : closeables) {
-      if (closeable != null) {
-        try {
-          closeable.close();
-        } catch (IOException ioe) {
-          log.warn("Unexpected exception while closing; continuing", ioe);
-        }
-      }
+      Closeables.closeQuietly(closeable);
     }
   }
   

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/FileLineIterable.java Thu Jun  2 09:07:49 2011
@@ -24,6 +24,11 @@ import java.nio.charset.Charset;
 import java.util.Iterator;
 
 import com.google.common.base.Charsets;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.common.HadoopUtil;
 
 /**
  * Iterable representing the lines of a text file. It can produce an {@link Iterator} over those lines. This
@@ -42,7 +47,7 @@ public final class FileLineIterable impl
   public FileLineIterable(File file) throws IOException {
     this(file, Charsets.UTF_8, false);
   }
-  
+
   /** Creates a {@link FileLineIterable} over a given file, assuming a UTF-8 encoding. */
   public FileLineIterable(File file, boolean skipFirstLine) throws IOException {
     this(file, Charsets.UTF_8, skipFirstLine);
@@ -52,7 +57,7 @@ public final class FileLineIterable impl
   public FileLineIterable(File file, Charset encoding, boolean skipFirstLine) throws IOException {
     this(FileLineIterator.getFileInputStream(file), encoding, skipFirstLine);
   }
-  
+
   public FileLineIterable(InputStream is) {
     this(is, Charsets.UTF_8, false);
   }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java?rev=1130443&r1=1130442&r2=1130443&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/RowSimilarityJob.java Thu Jun  2 09:07:49 2011
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
@@ -119,10 +120,9 @@ public class RowSimilarityJob extends Ab
 
     Path inputPath = getInputPath();
     Path outputPath = getOutputPath();
-    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
 
-    Path weightsPath = new Path(tempDirPath, "weights");
-    Path pairwiseSimilarityPath = new Path(tempDirPath, "pairwiseSimilarity");
+    Path weightsPath = getTempPath("weights");
+    Path pairwiseSimilarityPath = getTempPath("pairwiseSimilarity");
 
     AtomicInteger currentPhase = new AtomicInteger();
 
@@ -137,7 +137,6 @@ public class RowSimilarityJob extends Ab
                                VarIntWritable.class,
                                WeightedOccurrenceArray.class,
                                SequenceFileOutputFormat.class);
-
       weights.getConfiguration().set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
       weights.waitForCompletion(true);
     }
@@ -171,7 +170,6 @@ public class RowSimilarityJob extends Ab
                                IntWritable.class,
                                VectorWritable.class,
                                SequenceFileOutputFormat.class);
-      asMatrix.setPartitionerClass(HashPartitioner.class);
       asMatrix.setGroupingComparatorClass(SimilarityMatrixEntryKey.SimilarityMatrixEntryKeyGroupingComparator.class);
       asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
       asMatrix.waitForCompletion(true);
@@ -284,7 +282,7 @@ public class RowSimilarityJob extends Ab
    * computes the pairwise similarities
    */
   public static class SimilarityReducer
-      extends Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey, DistributedRowMatrix.MatrixEntryWritable> {
+      extends Reducer<WeightedRowPair,Cooccurrence,SimilarityMatrixEntryKey,DistributedRowMatrix.MatrixEntryWritable> {
 
     private DistributedVectorSimilarity similarity;
     private int numberOfColumns;
@@ -294,9 +292,8 @@ public class RowSimilarityJob extends Ab
       super.setup(ctx);
       similarity = instantiateSimilarity(ctx.getConfiguration().get(DISTRIBUTED_SIMILARITY_CLASSNAME));
       numberOfColumns = ctx.getConfiguration().getInt(NUMBER_OF_COLUMNS, -1);
-      if (numberOfColumns < 1) {
-        throw new IllegalStateException("Number of columns was not correctly set!");
-      }
+
+      Preconditions.checkArgument(numberOfColumns > 0, "Number of columns was not correctly set!");
     }
 
     @Override
@@ -341,9 +338,9 @@ public class RowSimilarityJob extends Ab
     protected void setup(Context ctx) throws IOException, InterruptedException {
       super.setup(ctx);
       maxSimilaritiesPerRow = ctx.getConfiguration().getInt(MAX_SIMILARITIES_PER_ROW, -1);
-      if (maxSimilaritiesPerRow < 1) {
-        throw new IllegalStateException("Maximum number of similarities per row was not correctly set!");
-      }
+
+      Preconditions.checkArgument(maxSimilaritiesPerRow > 0, "Maximum number of similarities per row was not " +
+          "correctly set!");
     }
 
     @Override



Mime
View raw message