mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r712399 [1/2] - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/impl/common/ main/java/org/apache/mahout/cf/taste/impl/neighborhood/ main/java/org/apache/mahout/cf/taste/impl/recommender/ main/java/org/apache/mahout/c...
Date Sat, 08 Nov 2008 14:39:42 GMT
Author: srowen
Date: Sat Nov  8 06:39:41 2008
New Revision: 712399

URL: http://svn.apache.org/viewvc?rev=712399&view=rev
Log:
Rename all remaining instances of 'correlation' to 'similarity'. Use PriorityQueue in top-items implementations.

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java
      - copied, changed from r712103, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/CorrelationTransform.java
Removed:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/CorrelationTransform.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastSet.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommenderTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarityTest.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplificationTest.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Sat Nov  8 06:39:41 2008
@@ -62,7 +62,7 @@
    * Creates a new {@link FastMap} with default capacity.
    */
   public FastMap() {
-    this(11, NO_MAX_SIZE);
+    this(5, NO_MAX_SIZE);
   }
 
   public FastMap(int size) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastSet.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastSet.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastSet.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastSet.java Sat Nov  8 06:39:41 2008
@@ -53,7 +53,7 @@
    * Creates a new {@link FastSet} with default capacity.
    */
   public FastSet() {
-    this(11);
+    this(5);
   }
 
   public FastSet(Collection<? extends K> c) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java Sat Nov  8 06:39:41 2008
@@ -56,7 +56,7 @@
     this.refreshHelper.addDependency(this.userSimilarity);
   }
 
-  final UserSimilarity getUserCorrelation() {
+  final UserSimilarity getUserSimilarity() {
     return userSimilarity;
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java Sat Nov  8 06:39:41 2008
@@ -21,16 +21,13 @@
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+import org.apache.mahout.cf.taste.impl.recommender.TopItems;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.LinkedList;
 import java.util.List;
-import java.util.ListIterator;
 
 /**
  * <p>Computes a neighborhood consisting of the nearest n {@link User}s to a given {@link User}.
@@ -46,7 +43,7 @@
    * @param n neighborhood size
    * @param userSimilarity nearness metric
    * @param dataModel data model
-   * @throws IllegalArgumentException if n &lt; 1, or userCorrelation or dataModel are <code>null</code>
+   * @throws IllegalArgumentException if n &lt; 1, or userSimilarity or dataModel are <code>null</code>
    */
   public NearestNUserNeighborhood(int n,
                                   UserSimilarity userSimilarity,
@@ -61,7 +58,7 @@
    * @param samplingRate percentage of users to consider when building neighborhood -- decrease to
    * trade quality for performance
    * @throws IllegalArgumentException if n &lt; 1 or samplingRate is NaN or not in (0,1],
-   * or userCorrelation or dataModel are <code>null</code>
+   * or userSimilarity or dataModel are <code>null</code>
    */
   public NearestNUserNeighborhood(int n,
                                   UserSimilarity userSimilarity,
@@ -79,36 +76,11 @@
 
     DataModel dataModel = getDataModel();
     User theUser = dataModel.getUser(userID);
-    UserSimilarity userSimilarityImpl = getUserCorrelation();
+    UserSimilarity userSimilarityImpl = getUserSimilarity();
 
-    LinkedList<UserCorrelationPair> queue = new LinkedList<UserCorrelationPair>();
-    boolean full = false;
-    for (User user : dataModel.getUsers()) {
-      if (sampleForUser() && !userID.equals(user.getID())) {
-        double theCorrelation = userSimilarityImpl.userSimilarity(theUser, user);
-        if (!Double.isNaN(theCorrelation) && (!full || theCorrelation > queue.getLast().theCorrelation)) {
-          ListIterator<UserCorrelationPair> iterator = queue.listIterator(queue.size());
-          while (iterator.hasPrevious()) {
-            if (theCorrelation <= iterator.previous().theCorrelation) {
-              iterator.next();
-              break;
-            }
-          }
-          iterator.add(new UserCorrelationPair(user, theCorrelation));
-          if (full) {
-            queue.removeLast();
-          } else if (queue.size() > n) {
-            full = true;
-            queue.removeLast();
-          }
-        }
-      }
-    }
+    TopItems.Estimator<User> estimator = new Estimator(userSimilarityImpl, theUser);
 
-    List<User> neighborhood = new ArrayList<User>(queue.size());
-    for (UserCorrelationPair pair : queue) {
-      neighborhood.add(pair.user);
-    }
+    List<User> neighborhood = TopItems.getTopUsers(n, dataModel.getUsers(), null, estimator);
 
     log.trace("UserNeighborhood around user ID '{}' is: {}", userID, neighborhood);
 
@@ -120,34 +92,20 @@
     return "NearestNUserNeighborhood";
   }
 
-  private static final class UserCorrelationPair implements Comparable<UserCorrelationPair> {
-
-    final User user;
-    final double theCorrelation;
-
-    private UserCorrelationPair(User user, double theCorrelation) {
-      this.user = user;
-      this.theCorrelation = theCorrelation;
+  private static class Estimator implements TopItems.Estimator<User> {
+    private final UserSimilarity userSimilarityImpl;
+    private final User theUser;
+
+    public Estimator(UserSimilarity userSimilarityImpl, User theUser) {
+      this.userSimilarityImpl = userSimilarityImpl;
+      this.theUser = theUser;
     }
 
-    @Override
-    public int hashCode() {
-      return user.hashCode() ^ RandomUtils.hashDouble(theCorrelation);
-    }
-
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof UserCorrelationPair)) {
-        return false;
+    public double estimate(User user) throws TasteException {
+      if (user.equals(theUser)) {
+        return Double.NaN;
       }
-      UserCorrelationPair other = (UserCorrelationPair) o;
-      return user.equals(other.user) && theCorrelation == other.theCorrelation;
-    }
-
-    public int compareTo(UserCorrelationPair otherPair) {
-      double otherCorrelation = otherPair.theCorrelation;
-      return theCorrelation > otherCorrelation ? -1 : theCorrelation < otherCorrelation ? 1 : 0;
+      return userSimilarityImpl.userSimilarity(theUser, user);
     }
   }
-
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java Sat Nov  8 06:39:41 2008
@@ -46,7 +46,7 @@
    * @param userSimilarity similarity metric
    * @param dataModel data model
    * @throws IllegalArgumentException if threshold is {@link Double#NaN},
-   * or if samplingRate is not positive and less than or equal to 1.0, or if userCorrelation
+   * or if samplingRate is not positive and less than or equal to 1.0, or if userSimilarity
    * or dataModel are <code>null</code>
    */
   public ThresholdUserNeighborhood(double threshold,
@@ -62,7 +62,7 @@
    * @param samplingRate percentage of users to consider when building neighborhood -- decrease to
    * trade quality for performance
    * @throws IllegalArgumentException if threshold or samplingRate is {@link Double#NaN},
-   * or if samplingRate is not positive and less than or equal to 1.0, or if userCorrelation
+   * or if samplingRate is not positive and less than or equal to 1.0, or if userSimilarity
    * or dataModel are <code>null</code>
    */
   public ThresholdUserNeighborhood(double threshold,
@@ -83,13 +83,13 @@
     User theUser = dataModel.getUser(userID);
     List<User> neighborhood = new ArrayList<User>();
     Iterator<? extends User> users = dataModel.getUsers().iterator();
-    UserSimilarity userSimilarityImpl = getUserCorrelation();
+    UserSimilarity userSimilarityImpl = getUserSimilarity();
 
     while (users.hasNext()) {
       User user = users.next();
       if (sampleForUser() && !userID.equals(user.getID())) {
-        double theCorrelation = userSimilarityImpl.userSimilarity(theUser, user);
-        if (!Double.isNaN(theCorrelation) && theCorrelation >= threshold) {
+        double theSimilarity = userSimilarityImpl.userSimilarity(theUser, user);
+        if (!Double.isNaN(theSimilarity) && theSimilarity >= threshold) {
           neighborhood.add(user);
         }
       }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java Sat Nov  8 06:39:41 2008
@@ -25,7 +25,8 @@
 import java.util.Comparator;
 
 /**
- * <p>A simple {@link org.apache.mahout.cf.taste.recommender.Rescorer} which always returns the original score.</p>
+ * <p>Defines ordering on {@link RecommendedItem} by the rescored value of the recommendations' estimated
+ * preference value, from high to low.</p>
  */
 final class ByRescoreComparator implements Comparator<RecommendedItem>, Serializable {
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java Sat Nov  8 06:39:41 2008
@@ -28,9 +28,9 @@
 import java.util.Random;
 
 /**
- * <p>Defines cluster similarity as the <em>smallest</em> correlation between any two
+ * <p>Defines cluster similarity as the <em>smallest</em> similarity between any two
  * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that clusters are close
- * when <em>all pairs</em> of their members have relatively high correlation.</p>
+ * when <em>all pairs</em> of their members have relatively high similarity.</p>
  */
 public final class FarthestNeighborClusterSimilarity implements ClusterSimilarity {
 
@@ -40,17 +40,17 @@
   private final double samplingPercentage;
 
   /**
-   * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.
-   * All user-user correlations are examined.</p>
+   * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link UserSimilarity}.
+   * All user-user similarities are examined.</p>
    */
   public FarthestNeighborClusterSimilarity(UserSimilarity similarity) {
     this(similarity, 1.0);
   }
 
   /**
-   * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.
+   * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link UserSimilarity}.
    * By setting <code>samplingPercentage</code> to a value less than 1.0, this implementation will only examine
-   * that fraction of all user-user correlations between two clusters, increasing performance at the expense
+   * that fraction of all user-user similarities between two clusters, increasing performance at the expense
    * of accuracy.</p>
    */
   public FarthestNeighborClusterSimilarity(UserSimilarity similarity, double samplingPercentage) {
@@ -69,22 +69,22 @@
     if (cluster1.isEmpty() || cluster2.isEmpty()) {
       return Double.NaN;
     }
-    double leastCorrelation = Double.POSITIVE_INFINITY;
+    double leastSimilarity = Double.POSITIVE_INFINITY;
     for (User user1 : cluster1) {
       if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
         for (User user2 : cluster2) {
-          double theCorrelation = similarity.userSimilarity(user1, user2);
-          if (theCorrelation < leastCorrelation) {
-            leastCorrelation = theCorrelation;
+          double theSimilarity = similarity.userSimilarity(user1, user2);
+          if (theSimilarity < leastSimilarity) {
+            leastSimilarity = theSimilarity;
           }
         }
       }
     }
     // We skipped everything? well, at least try comparing the first Users to get some value
-    if (leastCorrelation == Double.POSITIVE_INFINITY) {
+    if (leastSimilarity == Double.POSITIVE_INFINITY) {
       return similarity.userSimilarity(cluster1.iterator().next(), cluster2.iterator().next());
     }
-    return leastCorrelation;
+    return leastSimilarity;
   }
 
   public void refresh(Collection<Refreshable> alreadyRefreshed) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java Sat Nov  8 06:39:41 2008
@@ -46,14 +46,16 @@
  * {@link org.apache.mahout.cf.taste.model.DataModel} and {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity}
  * to produce recommendations. This class represents Taste's support for item-based recommenders.</p>
  *
- * <p>The {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} is the most important point to discuss here. Item-based recommenders
- * are useful because they can take advantage of something to be very fast: they base their computations
- * on item correlation, not user correlation, and item correlation is relatively static. It can be
+ * <p>The {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} is the most important point to discuss here.
+ * Item-based recommenders are useful because they can take advantage of something to be very fast: they base
+ * their computations on item similarity, not user similarity, and item similarity is relatively static. It can be
  * precomputed, instead of re-computed in real time.</p>
  *
- * <p>Thus it's strongly recommended that you use {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity}
- * with pre-computed correlations if you're going to use this class. You can use
- * {@link org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity} too, which computes correlations in real-time,
+ * <p>Thus it's strongly recommended that you use
+ * {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity}
+ * with pre-computed similarities if you're going to use this class. You can use
+ * {@link org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity} too, 
+ * which computes similarities in real-time,
  * but will probably find this painfully slow for large amounts of data.</p>
  */
 public final class GenericItemBasedRecommender extends AbstractRecommender implements ItemBasedRecommender {
@@ -190,21 +192,21 @@
 
   private double doEstimatePreference(User theUser, Item item) throws TasteException {
     double preference = 0.0;
-    double totalCorrelation = 0.0;
+    double totalSimilarity = 0.0;
     Preference[] prefs = theUser.getPreferencesAsArray();
     for (int i = 0; i < prefs.length; i++) {
       Preference pref = prefs[i];
-      double theCorrelation = similarity.itemSimilarity(item, pref.getItem());
-      if (!Double.isNaN(theCorrelation)) {
+      double theSimilarity = similarity.itemSimilarity(item, pref.getItem());
+      if (!Double.isNaN(theSimilarity)) {
         // Why + 1.0? similarity ranges from -1.0 to 1.0, and we want to use it as a simple
         // weight. To avoid negative values, we add 1.0 to put it in
         // the [0.0,2.0] range which is reasonable for weights
-        theCorrelation += 1.0;
-        preference += theCorrelation * pref.getValue();
-        totalCorrelation += theCorrelation;
+        theSimilarity += 1.0;
+        preference += theSimilarity * pref.getValue();
+        totalSimilarity += theSimilarity;
       }
     }
-    return totalCorrelation == 0.0 ? Double.NaN : preference / totalCorrelation;
+    return totalSimilarity == 0.0 ? Double.NaN : preference / totalSimilarity;
   }
 
   private static int getNumPreferences(User theUser) {
@@ -307,8 +309,8 @@
       if (pref == null) {
         return Double.NaN;
       }
-      double correlationValue = similarity.itemSimilarity(recommendedItem, item);
-      return (1.0 + correlationValue) * pref.getValue();
+      double similarityValue = similarity.itemSimilarity(recommendedItem, item);
+      return (1.0 + similarityValue) * pref.getValue();
     }
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java Sat Nov  8 06:39:41 2008
@@ -82,13 +82,7 @@
    */
   public int compareTo(RecommendedItem other) {
     double otherValue = other.getValue();
-    if (value < otherValue) {
-      return 1;
-    } else if (value > otherValue) {
-      return -1;
-    } else {
-      return 0;
-    }
+    return value > otherValue ? -1 : value < otherValue ? 1 : 0;
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java Sat Nov  8 06:39:41 2008
@@ -118,20 +118,13 @@
                                      Rescorer<Pair<User, User>> rescorer) throws TasteException {
     User toUser = getDataModel().getUser(userID);
     TopItems.Estimator<User> estimator = new MostSimilarEstimator(toUser, similarity, rescorer);
-    return doMostSimilarUsers(userID, howMany, estimator);
+    return doMostSimilarUsers(howMany, estimator);
   }
 
-  private List<User> doMostSimilarUsers(Object userID,
-                                        int howMany,
+  private List<User> doMostSimilarUsers(int howMany,
                                         TopItems.Estimator<User> estimator) throws TasteException {
     DataModel model = getDataModel();
-    User toUser = model.getUser(userID);
-    Collection<User> allUsers = new FastSet<User>(model.getNumUsers());
-    for (User user : model.getUsers()) {
-      allUsers.add(user);
-    }
-    allUsers.remove(toUser);
-    return TopItems.getTopUsers(howMany, allUsers, null, estimator);
+    return TopItems.getTopUsers(howMany, model.getUsers(), null, estimator);
   }
 
   private double doEstimatePreference(User theUser, Collection<User> theNeighborhood, Item item)
@@ -140,21 +133,21 @@
       return Double.NaN;
     }
     double preference = 0.0;
-    double totalCorrelation = 0.0;
+    double totalSimilarity = 0.0;
     for (User user : theNeighborhood) {
       if (!user.equals(theUser)) {
         // See GenericItemBasedRecommender.doEstimatePreference() too
         Preference pref = user.getPreferenceFor(item.getID());
         if (pref != null) {
-          double theCorrelation = similarity.userSimilarity(theUser, user) + 1.0;
-          if (!Double.isNaN(theCorrelation)) {
-            preference += theCorrelation * pref.getValue();
-            totalCorrelation += theCorrelation;
+          double theSimilarity = similarity.userSimilarity(theUser, user) + 1.0;
+          if (!Double.isNaN(theSimilarity)) {
+            preference += theSimilarity * pref.getValue();
+            totalSimilarity += theSimilarity;
           }
         }
       }
     }
-    return totalCorrelation == 0.0 ? Double.NaN : preference / totalCorrelation;
+    return totalSimilarity == 0.0 ? Double.NaN : preference / totalSimilarity;
   }
 
   private static Set<Item> getAllOtherItems(Iterable<User> theNeighborhood, User theUser) {
@@ -196,6 +189,10 @@
     }
 
     public double estimate(User user) throws TasteException {
+      // Don't consider the user itself as a possible most similar user
+      if (user.equals(toUser)) {
+        return Double.NaN;
+      }
       Pair<User, User> pair = new Pair<User, User>(toUser, user);
       if (rescorer != null && rescorer.isFiltered(pair)) {
         return Double.NaN;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java Sat Nov  8 06:39:41 2008
@@ -28,9 +28,9 @@
 import java.util.Random;
 
 /**
- * <p>Defines cluster similarity as the <em>largest</em> correlation between any two
+ * <p>Defines cluster similarity as the <em>largest</em> similarity between any two
  * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that clusters are close
- * when <em>some pair</em> of their members has high correlation.</p>
+ * when <em>some pair</em> of their members has high similarity.</p>
  */
 public final class NearestNeighborClusterSimilarity implements ClusterSimilarity {
 
@@ -41,7 +41,7 @@
 
   /**
    * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.
-   * All user-user correlations are examined.</p>
+   * All user-user similarities are examined.</p>
    */
   public NearestNeighborClusterSimilarity(UserSimilarity similarity) {
     this(similarity, 1.0);
@@ -50,7 +50,7 @@
   /**
    * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.
    * By setting <code>samplingPercentage</code> to a value less than 1.0, this implementation will only examine
-   * that fraction of all user-user correlations between two clusters, increasing performance at the expense
+   * that fraction of all user-user similarities between two clusters, increasing performance at the expense
    * of accuracy.</p>
    */
   public NearestNeighborClusterSimilarity(UserSimilarity similarity, double samplingPercentage) {
@@ -69,22 +69,22 @@
     if (cluster1.isEmpty() || cluster2.isEmpty()) {
       return Double.NaN;
     }
-    double greatestCorrelation = Double.NEGATIVE_INFINITY;
+    double greatestSimilarity = Double.NEGATIVE_INFINITY;
     for (User user1 : cluster1) {
       if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
         for (User user2 : cluster2) {
-          double theCorrelation = similarity.userSimilarity(user1, user2);
-          if (theCorrelation > greatestCorrelation) {
-            greatestCorrelation = theCorrelation;
+          double theSimilarity = similarity.userSimilarity(user1, user2);
+          if (theSimilarity > greatestSimilarity) {
+            greatestSimilarity = theSimilarity;
           }
         }
       }
     }
     // We skipped everything? well, at least try comparing the first Users to get some value
-    if (greatestCorrelation == Double.NEGATIVE_INFINITY) {
+    if (greatestSimilarity == Double.NEGATIVE_INFINITY) {
       return similarity.userSimilarity(cluster1.iterator().next(), cluster2.iterator().next());
     }
-    return greatestCorrelation;
+    return greatestSimilarity;
   }
 
   public void refresh(Collection<Refreshable> alreadyRefreshed) {

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java?rev=712399&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java Sat Nov  8 06:39:41 2008
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+
+/**
+ * Simply encapsulates a {@link User} and a similarity value.
+ */
+public final class SimilarUser implements Comparable<SimilarUser> {
+
+  private final User user;
+  private final double similarity;
+
+  public SimilarUser(User user, double similarity) {
+    this.user = user;
+    this.similarity = similarity;
+  }
+
+  User getUser() {
+    return user;
+  }
+
+  double getSimilarity() {
+    return similarity;
+  }
+
+  @Override
+  public int hashCode() {
+    return user.hashCode() ^ RandomUtils.hashDouble(similarity);
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof SimilarUser)) {
+      return false;
+    }
+    SimilarUser other = (SimilarUser) o;
+    return user.equals(other.user) && similarity == other.similarity;
+  }
+
+  @Override
+  public String toString() {
+    return "SimilarUser[user:" + user + ", similarity:" + similarity + ']';
+  }
+
+  /**
+   * Defines an ordering from most similar to least similar.
+   */
+  public int compareTo(SimilarUser other) {
+    double otherSimilarity = other.similarity;
+    return similarity > otherSimilarity ? -1 : similarity < otherSimilarity ? 1 : 0;
+  }
+
+}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java Sat Nov  8 06:39:41 2008
@@ -19,21 +19,19 @@
 
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.model.Item;
-import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 import org.apache.mahout.cf.taste.recommender.Rescorer;
 
 import java.util.ArrayList;
-import java.util.LinkedList;
+import java.util.Collections;
 import java.util.List;
-import java.util.ListIterator;
+import java.util.PriorityQueue;
+import java.util.Queue;
 
 /**
- * <p>A simple class that refactors the "find top N recommended items" logic that is used in
- * several places in Taste.</p>
+ * <p>A simple class that refactors the "find top N things" logic that is used in several places.</p>
  */
 public final class TopItems {
 
@@ -47,68 +45,60 @@
     if (allItems == null || estimator == null) {
       throw new IllegalArgumentException("argument is null");
     }
-    LinkedList<RecommendedItem> topItems = new LinkedList<RecommendedItem>();
+    Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(howMany + 1, Collections.reverseOrder());
     boolean full = false;
+    double lowestTopValue = Double.NEGATIVE_INFINITY;
     for (Item item : allItems) {
       if (item.isRecommendable() && (rescorer == null || !rescorer.isFiltered(item))) {
         double preference = estimator.estimate(item);
         double rescoredPref = rescorer == null ? preference : rescorer.rescore(item, preference);
-        if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > topItems.getLast().getValue())) {
-          // I think this is faster than Collections.binarySearch() over a LinkedList since our
-          // comparisons are cheap, which binarySearch() economizes at the expense of more traversals.
-          // We also know that the right position tends to be at the end of the list.
-          ListIterator<RecommendedItem> iterator = topItems.listIterator(topItems.size());
-          while (iterator.hasPrevious()) {
-            if (rescoredPref <= iterator.previous().getValue()) {
-              iterator.next();
-              break;
-            }
-          }
-          iterator.add(new GenericRecommendedItem(item, rescoredPref));
+        if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > lowestTopValue)) {
+          topItems.add(new GenericRecommendedItem(item, rescoredPref));
           if (full) {
-            topItems.removeLast();
+            topItems.poll();
           } else if (topItems.size() > howMany) {
             full = true;
-            topItems.removeLast();
+            topItems.poll();
           }
+          lowestTopValue = topItems.peek().getValue();
         }
       }
     }
-    return topItems;
+    List<RecommendedItem> result = new ArrayList<RecommendedItem>(topItems.size());
+    result.addAll(topItems);
+    Collections.sort(result);
+    return result;
   }
 
   public static List<User> getTopUsers(int howMany,
-                                       Iterable<User> allUsers,
+                                       Iterable<? extends User> allUsers,
                                        Rescorer<User> rescorer,
                                        Estimator<User> estimator) throws TasteException {
-    LinkedList<SimilarUser> topUsers = new LinkedList<SimilarUser>();
+    Queue<SimilarUser> topUsers = new PriorityQueue<SimilarUser>(howMany + 1, Collections.reverseOrder());
     boolean full = false;
+    double lowestTopValue = Double.NEGATIVE_INFINITY;
     for (User user : allUsers) {
       if (rescorer != null && rescorer.isFiltered(user)) {
         continue;
       }
       double similarity = estimator.estimate(user);
       double rescoredSimilarity = rescorer == null ? similarity : rescorer.rescore(user, similarity);
-      if (!Double.isNaN(rescoredSimilarity) &&
-          (!full || rescoredSimilarity > topUsers.getLast().getSimilarity())) {
-        ListIterator<SimilarUser> iterator = topUsers.listIterator(topUsers.size());
-        while (iterator.hasPrevious()) {
-          if (rescoredSimilarity <= iterator.previous().getSimilarity()) {
-            iterator.next();
-            break;
-          }
-        }
-        iterator.add(new SimilarUser(user, similarity));
+      if (!Double.isNaN(rescoredSimilarity) && (!full || rescoredSimilarity > lowestTopValue)) {
+        topUsers.add(new SimilarUser(user, similarity));
         if (full) {
-          topUsers.removeLast();
+          topUsers.poll();
         } else if (topUsers.size() > howMany) {
           full = true;
-          topUsers.removeLast();
+          topUsers.poll();
         }
+        lowestTopValue = topUsers.peek().getSimilarity();
       }
     }
-    List<User> result = new ArrayList<User>(topUsers.size());
-    for (SimilarUser similarUser : topUsers) {
+    List<SimilarUser> sorted = new ArrayList<SimilarUser>(topUsers.size());
+    sorted.addAll(topUsers);
+    Collections.sort(sorted);
+    List<User> result = new ArrayList<User>(sorted.size());
+    for (SimilarUser similarUser : sorted) {
       result.add(similarUser.getUser());
     }
     return result;
@@ -121,32 +111,30 @@
    * @see GenericItemSimilarity#GenericItemSimilarity(org.apache.mahout.cf.taste.similarity.ItemSimilarity,
    *  org.apache.mahout.cf.taste.model.DataModel, int)
    */
-  public static List<GenericItemSimilarity.ItemItemCorrelation> getTopItemItemCorrelations(
-          int howMany, Iterable<GenericItemSimilarity.ItemItemCorrelation> allCorrelations) {
-    LinkedList<GenericItemSimilarity.ItemItemCorrelation> topCorrelations =
-            new LinkedList<GenericItemSimilarity.ItemItemCorrelation>();
+  public static List<GenericItemSimilarity.ItemItemSimilarity> getTopItemItemSimilarities(
+          int howMany, Iterable<GenericItemSimilarity.ItemItemSimilarity> allSimilarities) {
+    Queue<GenericItemSimilarity.ItemItemSimilarity> topSimilarities =
+            new PriorityQueue<GenericItemSimilarity.ItemItemSimilarity>(howMany + 1, Collections.reverseOrder());
     boolean full = false;
-    for (GenericItemSimilarity.ItemItemCorrelation correlation : allCorrelations) {
-      double value = correlation.getValue();
-      if (!full || value > topCorrelations.getLast().getValue()) {
-        ListIterator<GenericItemSimilarity.ItemItemCorrelation> iterator =
-                topCorrelations.listIterator(topCorrelations.size());
-        while (iterator.hasPrevious()) {
-          if (value <= iterator.previous().getValue()) {
-            iterator.next();
-            break;
-          }
-        }
-        iterator.add(correlation);
+    double lowestTopValue = Double.NEGATIVE_INFINITY;
+    for (GenericItemSimilarity.ItemItemSimilarity similarity : allSimilarities) {
+      double value = similarity.getValue();
+      if (!full || value > lowestTopValue) {
+        topSimilarities.add(similarity);
         if (full) {
-          topCorrelations.removeLast();
-        } else if (topCorrelations.size() > howMany) {
+          topSimilarities.poll();
+        } else if (topSimilarities.size() > howMany) {
           full = true;
-          topCorrelations.removeLast();
+          topSimilarities.poll();
         }
+        lowestTopValue = topSimilarities.peek().getValue();
       }
     }
-    return topCorrelations;
+    List<GenericItemSimilarity.ItemItemSimilarity> result =
+      new ArrayList<GenericItemSimilarity.ItemItemSimilarity>(topSimilarities.size());
+    result.addAll(topSimilarities);
+    Collections.sort(result);
+    return result;
   }
 
   public static interface Estimator<T> {
@@ -154,58 +142,4 @@
     double estimate(T thing) throws TasteException;
   }
 
-  // Hmm, should this be exposed publicly like RecommendedItem?
-  private static class SimilarUser implements User {
-
-    private final User user;
-    private final double similarity;
-
-    private SimilarUser(User user, double similarity) {
-      this.user = user;
-      this.similarity = similarity;
-    }
-
-    public Object getID() {
-      return user.getID();
-    }
-
-    public Preference getPreferenceFor(Object itemID) {
-      return user.getPreferenceFor(itemID);
-    }
-
-    public Iterable<Preference> getPreferences() {
-      return user.getPreferences();
-    }
-
-    public Preference[] getPreferencesAsArray() {
-      return user.getPreferencesAsArray();
-    }
-
-    User getUser() {
-      return user;
-    }
-
-    double getSimilarity() {
-      return similarity;
-    }
-
-    @Override
-    public int hashCode() {
-      return user.hashCode() ^ RandomUtils.hashDouble(similarity);
-    }
-
-    @Override
-    public boolean equals(Object o) {
-      if (!(o instanceof SimilarUser)) {
-        return false;
-      }
-      SimilarUser other = (SimilarUser) o;
-      return user.equals(other.user) && similarity == other.similarity;
-    }
-
-    public int compareTo(User user) {
-      return this.user.compareTo(user);
-    }
-  }
-
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Sat Nov  8 06:39:41 2008
@@ -385,8 +385,8 @@
 
     TopItems.Estimator<Item> estimator = new Estimator(cluster);
 
-    List<RecommendedItem> topItems =
-            TopItems.getTopItems(Integer.MAX_VALUE, allItems, null, estimator);
+    // TODO don't hardcode 100, figure out some reasonable value
+    List<RecommendedItem> topItems = TopItems.getTopItems(100, allItems, null, estimator);
 
     log.debug("Recommendations are: {}", topItems);
     return Collections.unmodifiableList(topItems);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/AbstractSimilarity.java Sat Nov  8 06:39:41 2008
@@ -28,7 +28,7 @@
 import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.User;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
+import org.apache.mahout.cf.taste.transforms.SimilarityTransform;
 import org.apache.mahout.cf.taste.transforms.PreferenceTransform;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -38,8 +38,7 @@
 
 /**
  * Abstract superclass encapsulating functionality that is common to most
- * implementations in this package, including the basic correlation algorithm,
- * normalization, transforms, etc.
+ * implementations in this package.
  */
 abstract class AbstractSimilarity implements UserSimilarity, ItemSimilarity {
 
@@ -48,7 +47,7 @@
   private final DataModel dataModel;
   private PreferenceInferrer inferrer;
   private PreferenceTransform prefTransform;
-  private CorrelationTransform<Object> correlationTransform;
+  private SimilarityTransform<Object> similarityTransform;
   private boolean weighted;
   private int cachedNumItems;
   private int cachedNumUsers;
@@ -82,7 +81,7 @@
     this.refreshHelper.addDependency(this.dataModel);
     this.refreshHelper.addDependency(this.inferrer);
     this.refreshHelper.addDependency(this.prefTransform);
-    this.refreshHelper.addDependency(this.correlationTransform);
+    this.refreshHelper.addDependency(this.similarityTransform);
   }
 
   final DataModel getDataModel() {
@@ -108,12 +107,12 @@
     this.prefTransform = prefTransform;
   }
 
-  public final CorrelationTransform<Object> getCorrelationTransform() {
-    return correlationTransform;
+  public final SimilarityTransform<Object> getSimilarityTransform() {
+    return similarityTransform;
   }
 
-  public final void setCorrelationTransform(CorrelationTransform<Object> correlationTransform) {
-    this.correlationTransform = correlationTransform;
+  public final void setSimilarityTransform(SimilarityTransform<Object> similarityTransform) {
+    this.similarityTransform = similarityTransform;
   }
 
   final boolean isWeighted() {
@@ -121,12 +120,12 @@
   }
 
   /**
-   * <p>Several subclasses in this package implement this method to actually compute the correlation
+   * <p>Several subclasses in this package implement this method to actually compute the similarity
    * from figures computed over users or items. Note that the computations in this class "center" the
    * data, such that X and Y's mean are 0.</p>
    *
    * <p>Note that the sum of all X and Y values must then be 0. This value isn't passed down into
-   * the standard correlation computations as a result.</p>
+   * the standard similarity computations as a result.</p>
    *
    * @param n total number of users or items
    * @param sumXY sum of product of user/item preference values, over all items/users prefererred by
@@ -134,7 +133,7 @@
    * @param sumX2 sum of the square of user/item preference values, over the first item/user
    * @param sumY2 sum of the square of the user/item preference values, over the second item/user
    * @param sumXYdiff2 sum of squares of differences in X and Y values
-   * @return correlation value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no correlation
+   * @return similarity value between -1.0 and 1.0, inclusive, or {@link Double#NaN} if no similarity
    *         can be computed (e.g. when no {@link Item}s have been rated by both {@link User}s
    */
   abstract double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2);
@@ -244,8 +243,8 @@
 
     double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
 
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(user1, user2, result);
+    if (similarityTransform != null) {
+      result = similarityTransform.transformSimilarity(user1, user2, result);
     }
 
     if (!Double.isNaN(result)) {
@@ -332,8 +331,8 @@
 
     double result = computeResult(count, centeredSumXY, centeredSumX2, centeredSumY2, sumXYdiff2);
 
-    if (correlationTransform != null) {
-      result = correlationTransform.transformCorrelation(item1, item2, result);
+    if (similarityTransform != null) {
+      result = similarityTransform.transformSimilarity(item1, item2, result);
     }
 
     if (!Double.isNaN(result)) {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingItemSimilarity.java Sat Nov  8 06:39:41 2008
@@ -35,7 +35,7 @@
 public final class CachingItemSimilarity implements ItemSimilarity {
 
   private final ItemSimilarity similarity;
-  private final Cache<Pair<Item, Item>, Double> correlationCache;
+  private final Cache<Pair<Item, Item>, Double> similarityCache;
 
   public CachingItemSimilarity(ItemSimilarity similarity, DataModel dataModel) throws TasteException {
     if (similarity == null) {
@@ -43,7 +43,7 @@
     }
     this.similarity = similarity;
     int maxCacheSize = dataModel.getNumItems(); // just a dumb heuristic for sizing
-    this.correlationCache = new Cache<Pair<Item, Item>, Double>(new CorrelationRetriever(similarity), maxCacheSize);
+    this.similarityCache = new Cache<Pair<Item, Item>, Double>(new SimilarityRetriever(similarity), maxCacheSize);
   }
 
   public double itemSimilarity(Item item1, Item item2) throws TasteException {
@@ -53,21 +53,22 @@
     } else {
       key = new Pair<Item, Item>(item2, item1);
     }
-    return correlationCache.get(key);
+    return similarityCache.get(key);
   }
 
   public void refresh(Collection<Refreshable> alreadyRefreshed) {
-    correlationCache.clear();
+    similarityCache.clear();
     alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
     RefreshHelper.maybeRefresh(alreadyRefreshed, similarity);
   }
 
-  private static final class CorrelationRetriever implements Retriever<Pair<Item, Item>, Double> {
+  private static final class SimilarityRetriever implements Retriever<Pair<Item, Item>, Double> {
     private final ItemSimilarity similarity;
 
-    private CorrelationRetriever(ItemSimilarity similarity) {
+    private SimilarityRetriever(ItemSimilarity similarity) {
       this.similarity = similarity;
     }
+
     public Double get(Pair<Item, Item> key) throws TasteException {
       return similarity.itemSimilarity(key.getFirst(), key.getSecond());
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CachingUserSimilarity.java Sat Nov  8 06:39:41 2008
@@ -36,7 +36,7 @@
 public final class CachingUserSimilarity implements UserSimilarity {
 
   private final UserSimilarity similarity;
-  private final Cache<Pair<User, User>, Double> correlationCache;
+  private final Cache<Pair<User, User>, Double> similarityCache;
 
   public CachingUserSimilarity(UserSimilarity similarity, DataModel dataModel) throws TasteException {
     if (similarity == null) {
@@ -44,7 +44,7 @@
     }
     this.similarity = similarity;
     int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing    
-    this.correlationCache = new Cache<Pair<User, User>, Double>(new CorrelationRetriever(similarity), maxCacheSize);
+    this.similarityCache = new Cache<Pair<User, User>, Double>(new SimilarityRetriever(similarity), maxCacheSize);
   }
 
   public double userSimilarity(User user1, User user2) throws TasteException {
@@ -54,26 +54,27 @@
     } else {
       key = new Pair<User, User>(user2, user1);
     }
-    return correlationCache.get(key);
+    return similarityCache.get(key);
   }
 
   public void setPreferenceInferrer(PreferenceInferrer inferrer) {
-    correlationCache.clear();
+    similarityCache.clear();
     similarity.setPreferenceInferrer(inferrer);
   }
 
   public void refresh(Collection<Refreshable> alreadyRefreshed) {
-    correlationCache.clear();
+    similarityCache.clear();
     alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
     RefreshHelper.maybeRefresh(alreadyRefreshed, similarity);
   }
 
-  private static final class CorrelationRetriever implements Retriever<Pair<User, User>, Double> {
+  private static final class SimilarityRetriever implements Retriever<Pair<User, User>, Double> {
     private final UserSimilarity similarity;
 
-    private CorrelationRetriever(UserSimilarity similarity) {
+    private SimilarityRetriever(UserSimilarity similarity) {
       this.similarity = similarity;
     }
+
     public Double get(Pair<User, User> key) throws TasteException {
       return similarity.userSimilarity(key.getFirst(), key.getSecond());
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java Sat Nov  8 06:39:41 2008
@@ -22,12 +22,12 @@
 import org.apache.mahout.cf.taste.model.DataModel;
 
 /**
- * <p>An implementation of a "correlation" based on the Euclidean "distance" between two
+ * <p>An implementation of a "similarity" based on the Euclidean "distance" between two
  * {@link org.apache.mahout.cf.taste.model.User}s X and Y. Thinking of items as dimensions
  * and preferences as points along those dimensions, a distance is computed using all
  * items (dimensions) where both users have expressed a preference for that item. This
  * is simply the square root of the sum of the squares of differences in position (preference)
- * along each dimension. The correlation is then computed as 1 / (1 + distance), so the
+ * along each dimension. The similarity is then computed as 1 / (1 + distance), so the
  * resulting values are in the range (0,1].</p>
  */
 public final class EuclideanDistanceSimilarity extends AbstractSimilarity {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java Sat Nov  8 06:39:41 2008
@@ -35,7 +35,7 @@
 
 /**
  * <p>A "generic" {@link ItemSimilarity} which takes a static list of precomputed {@link Item}
- * correlations and bases its responses on that alone. The values may have been precomputed
+ * similarities and bases its responses on that alone. The values may have been precomputed
  * offline by another process, stored in a file, and then read and fed into an instance of this class.</p>
  *
  * <p>This is perhaps the best {@link ItemSimilarity} to use with
@@ -45,98 +45,104 @@
  */
 public final class GenericItemSimilarity implements ItemSimilarity {
 
-  private final Map<Item, Map<Item, Double>> correlationMaps = new FastMap<Item, Map<Item, Double>>();
+  private final Map<Item, Map<Item, Double>> similarityMaps = new FastMap<Item, Map<Item, Double>>();
 
   /**
-   * <p>Creates a {@link GenericItemSimilarity} from a precomputed list of {@link ItemItemCorrelation}s. Each
-   * represents the correlation between two distinct items. Since correlation is assumed to be symmetric,
-   * it is not necessary to specify correlation between item1 and item2, and item2 and item1. Both are the same.
-   * It is also not necessary to specify a correlation between any item and itself; these are assumed to be 1.0.</p>
+   * <p>Creates a {@link GenericItemSimilarity} from a precomputed list of
+   * {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity.ItemItemSimilarity}s. Each
+   * represents the similarity between two distinct items. Since similarity is assumed to be symmetric,
+   * it is not necessary to specify similarity between item1 and item2, and item2 and item1. Both are the same.
+   * It is also not necessary to specify a similarity between any item and itself; these are assumed to be 1.0.</p>
    *
-   * <p>Note that specifying a correlation between two items twice is not an error, but, the later value will
+   * <p>Note that specifying a similarity between two items twice is not an error, but the later value will
    * win.</p>
    *
-   * @param correlations set of {@link ItemItemCorrelation}s on which to base this instance
+   * @param similarities set of
+   *  {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity.ItemItemSimilarity}s
+   *  on which to base this instance
    */
-  public GenericItemSimilarity(Iterable<ItemItemCorrelation> correlations) {
-    initCorrelationMaps(correlations);
+  public GenericItemSimilarity(Iterable<ItemItemSimilarity> similarities) {
+    initSimilarityMaps(similarities);
   }
 
   /**
-   * <p>Like {@link #GenericItemSimilarity(Iterable)}, but will only keep the specified number of correlations
-   * from the given {@link Iterable} of correlations. It will keep those with the highest correlation --
+   * <p>Like {@link #GenericItemSimilarity(Iterable)}, but will only keep the specified number of similarities
+   * from the given {@link Iterable} of similarities. It will keep those with the highest similarity --
    * those that are therefore most important.</p>
    *
    * <p>Thanks to tsmorton for suggesting this and providing part of the implementation.</p>
    *
-   * @param correlations set of {@link ItemItemCorrelation}s on which to base this instance
-   * @param maxToKeep maximum number of correlations to keep
+   * @param similarities set of
+   *  {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity.ItemItemSimilarity}s
+   *  on which to base this instance
+   * @param maxToKeep maximum number of similarities to keep
    */
-  public GenericItemSimilarity(Iterable<ItemItemCorrelation> correlations, int maxToKeep) {
-    Iterable<ItemItemCorrelation> keptCorrelations = TopItems.getTopItemItemCorrelations(maxToKeep, correlations);
-    initCorrelationMaps(keptCorrelations);
+  public GenericItemSimilarity(Iterable<ItemItemSimilarity> similarities, int maxToKeep) {
+    Iterable<ItemItemSimilarity> keptSimilarities = TopItems.getTopItemItemSimilarities(maxToKeep, similarities);
+    initSimilarityMaps(keptSimilarities);
   }
 
   /**
-   * <p>Builds a list of item-item correlations given an {@link ItemSimilarity} implementation and a
-   * {@link DataModel}, rather than a list of {@link ItemItemCorrelation}s.</p>
+   * <p>Builds a list of item-item similarities given an {@link ItemSimilarity} implementation and a
+   * {@link DataModel}, rather than a list of
+   * {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity.ItemItemSimilarity}s.</p>
    *
    * <p>It's valid to build a {@link GenericItemSimilarity} this way, but perhaps missing some of the point
-   * of an item-based recommender. Item-based recommenders use the assumption that item-item correlations
+   * of an item-based recommender. Item-based recommenders use the assumption that item-item similarities
    * are relatively fixed, and might be known already independent of user preferences. Hence it is useful
    * to inject that information, using {@link #GenericItemSimilarity(Iterable)}.</p>
    *
-   * @param otherSimilarity other {@link ItemSimilarity} to get correlations from
+   * @param otherSimilarity other {@link ItemSimilarity} to get similarities from
    * @param dataModel data model to get {@link Item}s from
    * @throws TasteException if an error occurs while accessing the {@link DataModel} items
    */
   public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel) throws TasteException {
     List<? extends Item> items = IteratorUtils.iterableToList(dataModel.getItems());
-    Iterator<ItemItemCorrelation> it = new DataModelCorrelationsIterator(otherSimilarity, items);
-    initCorrelationMaps(new IteratorIterable<ItemItemCorrelation>(it));
+    Iterator<ItemItemSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, items);
+    initSimilarityMaps(new IteratorIterable<ItemItemSimilarity>(it));
   }
 
   /**
    * <p>Like {@link #GenericItemSimilarity(ItemSimilarity, DataModel)} )}, but will only
-   * keep the specified number of correlations from the given {@link DataModel}.
-   * It will keep those with the highest correlation -- those that are therefore most important.</p>
+   * keep the specified number of similarities from the given {@link DataModel}.
+   * It will keep those with the highest similarity -- those that are therefore most important.</p>
    *
    * <p>Thanks to tsmorton for suggesting this and providing part of the implementation.</p>
    *
-   * @param otherSimilarity other {@link ItemSimilarity} to get correlations from
+   * @param otherSimilarity other {@link ItemSimilarity} to get similarities from
    * @param dataModel data model to get {@link Item}s from
-   * @param maxToKeep maximum number of correlations to keep
+   * @param maxToKeep maximum number of similarities to keep
    * @throws TasteException if an error occurs while accessing the {@link DataModel} items
    */
   public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
           throws TasteException {
     List<? extends Item> items = IteratorUtils.iterableToList(dataModel.getItems());
-    Iterator<ItemItemCorrelation> it = new DataModelCorrelationsIterator(otherSimilarity, items);
-    Iterable<ItemItemCorrelation> keptCorrelations =
-            TopItems.getTopItemItemCorrelations(maxToKeep, new IteratorIterable<ItemItemCorrelation>(it));
-    initCorrelationMaps(keptCorrelations);
+    Iterator<ItemItemSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, items);
+    Iterable<ItemItemSimilarity> keptSimilarities =
+            TopItems.getTopItemItemSimilarities(maxToKeep, new IteratorIterable<ItemItemSimilarity>(it));
+    initSimilarityMaps(keptSimilarities);
   }
 
-  private void initCorrelationMaps(Iterable<ItemItemCorrelation> correlations) {
-    for (ItemItemCorrelation iic : correlations) {
-      Item correlationItem1 = iic.getItem1();
-      Item correlationItem2 = iic.getItem2();
-      int compare = correlationItem1.compareTo(correlationItem2);
+  private void initSimilarityMaps(Iterable<ItemItemSimilarity> similarities) {
+    for (ItemItemSimilarity iic : similarities) {
+      Item similarityItem1 = iic.getItem1();
+      Item similarityItem2 = iic.getItem2();
+      int compare = similarityItem1.compareTo(similarityItem2);
       if (compare != 0) {
         // Order them -- first key should be the "smaller" one
         Item item1;
         Item item2;
         if (compare < 0) {
-          item1 = correlationItem1;
-          item2 = correlationItem2;
+          item1 = similarityItem1;
+          item2 = similarityItem2;
         } else {
-          item1 = correlationItem2;
-          item2 = correlationItem1;
+          item1 = similarityItem2;
+          item2 = similarityItem1;
         }
-        Map<Item, Double> map = correlationMaps.get(item1);
+        Map<Item, Double> map = similarityMaps.get(item1);
         if (map == null) {
           map = new FastMap<Item, Double>();
-          correlationMaps.put(item1, map);
+          similarityMaps.put(item1, map);
         }
         map.put(item2, iic.getValue());
       }
@@ -145,13 +151,13 @@
   }
 
   /**
-   * <p>Returns the correlation between two items. Note that correlation is assumed to be symmetric, that
-   * <code>itemCorrelation(item1, item2) == itemCorrelation(item2, item1)</code>, and that
-   * <code>itemCorrelation(item1, item1) == 1.0</code> for all items.</p>
+   * <p>Returns the similarity between two items. Note that similarity is assumed to be symmetric, that
+   * <code>itemSimilarity(item1, item2) == itemSimilarity(item2, item1)</code>, and that
+   * <code>itemSimilarity(item1, item1) == 1.0</code> for all items.</p>
    *
    * @param item1 first item
    * @param item2 second item
-   * @return correlation between the two
+   * @return similarity between the two
    */
   public double itemSimilarity(Item item1, Item item2) {
     int compare = item1.compareTo(item2);
@@ -167,12 +173,12 @@
       first = item2;
       second = item1;
     }
-    Map<Item, Double> nextMap = correlationMaps.get(first);
+    Map<Item, Double> nextMap = similarityMaps.get(first);
     if (nextMap == null) {
       return Double.NaN;
     }
-    Double correlation = nextMap.get(second);
-    return correlation == null ? Double.NaN : correlation;
+    Double similarity = nextMap.get(second);
+    return similarity == null ? Double.NaN : similarity;
   }
 
   public void refresh(Collection<Refreshable> alreadyRefreshed) {
@@ -180,12 +186,9 @@
   }
 
   /**
-   * Encapsulates a correlation between two items. Correlation must be in the range [-1.0,1.0].
+   * Encapsulates a similarity between two items. Similarity must be in the range [-1.0,1.0].
    */
-  public static final class ItemItemCorrelation {
-
-    // Somehow I think this class should be a top-level class now.
-    // But I have a love affair with inner classes.
+  public static final class ItemItemSimilarity implements Comparable<ItemItemSimilarity> {
 
     private final Item item1;
     private final Item item2;
@@ -194,10 +197,10 @@
     /**
      * @param item1 first item
      * @param item2 second item
-     * @param value correlation between the two
+     * @param value similarity between the two
      * @throws IllegalArgumentException if value is NaN, less than -1.0 or greater than 1.0
      */
-    public ItemItemCorrelation(Item item1, Item item2, double value) {
+    public ItemItemSimilarity(Item item1, Item item2, double value) {
       if (item1 == null || item2 == null) {
         throw new IllegalArgumentException("An item is null");
       }
@@ -223,12 +226,20 @@
 
     @Override
     public String toString() {
-      return "ItemItemCorrelation[" + item1 + ',' + item2 + ':' + value + ']';
+      return "ItemItemSimilarity[" + item1 + ',' + item2 + ':' + value + ']';
+    }
+
+    /**
+     * Defines an ordering from highest similarity to lowest.
+     */
+    public int compareTo(ItemItemSimilarity other) {
+      double otherValue = other.value;
+      return value > otherValue ? -1 : value < otherValue ? 1 : 0;
     }
 
   }
 
-  private static final class DataModelCorrelationsIterator implements Iterator<ItemItemCorrelation> {
+  private static final class DataModelSimilaritiesIterator implements Iterator<ItemItemSimilarity> {
 
     private final ItemSimilarity otherSimilarity;
     private final List<? extends Item> items;
@@ -237,7 +248,7 @@
     private Item item1;
     private int j;
 
-    private DataModelCorrelationsIterator(ItemSimilarity otherSimilarity, List<? extends Item> items) {
+    private DataModelSimilaritiesIterator(ItemSimilarity otherSimilarity, List<? extends Item> items) {
       this.otherSimilarity = otherSimilarity;
       this.items = items;
       this.size = items.size();
@@ -250,19 +261,19 @@
       return i < size - 1;
     }
 
-    public ItemItemCorrelation next() {
+    public ItemItemSimilarity next() {
       if (!hasNext()) {
         throw new NoSuchElementException();
       }
       Item item2 = items.get(j);
-      double correlation;
+      double similarity;
       try {
-        correlation = otherSimilarity.itemSimilarity(item1, item2);
+        similarity = otherSimilarity.itemSimilarity(item1, item2);
       } catch (TasteException te) {
         // ugly:
         throw new RuntimeException(te);
       }
-      ItemItemCorrelation result = new ItemItemCorrelation(item1, item2, correlation);
+      ItemItemSimilarity result = new ItemItemSimilarity(item1, item2, similarity);
       j++;
       if (j == size) {
         i++;

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java Sat Nov  8 06:39:41 2008
@@ -35,7 +35,7 @@
 /**
  * <p>Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of preference
  * values themselves. That is, each {@link User}'s preferences are sorted and then assigned a rank as their preference
- * value, with 1 being assigned to the least preferred item. Then the Pearson itemCorrelation of these rank values is
+ * value, with 1 being assigned to the least preferred item. Then the Pearson correlation of these rank values is
  * computed.</p>
  */
 public final class SpearmanCorrelationSimilarity implements UserSimilarity {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java Sat Nov  8 06:39:41 2008
@@ -33,8 +33,9 @@
 import java.util.Collection;
 
 /**
- * <p>An implementation of a "correlation" based on the
- * <a href="http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29">Tanimoto coefficient</a>,
+ * <p>An implementation of a "similarity" based on the
+ * <a href="http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29">
+ * Tanimoto coefficient</a>,
  * or extended <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard coefficient</a>.</p>
  *
  * <p>This is intended for "binary" data sets where a user either expresses a generic "yes" preference

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/transforms/CaseAmplification.java Sat Nov  8 06:39:41 2008
@@ -18,16 +18,16 @@
 package org.apache.mahout.cf.taste.impl.transforms;
 
 import org.apache.mahout.cf.taste.common.Refreshable;
-import org.apache.mahout.cf.taste.transforms.CorrelationTransform;
+import org.apache.mahout.cf.taste.transforms.SimilarityTransform;
 
 import java.util.Collection;
 
 /**
- * <p>Applies "case amplification" to correlations. This essentially makes big values bigger
+ * <p>Applies "case amplification" to similarities. This essentially makes big values bigger
  * and small values smaller by raising each score to a power. It could however be used to achieve the
  * opposite effect.</p>
  */
-public final class CaseAmplification implements CorrelationTransform<Object> {
+public final class CaseAmplification implements SimilarityTransform<Object> {
 
   private final double factor;
 
@@ -45,16 +45,16 @@
   }
 
   /**
-   * <p>Transforms one correlation value. This implementation is such that it's possible to define this
+   * <p>Transforms one similarity value. This implementation is such that it's possible to define this
    * transformation on one value in isolation. The "thing" parameters are therefore unused.</p>
    *
    * @param thing1 unused
    * @param thing2 unused
-   * @param value correlation to transform
+   * @param value similarity to transform
    * @return <code>value<sup>factor</sup></code> if value is nonnegative;
    *         <code>-(-value)<sup>factor</sup></code> otherwise
    */
-  public double transformCorrelation(Object thing1, Object thing2, double value) {
+  public double transformSimilarity(Object thing1, Object thing2, double value) {
     return value < 0.0 ? -Math.pow(-value, factor) : Math.pow(value, factor);
   }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java Sat Nov  8 06:39:41 2008
@@ -39,7 +39,7 @@
   /**
    * @param itemID ID of {@link Item} for which to find most similar other {@link Item}s
    * @param howMany desired number of most similar {@link Item}s to find
-   * @param rescorer {@link Rescorer} which can adjust item-item correlation
+   * @param rescorer {@link Rescorer} which can adjust item-item similarity
    * estimates used to determine most similar items
    * @return {@link Item}s most similar to the given item, ordered from most similar to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}
@@ -60,7 +60,7 @@
   /**
    * @param itemIDs IDs of {@link Item} for which to find most similar other {@link Item}s
    * @param howMany desired number of most similar {@link Item}s to find
-   * @param rescorer {@link Rescorer} which can adjust item-item correlation
+   * @param rescorer {@link Rescorer} which can adjust item-item similarity
    * estimates used to determine most similar items
    * @return {@link Item}s most similar to the given items, ordered from most similar to least
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java Sat Nov  8 06:39:41 2008
@@ -38,7 +38,7 @@
   /**
    * @param userID ID of {@link User} for which to find most similar other {@link User}s
    * @param howMany desired number of most similar {@link User}s to find
-   * @param rescorer {@link Rescorer} which can adjust user-user correlation
+   * @param rescorer {@link Rescorer} which can adjust user-user similarity
    * estimates used to determine most similar users
    * @return {@link User}s most similar to the given user
    * @throws TasteException if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java?rev=712399&r1=712398&r2=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/PreferenceTransform.java Sat Nov  8 06:39:41 2008
@@ -23,7 +23,7 @@
 
 /**
  * <p>Implementations encapsulate a transform on a {@link Preference}'s value. These transformations are
- * typically applied to values before they are used to compute a correlation value. They are typically not
+ * typically applied to values before they are used to compute a similarity value. They are typically not
  * applied elsewhere; in particular {@link org.apache.mahout.cf.taste.model.DataModel}s no longer use a transform
  * like this to transform all of their preference values at the source.</p>
  */

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java (from r712103, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/CorrelationTransform.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/CorrelationTransform.java&r1=712103&r2=712399&rev=712399&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/CorrelationTransform.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/transforms/SimilarityTransform.java Sat Nov  8 06:39:41 2008
@@ -20,19 +20,18 @@
 import org.apache.mahout.cf.taste.common.Refreshable;
 
 /**
- * <p>Implementations encapsulate some transformation on correlation values between two
- * things, where things might be {@link org.apache.mahout.cf.taste.model.User}s or {@link org.apache.mahout.cf.taste.model.Item}s or
+ * <p>Implementations encapsulate some transformation on similarity values between two
+ * things, where things might be {@link org.apache.mahout.cf.taste.model.User}s or
+ * {@link org.apache.mahout.cf.taste.model.Item}s or
  * something else.</p>
  */
-public interface CorrelationTransform<T> extends Refreshable {
+public interface SimilarityTransform<T> extends Refreshable {
 
   /**
-   * @param thing1
-   * @param thing2
-   * @param value original correlation between thing1 and thing2
+   * @param value original similarity between thing1 and thing2
    * (should be in [-1,1])
-   * @return transformed correlation (should be in [-1,1])
+   * @return transformed similarity (should be in [-1,1])
    */
-  double transformCorrelation(T thing1, T thing2, double value);
+  double transformSimilarity(T thing1, T thing2, double value);
 
 }



Mime
View raw message