mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r770768 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender: FarthestNeighborClusterSimilarity.java NearestNeighborClusterSimilarity.java TreeClusteringRecommender.java
Date Fri, 01 May 2009 17:38:14 GMT
Author: srowen
Date: Fri May  1 17:38:13 2009
New Revision: 770768

URL: http://svn.apache.org/viewvc?rev=770768&view=rev
Log: (empty)

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java?rev=770768&r1=770767&r2=770768&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java Fri May  1 17:38:13 2009
@@ -21,23 +21,20 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
 import org.apache.mahout.cf.taste.model.User;
 
 import java.util.Collection;
-import java.util.Random;
 
 /**
  * <p>Defines cluster similarity as the <em>smallest</em> similarity between
any two
- * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that
clusters are close
+ * {@link User}s in the clusters -- that is, it says that clusters are close
  * when <em>all pairs</em> of their members have relatively high similarity.</p>
  */
 public final class FarthestNeighborClusterSimilarity implements ClusterSimilarity {
 
-  private static final Random random = RandomUtils.getRandom();
-
   private final UserSimilarity similarity;
-  private final double samplingPercentage;
+  private final double samplingRate;
 
   /**
    * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link
UserSimilarity}.
@@ -49,19 +46,19 @@
 
   /**
    * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link
UserSimilarity}.
-   * By setting <code>samplingPercentage</code> to a value less than 1.0, this
implementation will only examine
+   * By setting <code>samplingRate</code> to a value less than 1.0, this implementation
will only examine
    * that fraction of all user-user similarities between two clusters, increasing performance
at the expense
    * of accuracy.</p>
    */
-  public FarthestNeighborClusterSimilarity(UserSimilarity similarity, double samplingPercentage) {
+  public FarthestNeighborClusterSimilarity(UserSimilarity similarity, double samplingRate) {
     if (similarity == null) {
       throw new IllegalArgumentException("similarity is null");
     }
-    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
-      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate > 1.0) {
+      throw new IllegalArgumentException("samplingRate is invalid: " + samplingRate);
     }
     this.similarity = similarity;
-    this.samplingPercentage = samplingPercentage;
+    this.samplingRate = samplingRate;
   }
 
   @Override
@@ -71,13 +68,12 @@
       return Double.NaN;
     }
     double leastSimilarity = Double.POSITIVE_INFINITY;
-    for (User user1 : cluster1) {
-      if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
-        for (User user2 : cluster2) {
-          double theSimilarity = similarity.userSimilarity(user1, user2);
-          if (theSimilarity < leastSimilarity) {
-            leastSimilarity = theSimilarity;
-          }
+    Iterable<User> someUsers = SamplingIterable.maybeWrapIterable(cluster1, samplingRate);
+    for (User user1 : someUsers) {
+      for (User user2 : cluster2) {
+        double theSimilarity = similarity.userSimilarity(user1, user2);
+        if (theSimilarity < leastSimilarity) {
+          leastSimilarity = theSimilarity;
         }
       }
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=770768&r1=770767&r2=770768&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java Fri May  1 17:38:13 2009
@@ -21,26 +21,23 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
 import org.apache.mahout.cf.taste.model.User;
 
 import java.util.Collection;
-import java.util.Random;
 
 /**
  * <p>Defines cluster similarity as the <em>largest</em> similarity between
any two
- * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that
clusters are close
+ * {@link User}s in the clusters -- that is, it says that clusters are close
  * when <em>some pair</em> of their members has high similarity.</p>
  */
 public final class NearestNeighborClusterSimilarity implements ClusterSimilarity {
 
-  private static final Random random = RandomUtils.getRandom();
-
   private final UserSimilarity similarity;
-  private final double samplingPercentage;
+  private final double samplingRate;
 
   /**
-   * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link
org.apache.mahout.cf.taste.similarity.UserSimilarity}.
+   * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link
UserSimilarity}.
    * All user-user similarities are examined.</p>
    */
   public NearestNeighborClusterSimilarity(UserSimilarity similarity) {
@@ -48,20 +45,20 @@
   }
 
   /**
-   * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link
org.apache.mahout.cf.taste.similarity.UserSimilarity}.
-   * By setting <code>samplingPercentage</code> to a value less than 1.0, this
implementation will only examine
+   * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link
UserSimilarity}.
+   * By setting <code>samplingRate</code> to a value less than 1.0, this implementation
will only examine
    * that fraction of all user-user similarities between two clusters, increasing performance
at the expense
    * of accuracy.</p>
    */
-  public NearestNeighborClusterSimilarity(UserSimilarity similarity, double samplingPercentage) {
+  public NearestNeighborClusterSimilarity(UserSimilarity similarity, double samplingRate) {
     if (similarity == null) {
       throw new IllegalArgumentException("similarity is null");
     }
-    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
-      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate > 1.0) {
+      throw new IllegalArgumentException("samplingRate is invalid: " + samplingRate);
     }
     this.similarity = similarity;
-    this.samplingPercentage = samplingPercentage;
+    this.samplingRate = samplingRate;
   }
 
   @Override
@@ -70,14 +67,13 @@
     if (cluster1.isEmpty() || cluster2.isEmpty()) {
       return Double.NaN;
     }
+    Iterable<User> someUsers = SamplingIterable.maybeWrapIterable(cluster1, samplingRate);
     double greatestSimilarity = Double.NEGATIVE_INFINITY;
-    for (User user1 : cluster1) {
-      if (samplingPercentage >= 1.0 || random.nextDouble() < samplingPercentage) {
-        for (User user2 : cluster2) {
-          double theSimilarity = similarity.userSimilarity(user1, user2);
-          if (theSimilarity > greatestSimilarity) {
-            greatestSimilarity = theSimilarity;
-          }
+    for (User user1 : someUsers) {
+      for (User user2 : cluster2) {
+        double theSimilarity = similarity.userSimilarity(user1, user2);
+        if (theSimilarity > greatestSimilarity) {
+          greatestSimilarity = theSimilarity;
         }
       }
     }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=770768&r1=770767&r2=770768&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Fri May  1 17:38:13 2009
@@ -60,13 +60,15 @@
  */
 public final class TreeClusteringRecommender extends AbstractRecommender implements ClusteringRecommender
{
 
+  private static final Random r = RandomUtils.getRandom();
+
   private static final Logger log = LoggerFactory.getLogger(TreeClusteringRecommender.class);
 
   private final ClusterSimilarity clusterSimilarity;
   private final int numClusters;
   private final double clusteringThreshold;
   private final boolean clusteringByThreshold;
-  private final double samplingPercentage;
+  private final double samplingRate;
   private Map<Object, List<RecommendedItem>> topRecsByUserID;
   private Collection<Collection<User>> allClusters;
   private Map<Object, Collection<User>> clustersByUserID;
@@ -91,16 +93,16 @@
    * @param dataModel {@link DataModel} which provdes {@link User}s
    * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
    * @param numClusters desired number of clusters to create
-   * @param samplingPercentage percentage of all cluster-cluster pairs to consider when finding
+   * @param samplingRate percentage of all cluster-cluster pairs to consider when finding
    * next-most-similar clusters. Decreasing this value from 1.0 can increase performance
at the
    * cost of accuracy
    * @throws IllegalArgumentException if arguments are <code>null</code>, or
<code>numClusters</code> is
-   * less than 2, or samplingPercentage is {@link Double#NaN} or nonpositive or greater than
1.0
+   * less than 2, or samplingRate is {@link Double#NaN} or nonpositive or greater than 1.0
    */
   public TreeClusteringRecommender(DataModel dataModel,
                                    ClusterSimilarity clusterSimilarity,
                                    int numClusters,
-                                   double samplingPercentage) {
+                                   double samplingRate) {
     super(dataModel);
     if (clusterSimilarity == null) {
       throw new IllegalArgumentException("clusterSimilarity is null");
@@ -108,14 +110,14 @@
     if (numClusters < 2) {
       throw new IllegalArgumentException("numClusters must be at least 2");
     }
-    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
-      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate > 1.0) {
+      throw new IllegalArgumentException("samplingRate is invalid: " + samplingRate);
     }
     this.clusterSimilarity = clusterSimilarity;
     this.numClusters = numClusters;
     this.clusteringThreshold = Double.NaN;
     this.clusteringByThreshold = false;
-    this.samplingPercentage = samplingPercentage;
+    this.samplingRate = samplingRate;
     this.buildClustersLock = new ReentrantLock();
     this.refreshHelper = new RefreshHelper(new Callable<Object>() {
       @Override
@@ -147,16 +149,16 @@
    * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
    * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated
into larger
    * clusters until the next two nearest clusters' similarity drops below this threshold
-   * @param samplingPercentage percentage of all cluster-cluster pairs to consider when finding
+   * @param samplingRate percentage of all cluster-cluster pairs to consider when finding
    * next-most-similar clusters. Decreasing this value from 1.0 can increase performance
at the
    * cost of accuracy
    * @throws IllegalArgumentException if arguments are <code>null</code>, or
<code>clusteringThreshold</code> is
-   * {@link Double#NaN}, or samplingPercentage is {@link Double#NaN} or nonpositive or greater
than 1.0
+   * {@link Double#NaN}, or samplingRate is {@link Double#NaN} or nonpositive or greater
than 1.0
    */
   public TreeClusteringRecommender(DataModel dataModel,
                                    ClusterSimilarity clusterSimilarity,
                                    double clusteringThreshold,
-                                   double samplingPercentage) {
+                                   double samplingRate) {
     super(dataModel);
     if (clusterSimilarity == null) {
       throw new IllegalArgumentException("clusterSimilarity is null");
@@ -164,14 +166,14 @@
     if (Double.isNaN(clusteringThreshold)) {
       throw new IllegalArgumentException("clusteringThreshold must not be NaN");
     }
-    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
-      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate > 1.0) {
+      throw new IllegalArgumentException("samplingRate is invalid: " + samplingRate);
     }
     this.clusterSimilarity = clusterSimilarity;
     this.numClusters = Integer.MIN_VALUE;
     this.clusteringThreshold = clusteringThreshold;
     this.clusteringByThreshold = true;
-    this.samplingPercentage = samplingPercentage;
+    this.samplingRate = samplingRate;
     this.buildClustersLock = new ReentrantLock();
     this.refreshHelper = new RefreshHelper(new Callable<Object>() {
       @Override
@@ -345,11 +347,10 @@
     int size = clusters.size();
     Pair<Collection<User>, Collection<User>> nearestPair = null;
     double bestSimilarity = Double.NEGATIVE_INFINITY;
-    Random r = RandomUtils.getRandom();
     for (int i = 0; i < size; i++) {
       Collection<User> cluster1 = clusters.get(i);
       for (int j = i + 1; j < size; j++) {
-        if (samplingPercentage >= 1.0 || r.nextDouble() < samplingPercentage) {
+        if (samplingRate >= 1.0 || r.nextDouble() < samplingRate) {
           Collection<User> cluster2 = clusters.get(j);
           double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
           if (!Double.isNaN(similarity) && similarity > bestSimilarity) {



Mime
View raw message