mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r770553 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: common/ neighborhood/
Date Fri, 01 May 2009 07:32:27 GMT
Author: srowen
Date: Fri May  1 07:32:27 2009
New Revision: 770553

URL: http://svn.apache.org/viewvc?rev=770553&view=rev
Log:
Added SamplingIterator and SamplingIterable and fixed up issue with sampling in NearestNUserNeighborhood

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java?rev=770553&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java Fri May  1 07:32:27 2009
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * Wraps an {@link Iterable} whose {@link Iterable#iterator()} returns only some subset of the elements
+ * that it would, as determined by a sampling rate parameter.
+ */
+public final class SamplingIterable<T> implements Iterable<T> {
+
+  private final Iterable<? extends T> delegate;
+  private final double samplingRate;
+
+  public SamplingIterable(Iterable<? extends T> delegate, double samplingRate) {
+    this.delegate = delegate;
+    this.samplingRate = samplingRate;
+  }
+
+  @Override
+  public Iterator<T> iterator() {
+    return new SamplingIterator<T>(delegate.iterator(), samplingRate);
+  }
+
+  public static <T> Iterable<T> maybeWrapIterable(Iterable<T> delegate, double samplingRate) {
+    return samplingRate >= 1.0 ? delegate : new SamplingIterable<T>(delegate, samplingRate);
+  }
+
+}
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java?rev=770553&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java Fri May  1 07:32:27 2009
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.util.Random;
+import java.util.NoSuchElementException;
+
+/**
+ * Wraps an {@link Iterator} and returns only some subset of the elements
+ * that it would, as determined by a sampling rate parameter.
+ */
+public final class SamplingIterator<T> implements Iterator<T> {
+
+  private static final Random r = RandomUtils.getRandom();
+
+  private final Iterator<? extends T> delegate;
+  private final double samplingRate;
+  private T next;
+  private boolean hasNext;
+
+  public SamplingIterator(Iterator<? extends T> delegate, double samplingRate) {
+    this.delegate = delegate;
+    this.samplingRate = samplingRate;
+    this.hasNext = true;
+    doNext();
+  }
+
+  @Override
+  public boolean hasNext() {
+    return hasNext;
+  }
+
+  @Override
+  public T next() {
+    if (hasNext) {
+      T result = next;
+      doNext();
+      return result;
+    }
+    throw new NoSuchElementException();
+  }
+
+  private void doNext() {
+    boolean found = false;
+    while (delegate.hasNext()) {
+      T delegateNext = delegate.next();
+      if (r.nextDouble() < samplingRate) {
+        next = delegateNext;
+        found = true;
+        break;
+      }
+    }
+    if (!found) {
+      hasNext = false;
+      next = null;
+    }
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  @Override
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+
+}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java Fri May  1 07:32:27 2009
@@ -20,20 +20,16 @@
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
 
 import java.util.Collection;
-import java.util.Random;
 
 /**
  * <p>Contains methods and resources useful to all classes in this package.</p>
  */
 abstract class AbstractUserNeighborhood implements UserNeighborhood {
 
-  private static final Random random = RandomUtils.getRandom();
-
   private final UserSimilarity userSimilarity;
   private final DataModel dataModel;
   private final double samplingRate;
@@ -64,8 +60,8 @@
     return dataModel;
   }
 
-  final boolean sampleForUser() {
-    return samplingRate >= 1.0 || random.nextDouble() < samplingRate;
+  final double getSamplingRate() {
+    return samplingRate;
   }
 
   @Override

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java Fri May  1 07:32:27 2009
@@ -22,6 +22,7 @@
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.impl.recommender.TopItems;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -31,7 +32,7 @@
 
 /**
 * <p>Computes a neighborhood consisting of the nearest n {@link User}s to a given {@link User}.
- * "Nearest" is defined by the given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.</p>
+ * "Nearest" is defined by the given {@link UserSimilarity}.</p>
  */
 public final class NearestNUserNeighborhood extends AbstractUserNeighborhood {
 
@@ -97,7 +98,8 @@
 
     TopItems.Estimator<User> estimator = new Estimator(userSimilarityImpl, theUser, minSimilarity);
 
-    List<User> neighborhood = TopItems.getTopUsers(n, dataModel.getUsers(), null, estimator);
+    Iterable<? extends User> users = SamplingIterable.maybeWrapIterable(dataModel.getUsers(), getSamplingRate());
+    List<User> neighborhood = TopItems.getTopUsers(n, users, null, estimator);
 
     log.trace("UserNeighborhood around user ID '{}' is: {}", userID, neighborhood);
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java Fri May  1 07:32:27 2009
@@ -21,6 +21,7 @@
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -33,7 +34,7 @@
 /**
  * <p>Computes a neigbhorhood consisting of all {@link User}s whose similarity to the
 * given {@link User} meets or exceeds a certain threshold. Similarity is defined by the given
- * {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.</p>
+ * {@link UserSimilarity}.</p>
  */
 public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood {
 
@@ -83,12 +84,14 @@
     DataModel dataModel = getDataModel();
     User theUser = dataModel.getUser(userID);
     List<User> neighborhood = new ArrayList<User>();
-    Iterator<? extends User> users = dataModel.getUsers().iterator();
+    Iterable<? extends User> usersIterable =
+      SamplingIterable.maybeWrapIterable(dataModel.getUsers(), getSamplingRate());
+    Iterator<? extends User> users = usersIterable.iterator();
     UserSimilarity userSimilarityImpl = getUserSimilarity();
 
     while (users.hasNext()) {
       User user = users.next();
-      if (sampleForUser() && !userID.equals(user.getID())) {
+      if (!userID.equals(user.getID())) {
         double theSimilarity = userSimilarityImpl.userSimilarity(theUser, user);
         if (!Double.isNaN(theSimilarity) && theSimilarity >= threshold) {
           neighborhood.add(user);



Mime
View raw message