mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r764963 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: recommender/TopItems.java similarity/GenericUserSimilarity.java
Date Tue, 14 Apr 2009 21:32:58 GMT
Author: srowen
Date: Tue Apr 14 21:32:56 2009
New Revision: 764963

URL: http://svn.apache.org/viewvc?rev=764963&view=rev
Log:
Add GenericUserSimilarity

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java
      - copied, changed from r764187, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java?rev=764963&r1=764962&r2=764963&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java Tue Apr 14 21:32:56 2009
@@ -19,6 +19,7 @@
 
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.GenericUserSimilarity;
 import org.apache.mahout.cf.taste.model.Item;
 import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
@@ -137,6 +138,32 @@
     return result;
   }
 
+  public static List<GenericUserSimilarity.UserUserSimilarity> getTopUserUserSimilarities(
+          int howMany, Iterable<GenericUserSimilarity.UserUserSimilarity> allSimilarities) {
+    Queue<GenericUserSimilarity.UserUserSimilarity> topSimilarities =
+            new PriorityQueue<GenericUserSimilarity.UserUserSimilarity>(howMany + 1, Collections.reverseOrder());
+    boolean full = false;
+    double lowestTopValue = Double.NEGATIVE_INFINITY;
+    for (GenericUserSimilarity.UserUserSimilarity similarity : allSimilarities) {
+      double value = similarity.getValue();
+      if (!full || value > lowestTopValue) {
+        topSimilarities.add(similarity);
+        if (full) {
+          topSimilarities.poll();
+        } else if (topSimilarities.size() > howMany) {
+          full = true;
+          topSimilarities.poll();
+        }
+        lowestTopValue = topSimilarities.peek().getValue();
+      }
+    }
+    List<GenericUserSimilarity.UserUserSimilarity> result =
+      new ArrayList<GenericUserSimilarity.UserUserSimilarity>(topSimilarities.size());
+    result.addAll(topSimilarities);
+    Collections.sort(result);
+    return result;
+  }
+
   public interface Estimator<T> {
 
     double estimate(T thing) throws TasteException;

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java
(from r764187, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java&r1=764187&r2=764963&rev=764963&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericItemSimilarity.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/GenericUserSimilarity.java Tue Apr 14 21:32:56 2009
@@ -19,14 +19,15 @@
 
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
 import org.apache.mahout.cf.taste.impl.common.FastMap;
 import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
 import org.apache.mahout.cf.taste.impl.common.IteratorUtils;
 import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.impl.recommender.TopItems;
 import org.apache.mahout.cf.taste.model.DataModel;
-import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.User;
 
 import java.util.Collection;
 import java.util.Iterator;
@@ -34,148 +35,77 @@
 import java.util.Map;
 import java.util.NoSuchElementException;
 
-/**
- * <p>A "generic" {@link ItemSimilarity} which takes a static list of precomputed {@link
Item}
- * similarities and bases its responses on that alone. The values may have been precomputed
- * offline by another process, stored in a file, and then read and fed into an instance of
this class.</p>
- *
- * <p>This is perhaps the best {@link ItemSimilarity} to use with
- * {@link org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender}, for now,
since the point of item-based
- * recommenders is that they can take advantage of the fact that item similarity is relatively
static,
- * can be precomputed, and then used in computation to gain a significant performance advantage.</p>
- */
-public final class GenericItemSimilarity implements ItemSimilarity {
-
-  private final Map<Item, Map<Item, Double>> similarityMaps = new FastMap<Item, Map<Item, Double>>();
-
-  /**
-   * <p>Creates a {@link GenericItemSimilarity} from a precomputed list of
-   * {@link ItemItemSimilarity}s. Each
-   * represents the similarity between two distinct items. Since similarity is assumed to
be symmetric,
-   * it is not necessary to specify similarity between item1 and item2, and item2 and item1.
Both are the same.
-   * It is also not necessary to specify a similarity between any item and itself; these
are assumed to be 1.0.</p>
-   *
-   * <p>Note that specifying a similarity between two items twice is not an error,
but, the later value will
-   * win.</p>
-   *
-   * @param similarities set of
-   *  {@link ItemItemSimilarity}s
-   *  on which to base this instance
-   */
-  public GenericItemSimilarity(Iterable<ItemItemSimilarity> similarities) {
+public final class GenericUserSimilarity implements UserSimilarity {
+
+  private final Map<User, Map<User, Double>> similarityMaps = new FastMap<User, Map<User, Double>>();
+
+  public GenericUserSimilarity(Iterable<UserUserSimilarity> similarities) {
     initSimilarityMaps(similarities);
   }
 
-  /**
-   * <p>Like {@link #GenericItemSimilarity(Iterable)}, but will only keep the specified
number of similarities
-   * from the given {@link Iterable} of similarities. It will keep those with the highest
similarity --
-   * those that are therefore most important.</p>
-   *
-   * <p>Thanks to tsmorton for suggesting this and providing part of the implementation.</p>
-   *
-   * @param similarities set of
-   *  {@link ItemItemSimilarity}s
-   *  on which to base this instance
-   * @param maxToKeep maximum number of similarities to keep
-   */
-  public GenericItemSimilarity(Iterable<ItemItemSimilarity> similarities, int maxToKeep) {
-    Iterable<ItemItemSimilarity> keptSimilarities = TopItems.getTopItemItemSimilarities(maxToKeep, similarities);
+  public GenericUserSimilarity(Iterable<UserUserSimilarity> similarities, int maxToKeep) {
+    Iterable<UserUserSimilarity> keptSimilarities = TopItems.getTopUserUserSimilarities(maxToKeep, similarities);
     initSimilarityMaps(keptSimilarities);
   }
 
-  /**
-   * <p>Builds a list of item-item similarities given an {@link ItemSimilarity} implementation
and a
-   * {@link DataModel}, rather than a list of
-   * {@link ItemItemSimilarity}s.</p>
-   *
-   * <p>It's valid to build a {@link GenericItemSimilarity} this way, but perhaps missing
some of the point
-   * of an item-based recommender. Item-based recommenders use the assumption that item-item
similarities
-   * are relatively fixed, and might be known already independent of user preferences. Hence
it is useful
-   * to inject that information, using {@link #GenericItemSimilarity(Iterable)}.</p>
-   *
-   * @param otherSimilarity other {@link ItemSimilarity} to get similarities from
-   * @param dataModel data model to get {@link Item}s from
-   * @throws TasteException if an error occurs while accessing the {@link DataModel} items
-   */
-  public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel) throws TasteException {
-    List<? extends Item> items = IteratorUtils.iterableToList(dataModel.getItems());
-    Iterator<ItemItemSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, items);
-    initSimilarityMaps(new IteratorIterable<ItemItemSimilarity>(it));
+  public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel) throws TasteException {
+    List<? extends User> users = IteratorUtils.iterableToList(dataModel.getUsers());
+    Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, users);
+    initSimilarityMaps(new IteratorIterable<UserUserSimilarity>(it));
   }
 
-  /**
-   * <p>Like {@link #GenericItemSimilarity(ItemSimilarity, DataModel)} )}, but will
only
-   * keep the specified number of similarities from the given {@link DataModel}.
-   * It will keep those with the highest similarity -- those that are therefore most important.</p>
-   *
-   * <p>Thanks to tsmorton for suggesting this and providing part of the implementation.</p>
-   *
-   * @param otherSimilarity other {@link ItemSimilarity} to get similarities from
-   * @param dataModel data model to get {@link Item}s from
-   * @param maxToKeep maximum number of similarities to keep
-   * @throws TasteException if an error occurs while accessing the {@link DataModel} items
-   */
-  public GenericItemSimilarity(ItemSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
+  public GenericUserSimilarity(UserSimilarity otherSimilarity, DataModel dataModel, int maxToKeep)
           throws TasteException {
-    List<? extends Item> items = IteratorUtils.iterableToList(dataModel.getItems());
-    Iterator<ItemItemSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, items);
-    Iterable<ItemItemSimilarity> keptSimilarities =
-            TopItems.getTopItemItemSimilarities(maxToKeep, new IteratorIterable<ItemItemSimilarity>(it));
+    List<? extends User> users = IteratorUtils.iterableToList(dataModel.getUsers());
+    Iterator<UserUserSimilarity> it = new DataModelSimilaritiesIterator(otherSimilarity, users);
+    Iterable<UserUserSimilarity> keptSimilarities =
+            TopItems.getTopUserUserSimilarities(maxToKeep, new IteratorIterable<UserUserSimilarity>(it));
     initSimilarityMaps(keptSimilarities);
   }
 
-  private void initSimilarityMaps(Iterable<ItemItemSimilarity> similarities) {
-    for (ItemItemSimilarity iic : similarities) {
-      Item similarityItem1 = iic.getItem1();
-      Item similarityItem2 = iic.getItem2();
-      int compare = similarityItem1.compareTo(similarityItem2);
+  private void initSimilarityMaps(Iterable<UserUserSimilarity> similarities) {
+    for (UserUserSimilarity uuc : similarities) {
+      User similarityUser1 = uuc.getUser1();
+      User similarityUser2 = uuc.getUser2();
+      int compare = similarityUser1.compareTo(similarityUser2);
       if (compare != 0) {
         // Order them -- first key should be the "smaller" one
-        Item item1;
-        Item item2;
+        User user1;
+        User user2;
         if (compare < 0) {
-          item1 = similarityItem1;
-          item2 = similarityItem2;
+          user1 = similarityUser1;
+          user2 = similarityUser2;
         } else {
-          item1 = similarityItem2;
-          item2 = similarityItem1;
+          user1 = similarityUser2;
+          user2 = similarityUser1;
         }
-        Map<Item, Double> map = similarityMaps.get(item1);
+        Map<User, Double> map = similarityMaps.get(user1);
         if (map == null) {
-          map = new FastMap<Item, Double>();
-          similarityMaps.put(item1, map);
+          map = new FastMap<User, Double>();
+          similarityMaps.put(user1, map);
         }
-        map.put(item2, iic.getValue());
+        map.put(user2, uuc.getValue());
       }
-      // else similarity between item and itself already assumed to be 1.0
+      // else similarity between user and itself already assumed to be 1.0
     }
   }
 
-  /**
-   * <p>Returns the similarity between two items. Note that similarity is assumed to
be symmetric, that
-   * <code>itemSimilarity(item1, item2) == itemSimilarity(item2, item1)</code>,
and that
-   * <code>itemSimilarity(item1, item1) == 1.0</code> for all items.</p>
-   *
-   * @param item1 first item
-   * @param item2 second item
-   * @return similarity between the two
-   */
   @Override
-  public double itemSimilarity(Item item1, Item item2) {
-    int compare = item1.compareTo(item2);
+  public double userSimilarity(User user1, User user2) {
+    int compare = user1.compareTo(user2);
     if (compare == 0) {
       return 1.0;
     }
-    Item first;
-    Item second;
+    User first;
+    User second;
     if (compare < 0) {
-      first = item1;
-      second = item2;
+      first = user1;
+      second = user2;
     } else {
-      first = item2;
-      second = item1;
+      first = user2;
+      second = user1;
     }
-    Map<Item, Double> nextMap = similarityMaps.get(first);
+    Map<User, Double> nextMap = similarityMaps.get(first);
     if (nextMap == null) {
       return Double.NaN;
     }
@@ -184,43 +114,39 @@
   }
 
   @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
   public void refresh(Collection<Refreshable> alreadyRefreshed) {
     // Do nothing
   }
 
-  /**
-   * Encapsulates a similarity between two items. Similarity must be in the range [-1.0,1.0].
-   */
-  public static final class ItemItemSimilarity implements Comparable<ItemItemSimilarity> {
+  public static final class UserUserSimilarity implements Comparable<UserUserSimilarity> {
 
-    private final Item item1;
-    private final Item item2;
+    private final User user1;
+    private final User user2;
     private final double value;
 
-    /**
-     * @param item1 first item
-     * @param item2 second item
-     * @param value similarity between the two
-     * @throws IllegalArgumentException if value is NaN, less than -1.0 or greater than 1.0
-     */
-    public ItemItemSimilarity(Item item1, Item item2, double value) {
-      if (item1 == null || item2 == null) {
-        throw new IllegalArgumentException("An item is null");
+    public UserUserSimilarity(User user1, User user2, double value) {
+      if (user1 == null || user2 == null) {
+        throw new IllegalArgumentException("A user is null");
       }
       if (Double.isNaN(value) || value < -1.0 || value > 1.0) {
         throw new IllegalArgumentException("Illegal value: " + value);
       }
-      this.item1 = item1;
-      this.item2 = item2;
+      this.user1 = user1;
+      this.user2 = user2;
       this.value = value;
     }
 
-    public Item getItem1() {
-      return item1;
+    public User getUser1() {
+      return user1;
     }
 
-    public Item getItem2() {
-      return item2;
+    public User getUser2() {
+      return user2;
     }
 
     public double getValue() {
@@ -229,49 +155,49 @@
 
     @Override
     public String toString() {
-      return "ItemItemSimilarity[" + item1 + ',' + item2 + ':' + value + ']';
+      return "UserUserSimilarity[" + user1 + ',' + user2 + ':' + value + ']';
     }
 
     /**
      * Defines an ordering from highest similarity to lowest.
      */
     @Override
-    public int compareTo(ItemItemSimilarity other) {
+    public int compareTo(UserUserSimilarity other) {
       double otherValue = other.value;
       return value > otherValue ? -1 : value < otherValue ? 1 : 0;
     }
 
     @Override
     public boolean equals(Object other) {
-      if (!(other instanceof ItemItemSimilarity)) {
+      if (!(other instanceof UserUserSimilarity)) {
         return false;
       }
-      ItemItemSimilarity otherSimilarity = (ItemItemSimilarity) other;
-      return otherSimilarity.item1.equals(item1) && otherSimilarity.item2.equals(item2) && otherSimilarity.value == value;
+      UserUserSimilarity otherSimilarity = (UserUserSimilarity) other;
+      return otherSimilarity.user1.equals(user1) && otherSimilarity.user2.equals(user2) && otherSimilarity.value == value;
     }
 
     @Override
     public int hashCode() {
-      return item1.hashCode() ^ item2.hashCode() ^ RandomUtils.hashDouble(value);
+      return user1.hashCode() ^ user2.hashCode() ^ RandomUtils.hashDouble(value);
     }
 
   }
 
-  private static final class DataModelSimilaritiesIterator implements Iterator<ItemItemSimilarity> {
+  private static final class DataModelSimilaritiesIterator implements Iterator<UserUserSimilarity> {
 
-    private final ItemSimilarity otherSimilarity;
-    private final List<? extends Item> items;
+    private final UserSimilarity otherSimilarity;
+    private final List<? extends User> users;
     private final int size;
     private int i;
-    private Item item1;
+    private User user1;
     private int j;
 
-    private DataModelSimilaritiesIterator(ItemSimilarity otherSimilarity, List<? extends Item> items) {
+    private DataModelSimilaritiesIterator(UserSimilarity otherSimilarity, List<? extends User> users) {
       this.otherSimilarity = otherSimilarity;
-      this.items = items;
-      this.size = items.size();
+      this.users = users;
+      this.size = users.size();
       i = 0;
-      item1 = items.get(0);
+      user1 = users.get(0);
       j = 1;
     }
 
@@ -281,23 +207,23 @@
     }
 
     @Override
-    public ItemItemSimilarity next() {
+    public UserUserSimilarity next() {
       if (!hasNext()) {
         throw new NoSuchElementException();
       }
-      Item item2 = items.get(j);
+      User user2 = users.get(j);
       double similarity;
       try {
-        similarity = otherSimilarity.itemSimilarity(item1, item2);
+        similarity = otherSimilarity.userSimilarity(user1, user2);
       } catch (TasteException te) {
         // ugly:
         throw new RuntimeException(te);
       }
-      ItemItemSimilarity result = new ItemItemSimilarity(item1, item2, similarity);
+      UserUserSimilarity result = new UserUserSimilarity(user1, user2, similarity);
       j++;
       if (j == size) {
         i++;
-        item1 = items.get(i);
+        user1 = users.get(i);
         j = i + 1;
       }
       return result;
@@ -310,4 +236,4 @@
 
   }
 
-}
+}
\ No newline at end of file



Mime
View raw message