mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r654943 [5/9] - in /lucene/mahout/trunk/core: ./ lib/ src/main/examples/org/ src/main/examples/org/apache/ src/main/examples/org/apache/mahout/ src/main/examples/org/apache/mahout/cf/ src/main/examples/org/apache/mahout/cf/taste/ src/main/e...
Date Fri, 09 May 2008 21:35:17 GMT
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java Fri May  9 14:35:12 2008
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.Collection;
+
+/**
+ * <p>Defines cluster similarity as the <em>smallest</em> correlation between any two
+ * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that clusters are close
+ * when <em>all pairs</em> of their members have relatively high correlation.</p>
+ */
+public final class FarthestNeighborClusterSimilarity implements ClusterSimilarity {
+
+  private final UserCorrelation correlation;
+  private final double samplingPercentage;
+
+  /**
+   * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+   * All user-user correlations are examined.</p>
+   *
+   * @param correlation
+   */
+  public FarthestNeighborClusterSimilarity(UserCorrelation correlation) {
+    this(correlation, 1.0);
+  }
+
+  /**
+   * <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+   * By setting <code>samplingPercentage</code> to a value less than 1.0, this implementation will only examine
+   * that fraction of all user-user correlations between two clusters, increasing performance at the expense
+   * of accuracy.</p>
+   *
+   * @param correlation
+   * @param samplingPercentage
+   */
+  public FarthestNeighborClusterSimilarity(UserCorrelation correlation, double samplingPercentage) {
+    if (correlation == null) {
+      throw new IllegalArgumentException("correlation is null");
+    }
+    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    }
+    this.correlation = correlation;
+    this.samplingPercentage = samplingPercentage;
+  }
+
+  public double getSimilarity(Collection<User> cluster1,
+                              Collection<User> cluster2) throws TasteException {
+    if (cluster1.isEmpty() || cluster2.isEmpty()) {
+      return Double.NaN;
+    }
+    double leastCorrelation = Double.POSITIVE_INFINITY;
+    for (User user1 : cluster1) {
+      if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+        for (User user2 : cluster2) {
+          double theCorrelation = correlation.userCorrelation(user1, user2);
+          if (theCorrelation < leastCorrelation) {
+            leastCorrelation = theCorrelation;
+          }
+        }
+      }
+    }
+    // We skipped everything? well, at least try comparing the first Users to get some value
+    if (leastCorrelation == Double.POSITIVE_INFINITY) {
+      return correlation.userCorrelation(cluster1.iterator().next(), cluster2.iterator().next());
+    }
+    return leastCorrelation;
+  }
+
+  public void refresh() {
+    correlation.refresh();
+  }
+
+  @Override
+  public String toString() {
+    return "FarthestNeighborClusterSimilarity[correlation:" + correlation + ']';
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java Fri May  9 14:35:12 2008
@@ -0,0 +1,333 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.ItemCorrelation;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A simple {@link org.apache.mahout.cf.taste.recommender.Recommender} which uses a given
+ * {@link org.apache.mahout.cf.taste.model.DataModel} and {@link org.apache.mahout.cf.taste.correlation.ItemCorrelation}
+ * to produce recommendations. This class represents Taste's support for item-based recommenders.</p>
+ *
+ * <p>The {@link ItemCorrelation} is the most important point to discuss here. Item-based recommenders
+ * are useful because they can take advantage of something to be very fast: they base their computations
+ * on item correlation, not user correlation, and item correlation is relatively static. It can be
+ * precomputed, instead of re-computed in real time.</p>
+ *
+ * <p>Thus it's strongly recommended that you use {@link org.apache.mahout.cf.taste.impl.correlation.GenericItemCorrelation}
+ * with pre-computed correlations if you're going to use this class. You can use
+ * {@link org.apache.mahout.cf.taste.impl.correlation.PearsonCorrelation} too, which computes correlations in real-time,
+ * but will probably find this painfully slow for large amounts of data.</p>
+ */
+public final class GenericItemBasedRecommender extends AbstractRecommender implements ItemBasedRecommender {
+
+  private static final Logger log = Logger.getLogger(GenericItemBasedRecommender.class.getName());
+
+  private final ItemCorrelation correlation;
+  private final ReentrantLock refreshLock;
+
+  public GenericItemBasedRecommender(DataModel dataModel, ItemCorrelation correlation) {
+    super(dataModel);
+    if (correlation == null) {
+      throw new IllegalArgumentException("correlation is null");
+    }
+    this.correlation = correlation;
+    this.refreshLock = new ReentrantLock();
+  }
+
+  public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+          throws TasteException {
+
+    if (userID == null) {
+      throw new IllegalArgumentException("userID is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommending items for user ID '" + userID + '\'');
+    }
+
+    User theUser = getDataModel().getUser(userID);
+    if (getNumPreferences(theUser) == 0) {
+      return Collections.emptyList();
+    }
+
+    Set<Item> allItems = getAllOtherItems(theUser);
+
+    TopItems.Estimator<Item> estimator = new Estimator(theUser);
+
+    List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommendations are: " + topItems);
+    }
+    return topItems;
+  }
+
+  public double estimatePreference(Object userID, Object itemID) throws TasteException {
+    DataModel model = getDataModel();
+    User theUser = model.getUser(userID);
+    Preference actualPref = theUser.getPreferenceFor(itemID);
+    if (actualPref != null) {
+      return actualPref.getValue();
+    }
+    Item item = model.getItem(itemID);
+    return doEstimatePreference(theUser, item);
+  }
+
+  public List<RecommendedItem> mostSimilarItems(Object itemID, int howMany) throws TasteException {
+    return mostSimilarItems(itemID, howMany, NullRescorer.getItemItemPairInstance());
+  }
+
+  public List<RecommendedItem> mostSimilarItems(Object itemID,
+                                                int howMany,
+                                                Rescorer<Pair<Item, Item>> rescorer) throws TasteException {
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+    Item toItem = getDataModel().getItem(itemID);
+    TopItems.Estimator<Item> estimator = new MostSimilarEstimator(toItem, correlation, rescorer);
+    return doMostSimilarItems(itemID, howMany, estimator);
+  }
+
+  public List<RecommendedItem> mostSimilarItems(List<Object> itemIDs, int howMany) throws TasteException {
+    return mostSimilarItems(itemIDs, howMany, NullRescorer.getItemItemPairInstance());
+  }
+
+  public List<RecommendedItem> mostSimilarItems(List<Object> itemIDs,
+                                                int howMany,
+                                                Rescorer<Pair<Item, Item>> rescorer) throws TasteException {
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+    DataModel model = getDataModel();
+    List<Item> toItems = new ArrayList<Item>(itemIDs.size());
+    for (Object itemID : itemIDs) {
+      toItems.add(model.getItem(itemID));
+    }
+    TopItems.Estimator<Item> estimator = new MultiMostSimilarEstimator(toItems, correlation, rescorer);
+    Collection<Item> allItems = new HashSet<Item>(model.getNumItems());
+    for (Item item : model.getItems()) {
+      allItems.add(item);
+    }
+    for (Item item : toItems) {
+      allItems.remove(item);
+    }
+    return TopItems.getTopItems(howMany, allItems, NullRescorer.getItemInstance(), estimator);
+  }
+
+  public List<RecommendedItem> recommendedBecause(Object userID,
+                                                  Object itemID,
+                                                  int howMany) throws TasteException {
+    if (userID == null) {
+      throw new IllegalArgumentException("userID is null");
+    }
+    if (itemID == null) {
+      throw new IllegalArgumentException("itemID is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+
+    DataModel model = getDataModel();
+    User user = model.getUser(userID);
+    Item recommendedItem = model.getItem(itemID);
+    TopItems.Estimator<Item> estimator = new RecommendedBecauseEstimator(user, recommendedItem, correlation);
+
+    Collection<Item> allUserItems = new HashSet<Item>();
+    Preference[] prefs = user.getPreferencesAsArray();
+    for (int i = 0; i < prefs.length; i++) {
+      allUserItems.add(prefs[i].getItem());
+    }
+    allUserItems.remove(recommendedItem);
+
+    return TopItems.getTopItems(howMany, allUserItems, NullRescorer.getItemInstance(), estimator);
+  }
+
+  private List<RecommendedItem> doMostSimilarItems(Object itemID,
+                                                   int howMany,
+                                                   TopItems.Estimator<Item> estimator) throws TasteException {
+    DataModel model = getDataModel();
+    Item toItem = model.getItem(itemID);
+    Collection<Item> allItems = new HashSet<Item>(model.getNumItems());
+    for (Item item : model.getItems()) {
+      allItems.add(item);
+    }
+    allItems.remove(toItem);
+    return TopItems.getTopItems(howMany, allItems, NullRescorer.getItemInstance(), estimator);
+  }
+
+  private double doEstimatePreference(User theUser, Item item) throws TasteException {
+    double preference = 0.0;
+    double totalCorrelation = 0.0;
+    Preference[] prefs = theUser.getPreferencesAsArray();
+    for (int i = 0; i < prefs.length; i++) {
+      Preference pref = prefs[i];
+      double theCorrelation = correlation.itemCorrelation(item, pref.getItem());
+      if (!Double.isNaN(theCorrelation)) {
+        // Why + 1.0? correlation ranges from -1.0 to 1.0, and we want to use it as a simple
+        // weight. To avoid negative values, we add 1.0 to put it in
+        // the [0.0,2.0] range which is reasonable for weights
+        theCorrelation += 1.0;
+        preference += theCorrelation * pref.getValue();
+        totalCorrelation += theCorrelation;
+      }
+    }
+    return totalCorrelation == 0.0 ? Double.NaN : preference / totalCorrelation;
+  }
+
+  private static int getNumPreferences(User theUser) {
+    return theUser.getPreferencesAsArray().length;
+  }
+
+  @Override
+  public void refresh() {
+    if (refreshLock.isLocked()) {
+      return;
+    }
+    try {
+      refreshLock.lock();
+      super.refresh();
+      correlation.refresh();
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "GenericItemBasedRecommender[correlation:" + correlation + ']';
+  }
+
+  private static class MostSimilarEstimator implements TopItems.Estimator<Item> {
+
+    private final Item toItem;
+    private final ItemCorrelation correlation;
+    private final Rescorer<Pair<Item, Item>> rescorer;
+
+    private MostSimilarEstimator(Item toItem,
+                                 ItemCorrelation correlation,
+                                 Rescorer<Pair<Item, Item>> rescorer) {
+      this.toItem = toItem;
+      this.correlation = correlation;
+      this.rescorer = rescorer;
+    }
+
+    public double estimate(Item item) throws TasteException {
+      Pair<Item, Item> pair = new Pair<Item, Item>(toItem, item);
+      if (rescorer.isFiltered(pair)) {
+        return Double.NaN;
+      }
+      double originalEstimate = correlation.itemCorrelation(toItem, item);
+      return rescorer.rescore(pair, originalEstimate);
+    }
+  }
+
+  private final class Estimator implements TopItems.Estimator<Item> {
+
+    private final User theUser;
+
+    private Estimator(User theUser) {
+      this.theUser = theUser;
+    }
+
+    public double estimate(Item item) throws TasteException {
+      return doEstimatePreference(theUser, item);
+    }
+  }
+
+  private static class MultiMostSimilarEstimator implements TopItems.Estimator<Item> {
+
+    private final List<Item> toItems;
+    private final ItemCorrelation correlation;
+    private final Rescorer<Pair<Item, Item>> rescorer;
+
+    private MultiMostSimilarEstimator(List<Item> toItems,
+                                      ItemCorrelation correlation,
+                                      Rescorer<Pair<Item, Item>> rescorer) {
+      this.toItems = toItems;
+      this.correlation = correlation;
+      this.rescorer = rescorer;
+    }
+
+    public double estimate(Item item) throws TasteException {
+      RunningAverage average = new FullRunningAverage();
+      for (Item toItem : toItems) {
+        Pair<Item, Item> pair = new Pair<Item, Item>(toItem, item);
+        if (rescorer.isFiltered(pair)) {
+          continue;
+        }
+        double estimate = correlation.itemCorrelation(toItem, item);
+        estimate = rescorer.rescore(pair, estimate);
+        average.addDatum(estimate);
+      }
+      return average.getAverage();
+    }
+  }
+
+  private static class RecommendedBecauseEstimator implements TopItems.Estimator<Item> {
+
+    private final User user;
+    private final Item recommendedItem;
+    private final ItemCorrelation correlation;
+
+    private RecommendedBecauseEstimator(User user,
+                                        Item recommendedItem,
+                                        ItemCorrelation correlation) {
+      this.user = user;
+      this.recommendedItem = recommendedItem;
+      this.correlation = correlation;
+    }
+
+    public double estimate(Item item) throws TasteException {
+      Preference pref = user.getPreferenceFor(item.getID());
+      if (pref == null) {
+        return Double.NaN;
+      }
+      double correlationValue = correlation.itemCorrelation(recommendedItem, item);
+      return (1.0 + correlationValue) * pref.getValue();
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java Fri May  9 14:35:12 2008
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+
+import java.io.Serializable;
+
+/**
+ * <p>A simple implementation of {@link RecommendedItem}.</p>
+ */
+public final class GenericRecommendedItem implements RecommendedItem, Serializable {
+
+  private final Item item;
+  private final double value;
+
+  /**
+   * @param item
+   * @param value
+   * @throws IllegalArgumentException if item is null or value is NaN
+   */
+  public GenericRecommendedItem(Item item, double value) {
+    if (item == null) {
+      throw new IllegalArgumentException("item is null");
+    }
+    if (Double.isNaN(value)) {
+      throw new IllegalArgumentException("value is NaN");
+    }
+    this.item = item;
+    this.value = value;
+  }
+
+  public Item getItem() {
+    return item;
+  }
+
+  public double getValue() {
+    return value;
+  }
+
+  @Override
+  public String toString() {
+    return "RecommendedItem[item:" + item + ", value:" + value + ']';
+  }
+
+  @Override
+  public int hashCode() {
+    return item.hashCode() ^ Double.valueOf(value).hashCode();
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof GenericRecommendedItem)) {
+      return false;
+    }
+    GenericRecommendedItem other = (GenericRecommendedItem) o;
+    return item.equals(other.item) && value == other.value;
+  }
+
+  /**
+   * Defines a natural ordering from most-preferred item (highest value) to least-preferred.
+   *
+   * @param other
+   * @return 1, -1, 0 as this value is less than, greater than or equal to the other's value
+   */
+  public int compareTo(RecommendedItem other) {
+    double otherValue = other.getValue();
+    if (value < otherValue) {
+      return 1;
+    } else if (value > otherValue) {
+      return -1;
+    } else {
+      return 0;
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java Fri May  9 14:35:12 2008
@@ -0,0 +1,242 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A simple {@link Recommender} which uses a given {@link DataModel} and {@link UserNeighborhood}
+ * to produce recommendations.</p>
+ */
+public final class GenericUserBasedRecommender extends AbstractRecommender implements UserBasedRecommender {
+
+  private static final Logger log = Logger.getLogger(GenericUserBasedRecommender.class.getName());
+
+  private final UserNeighborhood neighborhood;
+  private final UserCorrelation correlation;
+  private final ReentrantLock refreshLock;
+
+  public GenericUserBasedRecommender(DataModel dataModel,
+                                     UserNeighborhood neighborhood,
+                                     UserCorrelation correlation) {
+    super(dataModel);
+    if (neighborhood == null) {
+      throw new IllegalArgumentException("neighborhood is null");
+    }
+    this.neighborhood = neighborhood;
+    this.correlation = correlation;
+    this.refreshLock = new ReentrantLock();
+  }
+
+  public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+          throws TasteException {
+    if (userID == null) {
+      throw new IllegalArgumentException("userID is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommending items for user ID '" + userID + '\'');
+    }
+
+    User theUser = getDataModel().getUser(userID);
+    Collection<User> theNeighborhood = neighborhood.getUserNeighborhood(userID);
+    if (log.isLoggable(Level.FINER)) {
+      log.finer("UserNeighborhood is: " + neighborhood);
+    }
+
+    if (theNeighborhood.isEmpty()) {
+      return Collections.emptyList();
+    }
+
+    Set<Item> allItems = getAllOtherItems(theNeighborhood, theUser);
+    if (log.isLoggable(Level.FINER)) {
+      log.finer("Items in neighborhood which user doesn't prefer already are: " + allItems);
+    }
+
+    TopItems.Estimator<Item> estimator = new Estimator(theUser, theNeighborhood);
+
+    List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommendations are: " + topItems);
+    }
+    return topItems;
+  }
+
+  public double estimatePreference(Object userID, Object itemID) throws TasteException {
+    DataModel model = getDataModel();
+    User theUser = model.getUser(userID);
+    Preference actualPref = theUser.getPreferenceFor(itemID);
+    if (actualPref != null) {
+      return actualPref.getValue();
+    }
+    Collection<User> theNeighborhood = neighborhood.getUserNeighborhood(userID);
+    Item item = model.getItem(itemID);
+    return doEstimatePreference(theUser, theNeighborhood, item);
+  }
+
+  public List<User> mostSimilarUsers(Object userID, int howMany) throws TasteException {
+    return mostSimilarUsers(userID, howMany, NullRescorer.getUserUserPairInstance());
+  }
+
+  public List<User> mostSimilarUsers(Object userID,
+                                     int howMany,
+                                     Rescorer<Pair<User, User>> rescorer) throws TasteException {
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+    User toUser = getDataModel().getUser(userID);
+    TopItems.Estimator<User> estimator = new MostSimilarEstimator(toUser, correlation, rescorer);
+    return doMostSimilarUsers(userID, howMany, estimator);
+  }
+
+  private List<User> doMostSimilarUsers(Object userID,
+                                        int howMany,
+                                        TopItems.Estimator<User> estimator) throws TasteException {
+    DataModel model = getDataModel();
+    User toUser = model.getUser(userID);
+    Collection<User> allUsers = new HashSet<User>(model.getNumUsers());
+    for (User user : model.getUsers()) {
+      allUsers.add(user);
+    }
+    allUsers.remove(toUser);
+    return TopItems.getTopUsers(howMany, allUsers, NullRescorer.getUserInstance(), estimator);
+  }
+
+  private double doEstimatePreference(User theUser, Collection<User> theNeighborhood, Item item)
+          throws TasteException {
+    if (theNeighborhood.isEmpty()) {
+      return Double.NaN;
+    }
+    double preference = 0.0;
+    double totalCorrelation = 0.0;
+    for (User user : theNeighborhood) {
+      if (!user.equals(theUser)) {
+        // See GenericItemBasedRecommender.doEstimatePreference() too
+        Preference pref = user.getPreferenceFor(item.getID());
+        if (pref != null) {
+          double theCorrelation = correlation.userCorrelation(theUser, user) + 1.0;
+          if (!Double.isNaN(theCorrelation)) {
+            preference += theCorrelation * pref.getValue();
+            totalCorrelation += theCorrelation;
+          }
+        }
+      }
+    }
+    return totalCorrelation == 0.0 ? Double.NaN : preference / totalCorrelation;
+  }
+
+  private static Set<Item> getAllOtherItems(Iterable<User> theNeighborhood, User theUser) {
+    Set<Item> allItems = new HashSet<Item>();
+    for (User user : theNeighborhood) {
+      Preference[] prefs = user.getPreferencesAsArray();
+      for (int i = 0; i < prefs.length; i++) {
+        Item item = prefs[i].getItem();
+        // If not already preferred by the user, add it
+        if (theUser.getPreferenceFor(item.getID()) == null) {
+          allItems.add(item);
+        }
+      }
+    }
+    return allItems;
+  }
+
+  @Override
+  public void refresh() {
+    if (refreshLock.isLocked()) {
+      return;
+    }
+    try {
+      refreshLock.lock();
+      super.refresh();
+      neighborhood.refresh();
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "GenericUserBasedRecommender[neighborhood:" + neighborhood + ']';
+  }
+
+  private static class MostSimilarEstimator implements TopItems.Estimator<User> {
+
+    private final User toUser;
+    private final UserCorrelation correlation;
+    private final Rescorer<Pair<User, User>> rescorer;
+
+    private MostSimilarEstimator(User toUser,
+                                 UserCorrelation correlation,
+                                 Rescorer<Pair<User, User>> rescorer) {
+      this.toUser = toUser;
+      this.correlation = correlation;
+      this.rescorer = rescorer;
+    }
+
+    public double estimate(User user) throws TasteException {
+      Pair<User, User> pair = new Pair<User, User>(toUser, user);
+      if (rescorer.isFiltered(pair)) {
+        return Double.NaN;
+      }
+      double originalEstimate = correlation.userCorrelation(toUser, user);
+      return rescorer.rescore(pair, originalEstimate);
+    }
+  }
+
+  private final class Estimator implements TopItems.Estimator<Item> {
+
+    private final User theUser;
+    private final Collection<User> theNeighborhood;
+
+    Estimator(User theUser, Collection<User> theNeighborhood) {
+      this.theUser = theUser;
+      this.theNeighborhood = theNeighborhood;
+    }
+
+    public double estimate(Item item) throws TasteException {
+      return doEstimatePreference(theUser, theNeighborhood, item);
+    }
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java Fri May  9 14:35:12 2008
@@ -0,0 +1,220 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A simple recommender that always estimates preference for an {@link Item} to be the average of
+ * all known preference values for that {@link Item}. No information about {@link User}s is taken into
+ * account. This implementation is provided for experimentation; while simple and fast, it may not
+ * produce very good recommendations.</p>
+ */
+public final class ItemAverageRecommender extends AbstractRecommender {
+
+  private static final Logger log = Logger.getLogger(ItemAverageRecommender.class.getName());
+
+  private final Map<Object, RunningAverage> itemAverages;
+  private boolean averagesBuilt;
+  private final ReentrantLock refreshLock;
+  private final ReadWriteLock buildAveragesLock;
+
+  public ItemAverageRecommender(DataModel dataModel) {
+    super(dataModel);
+    this.itemAverages = new HashMap<Object, RunningAverage>(1003);
+    this.refreshLock = new ReentrantLock();
+    this.buildAveragesLock = new ReentrantReadWriteLock();
+  }
+
+  public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+          throws TasteException {
+    if (userID == null) {
+      throw new IllegalArgumentException("userID is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommending items for user ID '" + userID + '\'');
+    }
+    checkAverageDiffsBuilt();
+
+    User theUser = getDataModel().getUser(userID);
+    Set<Item> allItems = getAllOtherItems(theUser);
+
+    TopItems.Estimator<Item> estimator = new Estimator();
+
+    List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommendations are: " + topItems);
+    }
+    return topItems;
+  }
+
+  public double estimatePreference(Object userID, Object itemID) throws TasteException {
+    DataModel model = getDataModel();
+    User theUser = model.getUser(userID);
+    Preference actualPref = theUser.getPreferenceFor(itemID);
+    if (actualPref != null) {
+      return actualPref.getValue();
+    }
+    checkAverageDiffsBuilt();
+    return doEstimatePreference(itemID);
+  }
+
+  private double doEstimatePreference(Object itemID) {
+    buildAveragesLock.readLock().lock();
+    try {
+      RunningAverage average = itemAverages.get(itemID);
+      return average == null ? Double.NaN : average.getAverage();
+    } finally {
+      buildAveragesLock.readLock().unlock();
+    }
+  }
+
+  private void checkAverageDiffsBuilt() throws TasteException {
+    if (!averagesBuilt) {
+      buildAverageDiffs();
+    }
+  }
+
+  private void buildAverageDiffs() throws TasteException {
+    try {
+      buildAveragesLock.writeLock().lock();
+      DataModel dataModel = getDataModel();
+      for (User user : dataModel.getUsers()) {
+        Preference[] prefs = user.getPreferencesAsArray();
+        for (int i = 0; i < prefs.length; i++) {
+          Preference pref = prefs[i];
+          Object itemID = pref.getItem().getID();
+          RunningAverage average = itemAverages.get(itemID);
+          if (average == null) {
+            average = new FullRunningAverage();
+            itemAverages.put(itemID, average);
+          }
+          average.addDatum(pref.getValue());
+        }
+      }
+      averagesBuilt = true;
+    } finally {
+      buildAveragesLock.writeLock().unlock();
+    }
+  }
+
+  @Override
+  public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+    DataModel dataModel = getDataModel();
+    double prefDelta;
+    try {
+      User theUser = dataModel.getUser(userID);
+      Preference oldPref = theUser.getPreferenceFor(itemID);
+      prefDelta = oldPref == null ? value : value - oldPref.getValue();
+    } catch (NoSuchElementException nsee) {
+      prefDelta = value;
+    }
+    super.setPreference(userID, itemID, value);
+    try {
+      buildAveragesLock.writeLock().lock();
+      RunningAverage average = itemAverages.get(itemID);
+      if (average == null) {
+        RunningAverage newAverage = new FullRunningAverage();
+        newAverage.addDatum(prefDelta);
+        itemAverages.put(itemID, newAverage);
+      } else {
+        average.changeDatum(prefDelta);
+      }
+    } finally {
+      buildAveragesLock.writeLock().unlock();
+    }
+  }
+
+  @Override
+  public void removePreference(Object userID, Object itemID) throws TasteException {
+    DataModel dataModel = getDataModel();
+    User theUser = dataModel.getUser(userID);
+    Preference oldPref = theUser.getPreferenceFor(itemID);
+    super.removePreference(userID, itemID);
+    if (oldPref != null) {
+      try {
+        buildAveragesLock.writeLock().lock();
+        RunningAverage average = itemAverages.get(itemID);
+        if (average == null) {
+          throw new IllegalStateException("No preferences exist for item ID: " + itemID);
+        } else {
+          average.removeDatum(oldPref.getValue());
+        }
+      } finally {
+        buildAveragesLock.writeLock().unlock();
+      }
+    }
+  }
+
+  @Override
+  public void refresh() {
+    if (refreshLock.isLocked()) {
+      return;
+    }
+    try {
+      refreshLock.lock();
+      super.refresh();
+      try {
+        buildAverageDiffs();
+      } catch (TasteException te) {
+        log.log(Level.WARNING, "Unexpected excpetion while refreshing", te);
+      }
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "ItemAverageRecommender";
+  }
+
+  private final class Estimator implements TopItems.Estimator<Item> {
+
+    public double estimate(Item item) {
+      return doEstimatePreference(item.getID());
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java Fri May  9 14:35:12 2008
@@ -0,0 +1,264 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>Like {@link ItemAverageRecommender}, except that estimated preferences are adjusted for the
+ * {@link User}s' average preference value. For example, say user X has not rated item Y. Item Y's
+ * average preference value is 3.5. User X's average preference value is 4.2, and the average over all
+ * preference values is 4.0. User X prefers items 0.2 higher on average, so, the estimated preference
+ * for user X, item Y is 3.5 + 0.2 = 3.7.</p>
+ */
+public final class ItemUserAverageRecommender extends AbstractRecommender {
+
+  private static final Logger log = Logger.getLogger(ItemUserAverageRecommender.class.getName());
+
+  private final Map<Object, RunningAverage> itemAverages;
+  private final Map<Object, RunningAverage> userAverages;
+  private final RunningAverage overallAveragePrefValue;
+  private boolean averagesBuilt;
+  private final ReentrantLock refreshLock;
+  private final ReadWriteLock buildAveragesLock;
+
+  public ItemUserAverageRecommender(DataModel dataModel) {
+    super(dataModel);
+    this.itemAverages = new HashMap<Object, RunningAverage>(1003);
+    this.userAverages = new HashMap<Object, RunningAverage>(1003);
+    this.overallAveragePrefValue = new FullRunningAverage();
+    this.refreshLock = new ReentrantLock();
+    this.buildAveragesLock = new ReentrantReadWriteLock();
+  }
+
+  public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+          throws TasteException {
+    if (userID == null) {
+      throw new IllegalArgumentException("userID is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommending items for user ID '" + userID + '\'');
+    }
+    checkAverageDiffsBuilt();
+
+    User theUser = getDataModel().getUser(userID);
+    Set<Item> allItems = getAllOtherItems(theUser);
+
+    TopItems.Estimator<Item> estimator = new Estimator(userID);
+
+    List<RecommendedItem> topItems = TopItems.getTopItems(howMany, allItems, rescorer, estimator);
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommendations are: " + topItems);
+    }
+    return topItems;
+  }
+
+  public double estimatePreference(Object userID, Object itemID) throws TasteException {
+    DataModel model = getDataModel();
+    User theUser = model.getUser(userID);
+    Preference actualPref = theUser.getPreferenceFor(itemID);
+    if (actualPref != null) {
+      return actualPref.getValue();
+    }
+    checkAverageDiffsBuilt();
+    return doEstimatePreference(userID, itemID);
+  }
+
+  private double doEstimatePreference(Object userID, Object itemID) {
+    buildAveragesLock.readLock().lock();
+    try {
+      RunningAverage itemAverage = itemAverages.get(itemID);
+      if (itemAverage == null) {
+        return Double.NaN;
+      }
+      RunningAverage userAverage = userAverages.get(userID);
+      if (userAverage == null) {
+        return Double.NaN;
+      }
+      double userDiff = userAverage.getAverage() - overallAveragePrefValue.getAverage();
+      return itemAverage.getAverage() + userDiff;
+    } finally {
+      buildAveragesLock.readLock().unlock();
+    }
+  }
+
+  private void checkAverageDiffsBuilt() throws TasteException {
+    if (!averagesBuilt) {
+      buildAverageDiffs();
+    }
+  }
+
+  private void buildAverageDiffs() throws TasteException {
+    try {
+      buildAveragesLock.writeLock().lock();
+      DataModel dataModel = getDataModel();
+      for (User user : dataModel.getUsers()) {
+        Object userID = user.getID();
+        Preference[] prefs = user.getPreferencesAsArray();
+        for (int i = 0; i < prefs.length; i++) {
+          Preference pref = prefs[i];
+          Object itemID = pref.getItem().getID();
+          double value = pref.getValue();
+          addDatumAndCrateIfNeeded(itemID, value, itemAverages);
+          addDatumAndCrateIfNeeded(userID, value, userAverages);
+          overallAveragePrefValue.addDatum(value);
+        }
+      }
+      averagesBuilt = true;
+    } finally {
+      buildAveragesLock.writeLock().unlock();
+    }
+  }
+
+  private static void addDatumAndCrateIfNeeded(Object itemID,
+                                               double value,
+                                               Map<Object, RunningAverage> averages) {
+    RunningAverage itemAverage = averages.get(itemID);
+    if (itemAverage == null) {
+      itemAverage = new FullRunningAverage();
+      averages.put(itemID, itemAverage);
+    }
+    itemAverage.addDatum(value);
+  }
+
+  @Override
+  public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+    DataModel dataModel = getDataModel();
+    double prefDelta;
+    try {
+      User theUser = dataModel.getUser(userID);
+      Preference oldPref = theUser.getPreferenceFor(itemID);
+      prefDelta = oldPref == null ? value : value - oldPref.getValue();
+    } catch (NoSuchElementException nsee) {
+      prefDelta = value;
+    }
+    super.setPreference(userID, itemID, value);
+    try {
+      buildAveragesLock.writeLock().lock();
+      RunningAverage itemAverage = itemAverages.get(itemID);
+      if (itemAverage == null) {
+        RunningAverage newItemAverage = new FullRunningAverage();
+        newItemAverage.addDatum(prefDelta);
+        itemAverages.put(itemID, newItemAverage);
+      } else {
+        itemAverage.changeDatum(prefDelta);
+      }
+      RunningAverage userAverage = userAverages.get(userID);
+      if (userAverage == null) {
+        RunningAverage newUserAveragae = new FullRunningAverage();
+        newUserAveragae.addDatum(prefDelta);
+        userAverages.put(userID, newUserAveragae);
+      } else {
+        userAverage.changeDatum(prefDelta);
+      }
+      overallAveragePrefValue.changeDatum(prefDelta);
+    } finally {
+      buildAveragesLock.writeLock().unlock();
+    }
+  }
+
+  @Override
+  public void removePreference(Object userID, Object itemID) throws TasteException {
+    DataModel dataModel = getDataModel();
+    User theUser = dataModel.getUser(userID);
+    Preference oldPref = theUser.getPreferenceFor(itemID);
+    super.removePreference(userID, itemID);
+    if (oldPref != null) {
+      double value = oldPref.getValue();
+      try {
+        buildAveragesLock.writeLock().lock();
+        RunningAverage itemAverage = itemAverages.get(itemID);
+        if (itemAverage == null) {
+          throw new IllegalStateException("No preferences exist for item ID: " + itemID);
+        }
+        itemAverage.removeDatum(value);
+        RunningAverage userAverage = userAverages.get(userID);
+        if (userAverage == null) {
+          throw new IllegalStateException("No preferences exist for user ID: " + userID);
+        }
+        userAverage.removeDatum(value);
+        overallAveragePrefValue.removeDatum(value);
+      } finally {
+        buildAveragesLock.writeLock().unlock();
+      }
+    }
+  }
+
+  @Override
+  public void refresh() {
+    if (refreshLock.isLocked()) {
+      return;
+    }
+    try {
+      refreshLock.lock();
+      super.refresh();
+      try {
+        buildAverageDiffs();
+      } catch (TasteException te) {
+        log.log(Level.WARNING, "Unexpected excpetion while refreshing", te);
+      }
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "ItemUserAverageRecommender";
+  }
+
+  private final class Estimator implements TopItems.Estimator<Item> {
+
+    private final Object userID;
+
+    private Estimator(Object userID) {
+      this.userID = userID;
+    }
+
+    public double estimate(Item item) {
+      return doEstimatePreference(userID, item.getID());
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java Fri May  9 14:35:12 2008
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.Collection;
+
+/**
+ * <p>Defines cluster similarity as the <em>largest</em> correlation between any two
+ * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that is, it says that clusters are close
+ * when <em>some pair</em> of their members has high correlation.</p>
+ */
+public final class NearestNeighborClusterSimilarity implements ClusterSimilarity {
+
+  private final UserCorrelation correlation;
+  private final double samplingPercentage;
+
+  /**
+   * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+   * All user-user correlations are examined.</p>
+   *
+   * @param correlation
+   */
+  public NearestNeighborClusterSimilarity(UserCorrelation correlation) {
+    this(correlation, 1.0);
+  }
+
+  /**
+   * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the given {@link UserCorrelation}.
+   * By setting <code>samplingPercentage</code> to a value less than 1.0, this implementation will only examine
+   * that fraction of all user-user correlations between two clusters, increasing performance at the expense
+   * of accuracy.</p>
+   *
+   * @param correlation
+   * @param samplingPercentage
+   */
+  public NearestNeighborClusterSimilarity(UserCorrelation correlation, double samplingPercentage) {
+    if (correlation == null) {
+      throw new IllegalArgumentException("correlation is null");
+    }
+    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    }
+    this.correlation = correlation;
+    this.samplingPercentage = samplingPercentage;
+  }
+
+  public double getSimilarity(Collection<User> cluster1,
+                              Collection<User> cluster2) throws TasteException {
+    if (cluster1.isEmpty() || cluster2.isEmpty()) {
+      return Double.NaN;
+    }
+    double greatestCorrelation = Double.NEGATIVE_INFINITY;
+    for (User user1 : cluster1) {
+      if (samplingPercentage >= 1.0 || Math.random() < samplingPercentage) {
+        for (User user2 : cluster2) {
+          double theCorrelation = correlation.userCorrelation(user1, user2);
+          if (theCorrelation > greatestCorrelation) {
+            greatestCorrelation = theCorrelation;
+          }
+        }
+      }
+    }
+    // We skipped everything? well, at least try comparing the first Users to get some value
+    if (greatestCorrelation == Double.NEGATIVE_INFINITY) {
+      return correlation.userCorrelation(cluster1.iterator().next(), cluster2.iterator().next());
+    }
+    return greatestCorrelation;
+  }
+
+  public void refresh() {
+    correlation.refresh();
+  }
+
+  @Override
+  public String toString() {
+    return "NearestNeighborClusterSimilarity[correlation:" + correlation + ']';
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java Fri May  9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+/**
+ * <p>A simple {@link Rescorer} which always returns the original score.</p>
+ */
+public final class NullRescorer<T> implements Rescorer<T> {
+
+  private static final Rescorer<Item> itemInstance = new NullRescorer<Item>();
+  private static final Rescorer<User> userInstance = new NullRescorer<User>();
+  private static final Rescorer<Pair<Item, Item>> itemItemPairInstance = new NullRescorer<Pair<Item, Item>>();
+  private static final Rescorer<Pair<User, User>> userUserPairInstance = new NullRescorer<Pair<User, User>>();
+
+  public static Rescorer<Item> getItemInstance() {
+    return itemInstance;
+  }
+
+  public static Rescorer<User> getUserInstance() {
+    return userInstance;
+  }
+
+  public static Rescorer<Pair<Item, Item>> getItemItemPairInstance() {
+    return itemItemPairInstance;
+  }
+
+  public static Rescorer<Pair<User, User>> getUserUserPairInstance() {
+    return userUserPairInstance;
+  }
+
+  private NullRescorer() {
+    // do nothing
+  }
+
+  /**
+   * @param thing to rescore
+   * @param originalScore current score for {@link Item}
+   * @return same originalScore as new score, always
+   */
+  public double rescore(T thing, double originalScore) {
+    return originalScore;
+  }
+
+  public boolean isFiltered(T thing) {
+    return false;
+  }
+
+  @Override
+  public String toString() {
+    return "NullRescorer";
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java Fri May  9 14:35:12 2008
@@ -0,0 +1,209 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.correlation.GenericItemCorrelation;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+
+/**
+ * <p>A simple class that refactors the "find top N recommended items" logic that is used in
+ * several places in Taste.</p>
+ */
+public final class TopItems {
+
+  private TopItems() {
+  }
+
+  public static List<RecommendedItem> getTopItems(int howMany,
+                                                  Iterable<Item> allItems,
+                                                  Rescorer<Item> rescorer,
+                                                  Estimator<Item> estimator) throws TasteException {
+    if (allItems == null || rescorer == null || estimator == null) {
+      throw new IllegalArgumentException("argument is null");
+    }
+    LinkedList<RecommendedItem> topItems = new LinkedList<RecommendedItem>();
+    boolean full = false;
+    for (Item item : allItems) {
+      if (item.isRecommendable() && !rescorer.isFiltered(item)) {
+        double preference = estimator.estimate(item);
+        double rescoredPref = rescorer.rescore(item, preference);
+        if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > topItems.getLast().getValue())) {
+          // I think this is faster than Collections.binarySearch() over a LinkedList since our
+          // comparisons are cheap, which binarySearch() economizes at the expense of more traversals.
+          // We also know that the right position tends to be at the end of the list.
+          ListIterator<RecommendedItem> iterator = topItems.listIterator(topItems.size());
+          while (iterator.hasPrevious()) {
+            if (rescoredPref <= iterator.previous().getValue()) {
+              iterator.next();
+              break;
+            }
+          }
+          iterator.add(new GenericRecommendedItem(item, rescoredPref));
+          if (full) {
+            topItems.removeLast();
+          } else if (topItems.size() > howMany) {
+            full = true;
+            topItems.removeLast();
+          }
+        }
+      }
+    }
+    return topItems;
+  }
+
+  public static List<User> getTopUsers(int howMany,
+                                       Iterable<User> allUsers,
+                                       Rescorer<User> rescorer,
+                                       Estimator<User> estimator) throws TasteException {
+    LinkedList<SimilarUser> topUsers = new LinkedList<SimilarUser>();
+    boolean full = false;
+    for (User user : allUsers) {
+      if (rescorer.isFiltered(user)) {
+        continue;
+      }
+      double similarity = estimator.estimate(user);
+      double rescoredSimilarity = rescorer.rescore(user, similarity);
+      if (!Double.isNaN(rescoredSimilarity) &&
+          (!full || rescoredSimilarity > topUsers.getLast().getSimilarity())) {
+        ListIterator<SimilarUser> iterator = topUsers.listIterator(topUsers.size());
+        while (iterator.hasPrevious()) {
+          if (rescoredSimilarity <= iterator.previous().getSimilarity()) {
+            iterator.next();
+            break;
+          }
+        }
+        iterator.add(new SimilarUser(user, similarity));
+        if (full) {
+          topUsers.removeLast();
+        } else if (topUsers.size() > howMany) {
+          full = true;
+          topUsers.removeLast();
+        }
+      }
+    }
+    List<User> result = new ArrayList<User>(topUsers.size());
+    for (SimilarUser similarUser : topUsers) {
+      result.add(similarUser.getUser());
+    }
+    return result;
+  }
+
+  /**
+   * <p>Thanks to tsmorton for suggesting this functionality and writing part of the code.</p>
+   *
+   * @see GenericItemCorrelation#GenericItemCorrelation(Iterable, int)
+   * @see GenericItemCorrelation#GenericItemCorrelation(org.apache.mahout.cf.taste.correlation.ItemCorrelation , org.apache.mahout.cf.taste.model.DataModel , int)
+   */
+  public static List<GenericItemCorrelation.ItemItemCorrelation> getTopItemItemCorrelations(
+          int howMany, Iterable<GenericItemCorrelation.ItemItemCorrelation> allCorrelations) {
+    LinkedList<GenericItemCorrelation.ItemItemCorrelation> topCorrelations =
+            new LinkedList<GenericItemCorrelation.ItemItemCorrelation>();
+    boolean full = false;
+    for (GenericItemCorrelation.ItemItemCorrelation correlation : allCorrelations) {
+      double value = correlation.getValue();
+      if (!full || value > topCorrelations.getLast().getValue()) {
+        ListIterator<GenericItemCorrelation.ItemItemCorrelation> iterator =
+                topCorrelations.listIterator(topCorrelations.size());
+        while (iterator.hasPrevious()) {
+          if (value <= iterator.previous().getValue()) {
+            iterator.next();
+            break;
+          }
+        }
+        iterator.add(correlation);
+        if (full) {
+          topCorrelations.removeLast();
+        } else if (topCorrelations.size() > howMany) {
+          full = true;
+          topCorrelations.removeLast();
+        }
+      }
+    }
+    return topCorrelations;
+  }
+
+  public static interface Estimator<T> {
+
+    double estimate(T thing) throws TasteException;
+  }
+
+  // Hmm, should this be exposed publicly like RecommendedItem?
+  private static class SimilarUser implements User {
+
+    private final User user;
+    private final double similarity;
+
+    private SimilarUser(User user, double similarity) {
+      this.user = user;
+      this.similarity = similarity;
+    }
+
+    public Object getID() {
+      return user.getID();
+    }
+
+    public Preference getPreferenceFor(Object itemID) {
+      return user.getPreferenceFor(itemID);
+    }
+
+    public Iterable<Preference> getPreferences() {
+      return user.getPreferences();
+    }
+
+    public Preference[] getPreferencesAsArray() {
+      return user.getPreferencesAsArray();
+    }
+
+    public User getUser() {
+      return user;
+    }
+
+    public double getSimilarity() {
+      return similarity;
+    }
+
+    @Override
+    public int hashCode() {
+      return user.hashCode() ^ Double.valueOf(similarity).hashCode();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof SimilarUser)) {
+        return false;
+      }
+      SimilarUser other = (SimilarUser) o;
+      return user.equals(other.user) && similarity == other.similarity;
+    }
+
+    public int compareTo(User user) {
+      return this.user.compareTo(user);
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java Fri May  9 14:35:12 2008
@@ -0,0 +1,431 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.ClusteringRecommender;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A {@link org.apache.mahout.cf.taste.recommender.Recommender} that clusters {@link User}s, then determines
+ * the clusters' top recommendations. This implementation builds clusters by repeatedly merging clusters
+ * until only a certain number remain, meaning that each cluster is sort of a tree of other clusters.</p>
+ *
+ * <p>This {@link org.apache.mahout.cf.taste.recommender.Recommender} therefore has a few properties to note:</p>
+ * <ul>
+ * <li>For all {@link User}s in a cluster, recommendations will be the same</li>
+ * <li>{@link #estimatePreference(Object, Object)} may well return {@link Double#NaN}; it does so when asked
+ * to estimate preference for an {@link Item} for which no preference is expressed in the {@link User}s in
+ * the cluster.</li>
+ * </ul>
+ */
+public final class TreeClusteringRecommender extends AbstractRecommender implements ClusteringRecommender {
+
+  private static final Logger log = Logger.getLogger(TreeClusteringRecommender.class.getName());
+
+  private final ClusterSimilarity clusterSimilarity;
+  private final int numClusters;
+  private final double clusteringThreshold;
+  private final boolean clusteringByThreshold;
+  private final double samplingPercentage;
+  private Map<Object, List<RecommendedItem>> topRecsByUserID;
+  private Collection<Collection<User>> allClusters;
+  private Map<Object, Collection<User>> clustersByUserID;
+  private boolean clustersBuilt;
+  private final ReentrantLock refreshLock;
+  private final ReentrantLock buildClustersLock;
+
+  /**
+   * @param dataModel {@link DataModel} which provdes {@link User}s
+   * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+   * @param numClusters desired number of clusters to create
+   * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is
+   * less than 2
+   */
+  public TreeClusteringRecommender(DataModel dataModel,
+                                   ClusterSimilarity clusterSimilarity,
+                                   int numClusters) {
+    this(dataModel, clusterSimilarity, numClusters, 1.0);
+  }
+
+  /**
+   * @param dataModel {@link DataModel} which provdes {@link User}s
+   * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+   * @param numClusters desired number of clusters to create
+   * @param samplingPercentage percentage of all cluster-cluster pairs to consider when finding
+   * next-most-similar clusters. Decreasing this value from 1.0 can increase performance at the
+   * cost of accuracy
+   * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>numClusters</code> is
+   * less than 2, or samplingPercentage is {@link Double#NaN} or nonpositive or greater than 1.0
+   */
+  public TreeClusteringRecommender(DataModel dataModel,
+                                   ClusterSimilarity clusterSimilarity,
+                                   int numClusters,
+                                   double samplingPercentage) {
+    super(dataModel);
+    if (clusterSimilarity == null) {
+      throw new IllegalArgumentException("clusterSimilarity is null");
+    }
+    if (numClusters < 2) {
+      throw new IllegalArgumentException("numClusters must be at least 2");
+    }
+    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    }
+    this.clusterSimilarity = clusterSimilarity;
+    this.numClusters = numClusters;
+    this.clusteringThreshold = Double.NaN;
+    this.clusteringByThreshold = false;
+    this.samplingPercentage = samplingPercentage;
+    this.refreshLock = new ReentrantLock();
+    this.buildClustersLock = new ReentrantLock();
+  }
+
+  /**
+   * @param dataModel {@link DataModel} which provdes {@link User}s
+   * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+   * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger
+   * clusters until the next two nearest clusters' similarity drops below this threshold
+   * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>clusteringThreshold</code> is
+   * {@link Double#NaN}
+   */
+  public TreeClusteringRecommender(DataModel dataModel,
+                                   ClusterSimilarity clusterSimilarity,
+                                   double clusteringThreshold) {
+    this(dataModel, clusterSimilarity, clusteringThreshold, 1.0);
+  }
+
+  /**
+   * @param dataModel {@link DataModel} which provdes {@link User}s
+   * @param clusterSimilarity {@link ClusterSimilarity} used to compute cluster similarity
+   * @param clusteringThreshold clustering similarity threshold; clusters will be aggregated into larger
+   * clusters until the next two nearest clusters' similarity drops below this threshold
+   * @param samplingPercentage percentage of all cluster-cluster pairs to consider when finding
+   * next-most-similar clusters. Decreasing this value from 1.0 can increase performance at the
+   * cost of accuracy
+   * @throws IllegalArgumentException if arguments are <code>null</code>, or <code>clusteringThreshold</code> is
+   * {@link Double#NaN}, or samplingPercentage is {@link Double#NaN} or nonpositive or greater than 1.0
+   */
+  public TreeClusteringRecommender(DataModel dataModel,
+                                   ClusterSimilarity clusterSimilarity,
+                                   double clusteringThreshold,
+                                   double samplingPercentage) {
+    super(dataModel);
+    if (clusterSimilarity == null) {
+      throw new IllegalArgumentException("clusterSimilarity is null");
+    }
+    if (Double.isNaN(clusteringThreshold)) {
+      throw new IllegalArgumentException("clusteringThreshold must not be NaN");
+    }
+    if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 || samplingPercentage > 1.0) {
+      throw new IllegalArgumentException("samplingPercentage is invalid: " + samplingPercentage);
+    }
+    this.clusterSimilarity = clusterSimilarity;
+    this.numClusters = Integer.MIN_VALUE;
+    this.clusteringThreshold = clusteringThreshold;
+    this.clusteringByThreshold = true;
+    this.samplingPercentage = samplingPercentage;
+    this.refreshLock = new ReentrantLock();
+    this.buildClustersLock = new ReentrantLock();
+  }
+
+  public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+          throws TasteException {
+    if (userID == null || rescorer == null) {
+      throw new IllegalArgumentException("userID or rescorer is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+    checkClustersBuilt();
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommending items for user ID '" + userID + '\'');
+    }
+
+    List<RecommendedItem> recommended = topRecsByUserID.get(userID);
+    if (recommended == null) {
+      return Collections.emptyList();
+    }
+
+    User theUser = getDataModel().getUser(userID);
+    List<RecommendedItem> rescored = new ArrayList<RecommendedItem>(recommended.size());
+    // Only add items the user doesn't already have a preference for.
+    // And that the rescorer doesn't "reject".
+    for (RecommendedItem recommendedItem : recommended) {
+      Item item = recommendedItem.getItem();
+      if (rescorer.isFiltered(item)) {
+        continue;
+      }
+      if (theUser.getPreferenceFor(item.getID()) == null &&
+          !Double.isNaN(rescorer.rescore(item, recommendedItem.getValue()))) {
+        rescored.add(recommendedItem);
+      }
+    }
+    Collections.sort(rescored, new ByRescoreComparator(rescorer));
+
+    return rescored;
+  }
+
+  public double estimatePreference(Object userID, Object itemID) throws TasteException {
+    if (userID == null || itemID == null) {
+      throw new IllegalArgumentException("userID or itemID is null");
+    }
+    DataModel model = getDataModel();
+    User theUser = model.getUser(userID);
+    Preference actualPref = theUser.getPreferenceFor(itemID);
+    if (actualPref != null) {
+      return actualPref.getValue();
+    }
+    checkClustersBuilt();
+    List<RecommendedItem> topRecsForUser = topRecsByUserID.get(userID);
+    if (topRecsForUser != null) {
+      for (RecommendedItem item : topRecsForUser) {
+        if (itemID.equals(item.getItem().getID())) {
+          return item.getValue();
+        }
+      }
+    }
+    // Hmm, we have no idea. The item is not in the user's cluster
+    return Double.NaN;
+  }
+
+  public Collection<User> getCluster(Object userID) throws TasteException {
+    if (userID == null) {
+      throw new IllegalArgumentException("userID is null");
+    }
+    checkClustersBuilt();
+    Collection<User> cluster = clustersByUserID.get(userID);
+    if (cluster == null) {
+      return Collections.emptyList();
+    } else {
+      return cluster;
+    }
+  }
+
+  public Collection<Collection<User>> getClusters() throws TasteException {
+    checkClustersBuilt();
+    return allClusters;
+  }
+
+  private void checkClustersBuilt() throws TasteException {
+    if (!clustersBuilt) {
+      buildClusters();
+    }
+  }
+
+  private void buildClusters() throws TasteException {
+    try {
+      buildClustersLock.lock();
+      DataModel model = getDataModel();
+      int numUsers = model.getNumUsers();
+      if (numUsers > 0) {
+        List<Collection<User>> newClusters = new ArrayList<Collection<User>>(numUsers);
+        if (numUsers == 1) {
+          User onlyUser = model.getUsers().iterator().next();
+          newClusters.add(Collections.singleton(onlyUser));
+        } else {
+          // Begin with a cluster for each user:
+          for (User user : model.getUsers()) {
+            Collection<User> newCluster = new HashSet<User>();
+            newCluster.add(user);
+            newClusters.add(newCluster);
+          }
+          if (clusteringByThreshold) {
+            Pair<Collection<User>, Collection<User>> nearestPair = findNearestClusters(newClusters);
+            if (nearestPair != null) {
+              Collection<User> cluster1 = nearestPair.getFirst();
+              Collection<User> cluster2 = nearestPair.getSecond();
+              while (clusterSimilarity.getSimilarity(cluster1, cluster2) >= clusteringThreshold) {
+                newClusters.remove(cluster1);
+                newClusters.remove(cluster2);
+                Collection<User> merged = new HashSet<User>(cluster1.size() + cluster2.size());
+                merged.addAll(cluster1);
+                merged.addAll(cluster2);
+                newClusters.add(merged);
+                nearestPair = findNearestClusters(newClusters);
+                if (nearestPair == null) {
+                  break;
+                }
+                cluster1 = nearestPair.getFirst();
+                cluster2 = nearestPair.getSecond();
+              }
+            }
+          } else {
+            while (newClusters.size() > numClusters) {
+              Pair<Collection<User>, Collection<User>> nearestPair =
+                      findNearestClusters(newClusters);
+              if (nearestPair == null) {
+                break;
+              }
+              Collection<User> cluster1 = nearestPair.getFirst();
+              Collection<User> cluster2 = nearestPair.getSecond();
+              newClusters.remove(cluster1);
+              newClusters.remove(cluster2);
+              Collection<User> merged = new HashSet<User>(cluster1.size() + cluster2.size());
+              merged.addAll(cluster1);
+              merged.addAll(cluster2);
+              newClusters.add(merged);
+            }
+          }
+        }
+        topRecsByUserID = computeTopRecsPerUserID(newClusters);
+        clustersByUserID = computeClustersPerUserID(newClusters);
+        allClusters = newClusters;
+      } else {
+        topRecsByUserID = Collections.emptyMap();
+        clustersByUserID = Collections.emptyMap();
+        allClusters = Collections.emptySet();
+      }
+      clustersBuilt = true;
+    } finally {
+      buildClustersLock.unlock();
+    }
+  }
+
+  private Pair<Collection<User>, Collection<User>> findNearestClusters(List<Collection<User>> clusters)
+          throws TasteException {
+    int size = clusters.size();
+    Pair<Collection<User>, Collection<User>> nearestPair = null;
+    double bestSimilarity = Double.NEGATIVE_INFINITY;
+    Random r = RandomUtils.getRandom();
+    for (int i = 0; i < size; i++) {
+      Collection<User> cluster1 = clusters.get(i);
+      for (int j = i + 1; j < size; j++) {
+        if (samplingPercentage >= 1.0 || r.nextDouble() < samplingPercentage) {
+          Collection<User> cluster2 = clusters.get(j);
+          double similarity = clusterSimilarity.getSimilarity(cluster1, cluster2);
+          if (!Double.isNaN(similarity) && similarity > bestSimilarity) {
+            bestSimilarity = similarity;
+            nearestPair = new Pair<Collection<User>, Collection<User>>(cluster1, cluster2);
+          }
+        }
+      }
+    }
+    return nearestPair;
+  }
+
+  private static Map<Object, List<RecommendedItem>> computeTopRecsPerUserID(
+          Iterable<Collection<User>> clusters) throws TasteException {
+    Map<Object, List<RecommendedItem>> recsPerUser = new HashMap<Object, List<RecommendedItem>>();
+    for (Collection<User> cluster : clusters) {
+      List<RecommendedItem> recs = computeTopRecsForCluster(cluster);
+      for (User user : cluster) {
+        recsPerUser.put(user.getID(), recs);
+      }
+    }
+    return Collections.unmodifiableMap(recsPerUser);
+  }
+
+  private static List<RecommendedItem> computeTopRecsForCluster(Collection<User> cluster)
+          throws TasteException {
+
+    Collection<Item> allItems = new HashSet<Item>();
+    for (User user : cluster) {
+      Preference[] prefs = user.getPreferencesAsArray();
+      for (int i = 0; i < prefs.length; i++) {
+        allItems.add(prefs[i].getItem());
+      }
+    }
+
+    TopItems.Estimator<Item> estimator = new Estimator(cluster);
+
+    List<RecommendedItem> topItems =
+            TopItems.getTopItems(Integer.MAX_VALUE, allItems, NullRescorer.getItemInstance(), estimator);
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Recommendations are: " + topItems);
+    }
+    return Collections.unmodifiableList(topItems);
+  }
+
+  private static Map<Object, Collection<User>> computeClustersPerUserID(Collection<Collection<User>> clusters) {
+    Map<Object, Collection<User>> clustersPerUser = new HashMap<Object, Collection<User>>(clusters.size());
+    for (Collection<User> cluster : clusters) {
+      for (User user : cluster) {
+        clustersPerUser.put(user.getID(), cluster);
+      }
+    }
+    return clustersPerUser;
+  }
+
+  @Override
+  public void refresh() {
+    if (refreshLock.isLocked()) {
+      return;
+    }
+    try {
+      refreshLock.lock();
+      super.refresh();
+      clusterSimilarity.refresh();
+      try {
+        buildClusters();
+      } catch (TasteException te) {
+        log.log(Level.WARNING, "Unexpected excpetion while refreshing", te);
+      }
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "TreeClusteringRecommender[clusterSimilarity:" + clusterSimilarity + ']';
+  }
+
+  private static class Estimator implements TopItems.Estimator<Item> {
+
+    private final Collection<User> cluster;
+
+    private Estimator(Collection<User> cluster) {
+      this.cluster = cluster;
+    }
+
+    public double estimate(Item item) {
+      RunningAverage average = new FullRunningAverage();
+      for (User user : cluster) {
+        Preference pref = user.getPreferenceFor(item.getID());
+        if (pref != null) {
+          average.addDatum(pref.getValue());
+        }
+      }
+      return average.getAverage();
+    }
+  }
+}



Mime
View raw message