mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r733196 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/impl/model/ core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/ core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ core/src/main/java/o...
Date Sat, 10 Jan 2009 00:18:15 GMT
Author: srowen
Date: Fri Jan  9 16:18:14 2009
New Revision: 733196

URL: http://svn.apache.org/viewvc?rev=733196&view=rev
Log:
Added "boolean" implementations for Otis

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPrefUser.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserGenericDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/BooleanPrefUserFileDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BooleanUserGenericUserBasedRecommender.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
    lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPrefUser.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPrefUser.java?rev=733196&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPrefUser.java
(added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPrefUser.java
Fri Jan  9 16:18:14 2009
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
+
+import java.io.Serializable;
+
+/**
+ * A variant of {@link GenericUser} which is appropriate when users express only a "yes" preference for
+ * an item, or none at all. The preference value for all items is considered to be 1.0.
+ */
+public class BooleanPrefUser<K extends Comparable<K>> implements User, Serializable
{
+
+  private final K id;
+  private final FastSet<Object> itemIDs;
+
+  public BooleanPrefUser(K id, FastSet<Object> itemIDs) {
+    if (id == null || itemIDs == null || itemIDs.isEmpty()) {
+      throw new IllegalArgumentException("id or itemIDs is null or empty");
+    }
+    this.id = id;
+    this.itemIDs = itemIDs;
+  }
+
+  @Override
+  public K getID() {
+    return id;
+  }
+
+  /**
+   * Looks up this user's preference for a single item.
+   *
+   * @param itemID ID of the item to look up
+   * @return a newly created preference with value 1.0 if the item ID is in this user's set
+   *  of item IDs, or null if it is not
+   */
+  @Override
+  public Preference getPreferenceFor(Object itemID) {
+    if (!itemIDs.contains(itemID)) {
+      return null;
+    }
+    GenericItem<String> item = new GenericItem<String>(itemID.toString());
+    return new GenericPreference(this, item, 1.0);
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  @Override
+  public Iterable<Preference> getPreferences() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  @Override
+  public Preference[] getPreferencesAsArray() {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * @return true iff this user expresses a preference for the given item
+   */
+  public boolean hasPreferenceFor(Object itemID) {
+    return itemIDs.contains(itemID);
+  }
+
+  /**
+   * @return all item IDs the user expresses a preference for
+   */
+  public FastSet<Object> getItemIDs() {
+    return itemIDs;
+  }
+
+  @Override
+  public int hashCode() {
+    return id.hashCode();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    return obj instanceof User && ((User) obj).getID().equals(id);
+  }
+
+  @Override
+  public String toString() {
+    return "User[id:" + id + ']';
+  }
+
+  @Override
+  @SuppressWarnings("unchecked")
+  public int compareTo(User o) {
+    return id.compareTo((K) o.getID());
+  }
+
+}
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserGenericDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserGenericDataModel.java?rev=733196&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserGenericDataModel.java
(added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserGenericDataModel.java
Fri Jan  9 16:18:14 2009
@@ -0,0 +1,142 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastMap;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+/**
+ * A variant on {@link GenericDataModel} which uses the "boolean" classes like {@link BooleanPrefUser}.
+ */
+public final class BooleanUserGenericDataModel implements DataModel, Serializable {
+
+  private final List<User> users;
+  private final Map<Object, User> userMap;
+  private final FastSet<Object> itemSet;
+
+  /**
+   * Builds the model from the given users, indexing them by ID and collecting the set of
+   * all item IDs they refer to.
+   *
+   * @param users users to include; each one is cast to {@link BooleanPrefUser}
+   * @throws IllegalArgumentException if users is null
+   */
+  @SuppressWarnings("unchecked")
+  public BooleanUserGenericDataModel(Iterable<? extends User> users) {
+    if (users == null) {
+      throw new IllegalArgumentException("users is null");
+    }
+
+    this.userMap = new FastMap<Object, User>();
+    this.itemSet = new FastSet<Object>();
+    // Index every user by ID and accumulate the union of all users' item IDs.
+    // The cast below fails with ClassCastException for any user that is not a BooleanPrefUser.
+    for (User user : users) {
+      userMap.put(user.getID(), user);
+      for (Object itemID : ((BooleanPrefUser<?>) user).getItemIDs()) {
+        itemSet.add(itemID);
+      }
+    }
+
+    // Keep a sorted, read-only snapshot of the users held in the map
+    List<User> usersCopy = new ArrayList<User>(userMap.values());
+    Collections.sort(usersCopy);
+    this.users = Collections.unmodifiableList(usersCopy);
+  }
+
+  public BooleanUserGenericDataModel(DataModel dataModel) throws TasteException {
+    this(dataModel.getUsers());
+  }
+
+  @Override
+  public Iterable<? extends User> getUsers() {
+    return users;
+  }
+
+  @Override
+  public User getUser(Object id) {
+    User user = userMap.get(id);
+    if (user == null) {
+      throw new NoSuchElementException();
+    }
+    return user;
+  }
+
+  @Override
+  public Iterable<? extends Item> getItems() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Item getItem(Object id) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Iterable<? extends Preference> getPreferencesForItem(Object itemID) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Preference[] getPreferencesForItemAsArray(Object itemID) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public int getNumItems() {
+    return itemSet.size();
+  }
+
+  @Override
+  public int getNumUsers() {
+    return users.size();
+  }
+
+  @Override
+  public int getNumUsersWithPreferenceFor(Object... itemIDs) {
+    throw new UnsupportedOperationException();
+
+  }
+
+  @Override
+  public void setPreference(Object userID, Object itemID, double value) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void removePreference(Object userID, Object itemID) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    // Does nothing
+  }
+
+  @Override
+  public String toString() {
+    return "BooleanUserGenericDataModel[users:" + users + ']';
+  }
+
+}
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/BooleanPrefUserFileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/BooleanPrefUserFileDataModel.java?rev=733196&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/BooleanPrefUserFileDataModel.java
(added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/BooleanPrefUserFileDataModel.java
Fri Jan  9 16:18:14 2009
@@ -0,0 +1,255 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.file;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastMap;
+import org.apache.mahout.cf.taste.impl.common.FileLineIterable;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
+import org.apache.mahout.cf.taste.impl.model.BooleanUserGenericDataModel;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.locks.ReentrantLock;
+
+/**
+ * A variant on {@link FileDataModel} which uses the "boolean" classes like {@link BooleanPrefUser}.
+ */
+public class BooleanPrefUserFileDataModel implements DataModel {
+
+  private static final Logger log = LoggerFactory.getLogger(BooleanPrefUserFileDataModel.class);
+
+  private static final Timer timer = new Timer(true);
+  private static final long RELOAD_CHECK_INTERVAL_MS = 60L * 1000L;
+
+  private final File dataFile;
+  private long lastModified;
+  private boolean loaded;
+  private DataModel delegate;
+  private final ReentrantLock reloadLock;
+
+  /**
+   * Creates a model backed by the given file and schedules a recurring check for changes to it.
+   * The file's contents are not read by this constructor.
+   *
+   * @param dataFile file containing preferences data
+   * @throws IllegalArgumentException if dataFile is null
+   * @throws java.io.FileNotFoundException if dataFile does not exist or is a directory
+   */
+  public BooleanPrefUserFileDataModel(File dataFile) throws FileNotFoundException {
+    if (dataFile == null) {
+      throw new IllegalArgumentException("dataFile is null");
+    }
+    if (!dataFile.exists() || dataFile.isDirectory()) {
+      throw new FileNotFoundException(dataFile.toString());
+    }
+
+    // Name this class (not FileDataModel) so the log line identifies the right implementation
+    log.info("Creating BooleanPrefUserFileDataModel for file {}", dataFile);
+
+    this.dataFile = dataFile;
+    this.lastModified = dataFile.lastModified();
+    this.reloadLock = new ReentrantLock();
+
+    // First modification check runs one interval from now, then repeats at the same interval
+    timer.schedule(new RefreshTimerTask(), RELOAD_CHECK_INTERVAL_MS, RELOAD_CHECK_INTERVAL_MS);
+  }
+
+  /**
+   * Re-reads the data file and replaces the delegate model with one built from its contents.
+   * The whole operation runs while holding the reload lock.
+   */
+  protected void reload() {
+    reloadLock.lock();
+    try {
+      Map<String, FastSet<Object>> itemIDsByUser = new FastMap<String, FastSet<Object>>();
+      processFile(itemIDsByUser);
+
+      // One user object per distinct user ID found in the file
+      List<User> loadedUsers = new ArrayList<User>(itemIDsByUser.size());
+      for (Map.Entry<String, FastSet<Object>> entry : itemIDsByUser.entrySet()) {
+        String userID = entry.getKey();
+        FastSet<Object> itemIDs = entry.getValue();
+        loadedUsers.add(buildUser(userID, itemIDs));
+      }
+
+      delegate = new BooleanUserGenericDataModel(loadedUsers);
+      loaded = true;
+    } finally {
+      reloadLock.unlock();
+    }
+  }
+
+  private void processFile(Map<String, FastSet<Object>> data) {
+    log.info("Reading file info...");
+    for (String line : new FileLineIterable(dataFile, false)) {
+      if (line.length() > 0) {
+        log.debug("Read line: {}", line);
+        processLine(line, data);
+      }
+    }
+  }
+
+  /**
+   * <p>Reads one line from the input file and records the item it names in the set of item IDs
+   * kept for the user it names. A line has the form <code>userID,itemID</code>, optionally followed
+   * by a comma and further fields (such as a preference value), which are ignored. If the data
+   * does not yet contain a set for the user ID, an empty one is added first.</p>
+   *
+   * @param line line from input data file
+   * @param data all data read so far, as a mapping from user IDs to the IDs of the items
+   *  each user expresses a preference for
+   * @throws IllegalArgumentException if the line contains no comma
+   */
+  protected void processLine(String line, Map<String, FastSet<Object>> data) {
+    int commaOne = line.indexOf((int) ',');
+    if (commaOne < 0) {
+      throw new IllegalArgumentException("Bad line: " + line);
+    }
+    // Nothing after the item ID is read, so the second delimiter is optional;
+    // without it the item ID runs to the end of the line.
+    int commaTwo = line.indexOf((int) ',', commaOne + 1);
+    String userID = line.substring(0, commaOne);
+    String itemID = commaTwo < 0 ? line.substring(commaOne + 1) : line.substring(commaOne + 1, commaTwo);
+    FastSet<Object> prefs = data.get(userID);
+    if (prefs == null) {
+      prefs = new FastSet<Object>();
+      data.put(userID, prefs);
+    }
+    prefs.add(itemID);
+    log.debug("Read item '{}' for user ID '{}'", itemID, userID);
+  }
+
+  private void checkLoaded() {
+    if (!loaded) {
+      reload();
+    }
+  }
+
+  @Override
+  public Iterable<? extends User> getUsers() throws TasteException {
+    checkLoaded();
+    return delegate.getUsers();
+  }
+
+  @Override
+  public User getUser(Object id) throws TasteException {
+    checkLoaded();
+    return delegate.getUser(id);
+  }
+
+  @Override
+  public Iterable<? extends Item> getItems() throws TasteException {
+    checkLoaded();
+    return delegate.getItems();
+  }
+
+  @Override
+  public Item getItem(Object id) throws TasteException {
+    checkLoaded();
+    return delegate.getItem(id);
+  }
+
+  @Override
+  public Iterable<? extends Preference> getPreferencesForItem(Object itemID) throws
TasteException {
+    checkLoaded();
+    return delegate.getPreferencesForItem(itemID);
+  }
+
+  @Override
+  public Preference[] getPreferencesForItemAsArray(Object itemID) throws TasteException {
+    checkLoaded();
+    return delegate.getPreferencesForItemAsArray(itemID);
+  }
+
+  @Override
+  public int getNumItems() throws TasteException {
+    checkLoaded();
+    return delegate.getNumItems();
+  }
+
+  @Override
+  public int getNumUsers() throws TasteException {
+    checkLoaded();
+    return delegate.getNumUsers();
+  }
+
+  @Override
+  public int getNumUsersWithPreferenceFor(Object... itemIDs) throws TasteException {
+    checkLoaded();
+    return delegate.getNumUsersWithPreferenceFor(itemIDs);
+  }
+
+  @Override
+  public void setPreference(Object userID, Object itemID, double value) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void removePreference(Object userID, Object itemID) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    reload();
+  }
+
+  protected User buildUser(String id, FastSet<Object> prefs) {
+    return new BooleanPrefUser<String>(id, prefs);
+  }
+
+  protected Item buildItem(String id) {
+    return new GenericItem<String>(id);
+  }
+
+  protected Preference buildPreference(User user, Item item, double value) {
+    return new GenericPreference(user, item, value);
+  }
+
+  @Override
+  public String toString() {
+    return "BooleanPrefUserFileDataModel[dataFile:" + dataFile + ']';
+  }
+
+  /**
+   * Timer task which reloads the model when the data file's last-modified time has moved
+   * forward. It does nothing until the model has been loaded once.
+   */
+  private final class RefreshTimerTask extends TimerTask {
+
+    @Override
+    public void run() {
+      if (!loaded) {
+        return;
+      }
+      long newModified = dataFile.lastModified();
+      if (newModified <= lastModified) {
+        return;
+      }
+      log.debug("File has changed; reloading...");
+      // Record the new timestamp before reloading, as the original ordering did
+      lastModified = newModified;
+      reload();
+    }
+  }
+
+}
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=733196&r1=733195&r2=733196&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
(original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
Fri Jan  9 16:18:14 2009
@@ -93,6 +93,10 @@
     timer.schedule(new RefreshTimerTask(), RELOAD_CHECK_INTERVAL_MS, RELOAD_CHECK_INTERVAL_MS);
   }
 
+  public File getDataFile() {
+    return dataFile;
+  }
+
   protected void reload() {
     reloadLock.lock();    
     try {
@@ -113,7 +117,7 @@
     }
   }
 
-  private void processFile(Map<String, List<Preference>> data) {
+  protected void processFile(Map<String, List<Preference>> data) {
     log.info("Reading file info...");
     Map<String, Item> itemCache = new FastMap<String, Item>(1001);
     for (String line : new FileLineIterable(dataFile, false)) {

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BooleanUserGenericUserBasedRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BooleanUserGenericUserBasedRecommender.java?rev=733196&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BooleanUserGenericUserBasedRecommender.java
(added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/BooleanUserGenericUserBasedRecommender.java
Fri Jan  9 16:18:14 2009
@@ -0,0 +1,256 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
+import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.Queue;
+import java.util.PriorityQueue;
+import java.util.ArrayList;
+
+/**
+ * A variant on {@link GenericUserBasedRecommender} which is appropriate
+ * for use with "boolean" classes like {@link org.apache.mahout.cf.taste.impl.model.BooleanPrefUser}.
+ */
+public final class BooleanUserGenericUserBasedRecommender extends AbstractRecommender implements
UserBasedRecommender {
+
+  private static final Logger log = LoggerFactory.getLogger(BooleanUserGenericUserBasedRecommender.class);
+
+  private final UserNeighborhood neighborhood;
+  private final UserSimilarity similarity;
+  private final RefreshHelper refreshHelper;
+
+  public BooleanUserGenericUserBasedRecommender(DataModel dataModel,
+                                     UserNeighborhood neighborhood,
+                                     UserSimilarity similarity) {
+    super(dataModel);
+    if (neighborhood == null) {
+      throw new IllegalArgumentException("neighborhood is null");
+    }
+    this.neighborhood = neighborhood;
+    this.similarity = similarity;
+    this.refreshHelper = new RefreshHelper(null);
+    refreshHelper.addDependency(dataModel);
+    refreshHelper.addDependency(similarity);
+    refreshHelper.addDependency(neighborhood);
+  }
+
+  @Override
+  public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item>
rescorer)
+          throws TasteException {
+    if (userID == null) {
+      throw new IllegalArgumentException("userID is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+
+    log.debug("Recommending items for user ID '{}'", userID);
+
+    User theUser = getDataModel().getUser(userID);
+    Collection<User> theNeighborhood = neighborhood.getUserNeighborhood(userID);
+    log.trace("UserNeighborhood is: {}", neighborhood);
+
+    if (theNeighborhood.isEmpty()) {
+      return Collections.emptyList();
+    }
+
+    Set<Object> allItems = getAllOtherItems(theNeighborhood, theUser);
+    log.trace("Items in neighborhood which user doesn't prefer already are: {}", allItems);
+
+    TopItems.Estimator<Object> estimator = new Estimator(theUser, theNeighborhood);
+
+    List<RecommendedItem> topItems = getTopItems(howMany, allItems, rescorer, estimator);
+
+    log.debug("Recommendations are: {}", topItems);
+    return topItems;
+  }
+
+  /**
+   * Picks the highest-scoring items from the given candidate item IDs.
+   *
+   * @param howMany maximum number of items to return; must be at least 1
+   * @param allItems IDs of the candidate items
+   * @param rescorer if not null, used to filter out candidates and to adjust estimated preferences
+   * @param estimator computes the estimated preference for an item ID
+   * @return at most howMany recommended items, sorted by their natural ordering
+   * @throws IllegalArgumentException if allItems or estimator is null, or howMany is less than 1
+   * @throws TasteException if estimating a preference fails
+   */
+  public static List<RecommendedItem> getTopItems(int howMany,
+                                                  Iterable<Object> allItems,
+                                                  Rescorer<Item> rescorer,
+                                                  TopItems.Estimator<Object> estimator) throws TasteException {
+    if (allItems == null || estimator == null) {
+      throw new IllegalArgumentException("argument is null");
+    }
+    if (howMany < 1) {
+      // With howMany == 0 the queue is emptied right after the first add and peek() below returns null
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+    // Reverse ordering puts the weakest retained item at the head so poll() evicts it
+    Queue<RecommendedItem> topItems =
+        new PriorityQueue<RecommendedItem>(howMany + 1, Collections.reverseOrder());
+    boolean full = false;
+    double lowestTopValue = Double.NEGATIVE_INFINITY;
+    for (Object itemID : allItems) {
+      // Built once and shared by the rescorer and the resulting recommendation
+      Item item = new GenericItem<String>(itemID.toString());
+      if (rescorer != null && rescorer.isFiltered(item)) {
+        // Same convention as MostSimilarEstimator: filtered candidates are never scored
+        continue;
+      }
+      double preference = estimator.estimate(itemID);
+      double rescoredPref = rescorer == null ? preference : rescorer.rescore(item, preference);
+      if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > lowestTopValue)) {
+        topItems.add(new GenericRecommendedItem(item, rescoredPref));
+        if (full) {
+          topItems.poll();
+        } else if (topItems.size() > howMany) {
+          full = true;
+          topItems.poll();
+        }
+        lowestTopValue = topItems.peek().getValue();
+      }
+    }
+    List<RecommendedItem> result = new ArrayList<RecommendedItem>(topItems);
+    Collections.sort(result);
+    return result;
+  }
+
+  @Override
+  public double estimatePreference(Object userID, Object itemID) throws TasteException {
+    DataModel model = getDataModel();
+    User theUser = model.getUser(userID);
+    Preference actualPref = theUser.getPreferenceFor(itemID);
+    if (actualPref != null) {
+      return actualPref.getValue();
+    }
+    Collection<User> theNeighborhood = neighborhood.getUserNeighborhood(userID);
+    return doEstimatePreference(theUser, theNeighborhood, itemID);
+  }
+
+  @Override
+  public List<User> mostSimilarUsers(Object userID, int howMany) throws TasteException
{
+    return mostSimilarUsers(userID, howMany, null);
+  }
+
+  @Override
+  public List<User> mostSimilarUsers(Object userID,
+                                     int howMany,
+                                     Rescorer<Pair<User, User>> rescorer) throws
TasteException {
+    User toUser = getDataModel().getUser(userID);
+    TopItems.Estimator<User> estimator = new MostSimilarEstimator(toUser, similarity,
rescorer);
+    return doMostSimilarUsers(userID, howMany, estimator);
+  }
+
+  private List<User> doMostSimilarUsers(Object userID,
+                                        int howMany,
+                                        TopItems.Estimator<User> estimator) throws
TasteException {
+    DataModel model = getDataModel();
+    User toUser = model.getUser(userID);
+    Collection<User> allUsers = new FastSet<User>(model.getNumUsers());
+    for (User user : model.getUsers()) {
+      allUsers.add(user);
+    }
+    allUsers.remove(toUser);
+    return TopItems.getTopUsers(howMany, allUsers, null, estimator);
+  }
+
+  /**
+   * Estimates a user's preference for an item as a weighted average over the neighbors that
+   * have a preference for it. Each neighbor's weight is its similarity to the user plus 1.0.
+   *
+   * @param theUser user to estimate for; skipped if it appears in the neighborhood
+   * @param theNeighborhood users whose preferences feed the estimate
+   * @param itemID ID of the item to estimate a preference for
+   * @return the weighted average, or NaN if the neighborhood is empty or the weights sum to zero
+   * @throws TasteException if computing a similarity fails
+   */
+  private double doEstimatePreference(User theUser, Collection<User> theNeighborhood, Object itemID)
+          throws TasteException {
+    if (theNeighborhood.isEmpty()) {
+      return Double.NaN;
+    }
+    double weightedSum = 0.0;
+    double weightTotal = 0.0;
+    for (User neighbor : theNeighborhood) {
+      if (neighbor.equals(theUser)) {
+        continue;
+      }
+      // See GenericItemBasedRecommender.doEstimatePreference() too
+      Preference neighborPref = neighbor.getPreferenceFor(itemID);
+      if (neighborPref == null) {
+        continue;
+      }
+      double weight = similarity.userSimilarity(theUser, neighbor) + 1.0;
+      if (Double.isNaN(weight)) {
+        continue;
+      }
+      weightedSum += weight * neighborPref.getValue();
+      weightTotal += weight;
+    }
+    if (weightTotal == 0.0) {
+      return Double.NaN;
+    }
+    return weightedSum / weightTotal;
+  }
+
+  /**
+   * Collects the IDs of all items preferred by anyone in the neighborhood, minus those the
+   * given user already prefers. Every user involved is cast to {@link BooleanPrefUser}.
+   *
+   * @param theNeighborhood users whose item IDs are gathered
+   * @param theUser user whose own item IDs are removed from the result
+   * @return the remaining item IDs
+   */
+  private static Set<Object> getAllOtherItems(Iterable<User> theNeighborhood, User theUser) {
+    Set<Object> candidates = new FastSet<Object>();
+    for (User neighbor : theNeighborhood) {
+      BooleanPrefUser<?> booleanNeighbor = (BooleanPrefUser<?>) neighbor;
+      candidates.addAll(booleanNeighbor.getItemIDs());
+    }
+    BooleanPrefUser<?> booleanUser = (BooleanPrefUser<?>) theUser;
+    candidates.removeAll(booleanUser.getItemIDs());
+    return candidates;
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    refreshHelper.refresh(alreadyRefreshed);
+  }
+
+  /**
+   * @return a description naming this class and the neighborhood implementation it uses
+   */
+  @Override
+  public String toString() {
+    return "BooleanUserGenericUserBasedRecommender[neighborhood:" + neighborhood + ']';
+  }
+
+  private static class MostSimilarEstimator implements TopItems.Estimator<User> {
+
+    private final User toUser;
+    private final UserSimilarity similarity;
+    private final Rescorer<Pair<User, User>> rescorer;
+
+    private MostSimilarEstimator(User toUser,
+                                 UserSimilarity similarity,
+                                 Rescorer<Pair<User, User>> rescorer) {
+      this.toUser = toUser;
+      this.similarity = similarity;
+      this.rescorer = rescorer;
+    }
+
+    @Override
+    public double estimate(User user) throws TasteException {
+      Pair<User, User> pair = new Pair<User, User>(toUser, user);
+      if (rescorer != null && rescorer.isFiltered(pair)) {
+        return Double.NaN;
+      }
+      double originalEstimate = similarity.userSimilarity(toUser, user);
+      return rescorer == null ? originalEstimate : rescorer.rescore(pair, originalEstimate);
+    }
+  }
+
+  private final class Estimator implements TopItems.Estimator<Object> {
+
+    private final User theUser;
+    private final Collection<User> theNeighborhood;
+
+    Estimator(User theUser, Collection<User> theNeighborhood) {
+      this.theUser = theUser;
+      this.theNeighborhood = theNeighborhood;
+    }
+
+    @Override
+    public double estimate(Object itemID) throws TasteException {
+      return doEstimatePreference(theUser, theNeighborhood, itemID);
+    }
+  }
+}
\ No newline at end of file

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java?rev=733196&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java
(added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/BooleanTanimotoCoefficientSimilarity.java
Fri Jan  9 16:18:14 2009
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
+import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.User;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collection;
+
+/**
+ * Variant of {@link TanimotoCoefficientSimilarity} which is appropriate
+ * for use with the "boolean" classes like {@link BooleanPrefUser}.
+ *
+ * <p>The similarity of two users is the number of item IDs they have in common divided by
+ * the number of distinct item IDs found across both users.</p>
+ */
+public final class BooleanTanimotoCoefficientSimilarity implements UserSimilarity {
+
+  private static final Logger log = LoggerFactory.getLogger(BooleanTanimotoCoefficientSimilarity.class);
+
+  /** Passed on to {@link RefreshHelper} by {@link #refresh(Collection)}; not read when computing a similarity. */
+  private final DataModel dataModel;
+
+  /**
+   * @param dataModel model that is offered for refresh whenever this object is refreshed
+   */
+  public BooleanTanimotoCoefficientSimilarity(DataModel dataModel) {
+    this.dataModel = dataModel;
+  }
+
+  /**
+   * Not supported by this implementation.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Computes the Tanimoto coefficient of the two users' item ID sets.
+   *
+   * @param user1 first user; must be a {@link BooleanPrefUser}
+   * @param user2 second user; must be a {@link BooleanPrefUser}
+   * @return size of the intersection of the two item ID sets divided by the size of their union,
+   *  or <code>Double.NaN</code> when the union is empty (neither user has any item)
+   * @throws IllegalArgumentException if either argument is <code>null</code> or is not a
+   *  {@link BooleanPrefUser}
+   */
+  @Override
+  public double userSimilarity(User user1, User user2) {
+
+    if (user1 == null || user2 == null) {
+      throw new IllegalArgumentException("user1 or user2 is null");
+    }
+    if (!(user1 instanceof BooleanPrefUser && user2 instanceof BooleanPrefUser)) {
+      throw new IllegalArgumentException("Both users must be instances of BooleanPrefUser but got " +
+          user1.getClass().getName() + " and " + user2.getClass().getName());
+    }
+    BooleanPrefUser<?> bpUser1 = (BooleanPrefUser<?>) user1;
+    BooleanPrefUser<?> bpUser2 = (BooleanPrefUser<?>) user2;
+
+    FastSet<Object> prefs1 = bpUser1.getItemIDs();
+    FastSet<Object> prefs2 = bpUser2.getItemIDs();
+    // The larger set is always the receiver and the smaller one the argument
+    // (presumably the cheaper direction for FastSet.intersectionSize -- confirm against FastSet).
+    int intersectionSize =
+        prefs1.size() < prefs2.size() ? prefs2.intersectionSize(prefs1) : prefs1.intersectionSize(prefs2);
+
+    // |A union B| = |A| + |B| - |A intersect B|
+    int unionSize = prefs1.size() + prefs2.size() - intersectionSize;
+
+    // With an empty union the quotient is undefined; NaN is returned explicitly, which is the
+    // same value the floating-point division 0.0 / 0.0 would produce.
+    double result = unionSize == 0 ? Double.NaN : (double) intersectionSize / (double) unionSize;
+
+    if (log.isTraceEnabled()) {
+      log.trace("User similarity between " + user1 + " and " + user2 + " is " + result);
+    }
+    return result;
+  }
+
+
+  /**
+   * Offers the underlying {@link DataModel} for refresh via {@link RefreshHelper}.
+   *
+   * @param alreadyRefreshed passed to {@link RefreshHelper#buildRefreshed(Collection)}, whose
+   *  result is then used for the refresh call
+   */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    Collection<Refreshable> refreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(refreshed, dataModel);
+  }
+
+  @Override
+  public String toString() {
+    return "BooleanTanimotoCoefficientSimilarity[dataModel:" + dataModel + ']';
+  }
+
+}
\ No newline at end of file

Modified: lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java?rev=733196&r1=733195&r2=733196&view=diff
==============================================================================
--- lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java (original)
+++ lucene/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/jester/JesterRecommenderEvaluatorRunner.java Fri Jan  9 16:18:14 2009
@@ -44,7 +44,7 @@
     double evaluation = evaluator.evaluate(new JesterRecommenderBuilder(),
                                                  model,
                                                  0.9,
-                                                 0.1);
+                                                 1.0);
     log.info(String.valueOf(evaluation));
   }
 



Mime
View raw message