mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r800634 [2/7] - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/ core/src/main/java/org/apache/mahout/cf/taste/impl/common/jdbc/ core/src/main/java/org...
Date Tue, 04 Aug 2009 00:06:50 GMT
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java Tue Aug  4 00:06:46 2009
@@ -21,19 +21,16 @@
 import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.ArrayIterator;
-import org.apache.mahout.cf.taste.impl.common.EmptyIterable;
 import org.apache.mahout.cf.taste.impl.common.FastMap;
 import org.apache.mahout.cf.taste.impl.common.FastSet;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.Serializable;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
@@ -41,7 +38,7 @@
 import java.util.Set;
 
 /**
- * <p>A simple {@link DataModel} which uses a given {@link List} of {@link User}s as its data source. This
+ * <p>A simple {@link DataModel} which uses a given {@link List} of users as its data source. This
  * implementation is mostly useful for small experiments and is not recommended for contexts where performance is
  * important.</p>
  */
@@ -49,41 +46,37 @@
 
   private static final Logger log = LoggerFactory.getLogger(GenericDataModel.class);
 
-  private static final Preference[] NO_PREFS_ARRAY = new Preference[0];
-  private static final Iterable<Preference> NO_PREFS_ITERABLE = new EmptyIterable<Preference>();
-
-  private final List<User> users;
-  private final FastMap<Comparable<?>, User> userMap;
+  private final List<Comparable<?>> userIDs;
+  private final Map<Comparable<?>, PreferenceArray> preferenceFromUsers;
   private final List<Comparable<?>> itemIDs;
-  private final FastMap<Comparable<?>, Preference[]> preferenceForItems;
+  private final Map<Comparable<?>, PreferenceArray> preferenceForItems;
 
   /**
-   * <p>Creates a new {@link GenericDataModel} from the given {@link User}s (and their preferences). This {@link
+   * <p>Creates a new {@link GenericDataModel} from the given users (and their preferences). This {@link
    * DataModel} retains all this information in memory and is effectively immutable.</p>
    *
-   * @param users {@link User}s to include in this {@link GenericDataModel}
+   * @param userData users to include in this {@link GenericDataModel}
+   *  (see also {@link #toPrefArrayValues(Map, boolean)})
    */
   @SuppressWarnings("unchecked")
-  public GenericDataModel(Iterable<? extends User> users) {
-    if (users == null) {
-      throw new IllegalArgumentException("users is null");
+  public GenericDataModel(Map<Comparable<?>, PreferenceArray> userData) {
+    if (userData == null) {
+      throw new IllegalArgumentException("userData is null");
     }
 
-    this.userMap = new FastMap<Comparable<?>, User>();
-    // I'm abusing generics a little here since I want to use this (huge) map to hold Lists,
-    // then arrays, and don't want to allocate two Maps at once here.
-    FastMap<Comparable<?>, Object> prefsForItems = new FastMap<Comparable<?>, Object>();
+    this.preferenceFromUsers = userData;
+    FastMap<Comparable<?>, Collection<Preference>> prefsForItems = new FastMap<Comparable<?>, Collection<Preference>>();
     FastSet<Comparable<?>> itemIDSet = new FastSet<Comparable<?>>();
     int currentCount = 0;
-    for (User user : users) {
-      userMap.put(user.getID(), user);
-      Preference[] prefsArray = user.getPreferencesAsArray();
-      for (Preference preference : prefsArray) {
+    for (PreferenceArray prefs : preferenceFromUsers.values()) {
+      prefs.sortByItem();
+      int size = prefs.length();
+      for (Preference preference : prefs) {
         Comparable<?> itemID = preference.getItemID();
         itemIDSet.add(itemID);
         List<Preference> prefsForItem = (List<Preference>) prefsForItems.get(itemID);
         if (prefsForItem == null) {
-          prefsForItem = new ArrayList<Preference>();
+          prefsForItem = new ArrayList<Preference>(2);
           prefsForItems.put(itemID, prefsForItem);
         }
         prefsForItem.add(preference);
@@ -93,25 +86,19 @@
         log.info("Processed {} users", currentCount);
       }
     }
-    userMap.rehash();
-
-    this.users = new ArrayList<User>(userMap.values());
-    Collections.sort(this.users);
 
     this.itemIDs = new ArrayList<Comparable<?>>(itemIDSet);
+    itemIDSet = null;
     Collections.sort((List<? extends Comparable>) this.itemIDs);
 
-    prefsForItems.rehash();    
-    // Swap out lists for arrays here -- using the same Map. This is why the generics mess is worth it.
-    for (Map.Entry<Comparable<?>, Object> entry : prefsForItems.entrySet()) {
-      List<Preference> list = (List<Preference>) entry.getValue();
-      Preference[] prefsAsArray = list.toArray(new Preference[list.size()]);
-      Arrays.sort(prefsAsArray, ByUserPreferenceComparator.getInstance());
-      entry.setValue(prefsAsArray);
-    }
+    this.preferenceForItems = toPrefArrayValues(prefsForItems, false);
 
-    // Yeah more generics ugliness
-    this.preferenceForItems = (FastMap<Comparable<?>, Preference[]>) (FastMap<Comparable<?>, ?>) prefsForItems;
+    for (PreferenceArray prefs : preferenceForItems.values()) {
+      prefs.sortByUser();
+    }
+    
+    this.userIDs = new ArrayList(userData.keySet());
+    Collections.sort((List<? extends Comparable>) userIDs);
   }
 
   /**
@@ -122,22 +109,56 @@
    * @throws TasteException if an error occurs while retrieving the other {@link DataModel}'s users
    */
   public GenericDataModel(DataModel dataModel) throws TasteException {
-    this(dataModel.getUsers());
+    this(toDataMap(dataModel));
+  }
+
+  /**
+   * Swaps, in-place, {@link List}s for arrays in {@link Map} values.
+   *
+   * @return input value
+   */
+  public static Map<Comparable<?>, PreferenceArray> toPrefArrayValues(Map<Comparable<?>, Collection<Preference>> data,
+                                                                      boolean byUser) {
+    for (Map.Entry<Comparable<?>, Object> entry :
+         ((Map<Comparable<?>, Object>) (Map<Comparable<?>, ?>) data).entrySet()) {
+      List<Preference> prefList = (List<Preference>) entry.getValue();
+      entry.setValue(byUser ? new GenericUserPreferenceArray(prefList) : new GenericItemPreferenceArray(prefList));
+    }
+    return (Map<Comparable<?>, PreferenceArray>) (Map<Comparable<?>, ?>) data;
+  }
+
+  private static Map<Comparable<?>, PreferenceArray> toDataMap(DataModel dataModel) throws TasteException {
+    Map<Comparable<?>, PreferenceArray> data = new FastMap<Comparable<?>, PreferenceArray>(dataModel.getNumUsers());
+    for (Comparable<?> userID : dataModel.getUserIDs()) {
+      data.put(userID, dataModel.getPreferencesFromUser(userID));
+    }
+    return data;
   }
 
   @Override
-  public Iterable<? extends User> getUsers() {
-    return users;
+  public Iterable<Comparable<?>> getUserIDs() {
+    return userIDs;
   }
 
-  /** @throws NoSuchUserException if there is no such {@link User} */
+  /** @throws NoSuchUserException if there is no such user */
   @Override
-  public User getUser(Comparable<?> id) throws NoSuchUserException {
-    User user = userMap.get(id);
-    if (user == null) {
+  public PreferenceArray getPreferencesFromUser(Comparable<?> userID) throws NoSuchUserException {
+    PreferenceArray prefs = preferenceFromUsers.get(userID);
+    if (prefs == null) {
       throw new NoSuchUserException();
     }
-    return user;
+    return prefs;
+  }
+
+  @Override
+  public FastSet<Comparable<?>> getItemIDsFromUser(Comparable<?> userID) throws TasteException {
+    PreferenceArray prefs = getPreferencesFromUser(userID);
+    int size = prefs.length();
+    FastSet<Comparable<?>> result = new FastSet<Comparable<?>>(size);
+    for (int i = 0; i < size; i++) {
+      result.add(prefs.getItemID(i));
+    }
+    return result;
   }
 
   @Override
@@ -146,15 +167,24 @@
   }
 
   @Override
-  public Iterable<? extends Preference> getPreferencesForItem(Comparable<?> itemID) {
-    Preference[] prefs = preferenceForItems.get(itemID);
-    return prefs == null ? NO_PREFS_ITERABLE : new ArrayIterator<Preference>(prefs);
+  public PreferenceArray getPreferencesForItem(Comparable<?> itemID) throws NoSuchItemException {
+    PreferenceArray prefs = preferenceForItems.get(itemID);
+    if (prefs == null) {
+      throw new NoSuchItemException();
+    }
+    return prefs;
   }
 
   @Override
-  public Preference[] getPreferencesForItemAsArray(Comparable<?> itemID) {
-    Preference[] prefs = preferenceForItems.get(itemID);
-    return prefs == null ? NO_PREFS_ARRAY : prefs;
+  public Float getPreferenceValue(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+    PreferenceArray prefs = getPreferencesFromUser(userID);
+    int size = prefs.length();
+    for (int i = 0; i < size; i++) {
+      if (prefs.getItemID(i).equals(itemID)) {
+        return prefs.getValue(i);
+      }
+    }
+    return null;
   }
 
   @Override
@@ -164,7 +194,7 @@
 
   @Override
   public int getNumUsers() {
-    return users.size();
+    return userIDs.size();
   }
 
   @Override
@@ -177,21 +207,23 @@
       throw new IllegalArgumentException("Illegal number of item IDs: " + length);
     }
     if (length == 1) {
-      Preference[] prefs = preferenceForItems.get(itemIDs[0]);
-      return prefs == null ? 0 : prefs.length;
+      PreferenceArray prefs = preferenceForItems.get(itemIDs[0]);
+      return prefs == null ? 0 : prefs.length();
     } else {
-      Preference[] prefs1 = preferenceForItems.get(itemIDs[0]);
-      Preference[] prefs2 = preferenceForItems.get(itemIDs[1]);
+      PreferenceArray prefs1 = preferenceForItems.get(itemIDs[0]);
+      PreferenceArray prefs2 = preferenceForItems.get(itemIDs[1]);
       if (prefs1 == null || prefs2 == null) {
         return 0;
       }
-      Set<Comparable<?>> users1 = new FastSet<Comparable<?>>(prefs1.length);
-      for (Preference aPrefs1 : prefs1) {
-        users1.add(aPrefs1.getUser().getID());
-      }
-      Set<Comparable<?>> users2 = new FastSet<Comparable<?>>(prefs2.length);
-      for (Preference aPrefs2 : prefs2) {
-        users2.add(aPrefs2.getUser().getID());
+      Set<Comparable<?>> users1 = new FastSet<Comparable<?>>(prefs1.length());
+      int size1 = prefs1.length();
+      for (int i = 0; i < size1; i++) {
+        users1.add(prefs1.getUserID(i));
+      }
+      Set<Comparable<?>> users2 = new FastSet<Comparable<?>>(prefs2.length());
+      int size2 = prefs2.length();
+      for (int i = 0; i < size2; i++) {
+        users2.add(prefs2.getUserID(i));
       }
       users1.retainAll(users2);
       return users1.size();
@@ -199,14 +231,13 @@
   }
 
   @Override
-  public void setPreference(Comparable<?> userID, Comparable<?> itemID, double value)
-      throws NoSuchUserException, NoSuchItemException {
-    getUser(userID).setPreference(itemID, value);
+  public void removePreference(Comparable<?> userID, Comparable<?> itemID) throws NoSuchUserException {
+    throw new UnsupportedOperationException();
   }
 
   @Override
-  public void removePreference(Comparable<?> userID, Comparable<?> itemID) throws NoSuchUserException {
-    getUser(userID).removePreference(itemID);
+  public void setPreference(Comparable<?> userID, Comparable<?> itemID, float value) throws NoSuchUserException {
+    throw new UnsupportedOperationException();
   }
 
   @Override
@@ -216,7 +247,7 @@
 
   @Override
   public String toString() {
-    return "GenericDataModel[users:" + (users.size() > 3 ? users.subList(0, 3) + "..." : users) + ']';
+    return "GenericDataModel[users:" + (userIDs.size() > 3 ? userIDs.subList(0, 3) + "..." : userIDs) + ']';
   }
 
 }

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java (from r800080, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreferenceArray.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreferenceArray.java&r1=800080&r2=800634&rev=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreferenceArray.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java Tue Aug  4 00:06:46 2009
@@ -19,70 +19,207 @@
 
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.model.User;
 
 import java.io.Serializable;
+import java.util.Iterator;
+import java.util.List;
 
 /**
- * This implementation maintains three parallel arrays, of {@link User}s, items, and values. The idea is to save
- * allocating {@link Preference} objects themselves. On a 64-bit virtual machine, this should save 12 bytes per element
- * (the overhead of an enclosing {@link Preference} object reference and object header).
+ * <p>Like {@link GenericUserPreferenceArray} but stores preferences for one item (all item IDs the same)
+ * rather than one user.</p>
  *
- * This is not used yet.
+ * @see BooleanItemPreferenceArray
+ * @see GenericUserPreferenceArray
+ * @see GenericPreference
  */
-public final class GenericPreferenceArray implements PreferenceArray, Serializable {
+public final class GenericItemPreferenceArray implements PreferenceArray, Serializable {
 
-  private final User[] users;
-  private final Comparable<?>[] itemIDs;
-  private final double[] values;
-
-  public GenericPreferenceArray(int size) {
-    users = new User[size];
-    itemIDs = new Comparable<?>[size];
-    values = new double[size];
+  private static final int USER = 0;
+  private static final int VALUE = 2;
+  private static final int VALUE_REVERSED = 3;
+
+  private final Comparable<?>[] IDs;
+  private Comparable<?> id;
+  private final float[] values;
+
+  public GenericItemPreferenceArray(int size) {
+    if (size < 1) {
+      throw new IllegalArgumentException("size is less than 1");
+    }
+    this.IDs = new Comparable<?>[size];
+    values = new float[size];
+  }
+
+  public int length() {
+    return IDs.length;
+  }
+
+  public GenericItemPreferenceArray(List<Preference> prefs) {
+    this(prefs.size());
+    for (int i = 0; i < prefs.size(); i++) {
+      Preference pref = prefs.get(i);
+      IDs[i] = pref.getUserID();
+      values[i] = pref.getValue();
+    }
+    id = prefs.get(0).getItemID();
   }
 
   @Override
   public Preference get(int i) {
-    return new GenericPreference(users[i], itemIDs[i], values[i]);
+    return new PreferenceView(i);
   }
 
   @Override
   public void set(int i, Preference pref) {
-    users[i] = pref.getUser();
-    itemIDs[i] = pref.getItemID();
+    id = pref.getItemID();
+    IDs[i] = pref.getUserID();
     values[i] = pref.getValue();
   }
 
   @Override
-  public User getUser(int i) {
-    return users[i];
+  public Comparable<?> getUserID(int i) {
+    return IDs[i];
   }
 
   @Override
-  public void setUser(int i, User user) {
-    users[i] = user;
+  public void setUserID(int i, Comparable<?> userID) {
+    IDs[i] = userID;
   }
 
   @Override
   public Comparable<?> getItemID(int i) {
-    return itemIDs[i];
+    return id;
   }
 
   @Override
   public void setItemID(int i, Comparable<?> itemID) {
-    itemIDs[i] = itemID;
+    id = itemID;
   }
 
-
   @Override
-  public double getValue(int i) {
+  public float getValue(int i) {
     return values[i];
   }
 
   @Override
-  public void setValue(int i, double value) {
+  public void setValue(int i, float value) {
     values[i] = value;
   }
 
+  @Override
+  public void sortByUser() {
+    selectionSort(USER);
+  }
+
+  @Override
+  public void sortByItem() {
+  }
+
+  @Override  
+  public void sortByValue() {
+    selectionSort(VALUE);
+  }
+
+  @Override
+  public void sortByValueReversed() {
+    selectionSort(VALUE_REVERSED);
+  }
+
+  private void selectionSort(int type) {
+    // I think this sort will prove to be too dumb, but, it's in place and OK for tiny, mostly sorted data
+    int max = length();
+    for (int i = 0; i < max; i++) {
+      int min = i;
+      for (int j = i + 1; j < max; j++) {
+        if (isLess(j, min, type)) {
+          min = j;
+        }
+      }
+      if (i != min) {
+        swap(i, min);
+      }
+    }
+  }
+
+  private boolean isLess(int i, int j, int type) {
+    switch (type) {
+      case USER:
+        return ((Comparable<Object>) IDs[i]).compareTo(IDs[j]) < 0;
+      case VALUE:
+        return values[i] < values[j];
+      case VALUE_REVERSED:
+        return values[i] >= values[j];
+      default:
+        throw new IllegalStateException();
+    }
+  }
+
+  private void swap(int i, int j) {
+    Comparable<?> temp1 = IDs[i];
+    float temp2 = values[i];
+    IDs[i] = IDs[j];
+    values[i] = values[j];
+    IDs[j] = temp1;
+    values[j] = temp2;
+  }
+
+  public GenericItemPreferenceArray clone() {
+    try {
+      return (GenericItemPreferenceArray) super.clone();
+    } catch (CloneNotSupportedException cnse) {
+      throw new AssertionError();
+    }
+  }
+
+  @Override
+  public Iterator<Preference> iterator() {
+    return new PreferenceArrayIterator();
+  }
+
+  private final class PreferenceArrayIterator implements Iterator<Preference> {
+    private int i = 0;
+    @Override
+    public boolean hasNext() {
+      return i < length();
+    }
+    @Override
+    public Preference next() {
+      return new PreferenceView(i++);
+    }
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  private final class PreferenceView implements Preference {
+
+    private final int i;
+
+    private PreferenceView(int i) {
+      this.i = i;
+    }
+
+    @Override
+    public Comparable<?> getUserID() {
+      return GenericItemPreferenceArray.this.getUserID(i);
+    }
+
+    @Override
+    public Comparable<?> getItemID() {
+      return GenericItemPreferenceArray.this.getItemID(i);
+    }
+
+    @Override
+    public float getValue() {
+      return values[i];
+    }
+
+    @Override
+    public void setValue(float value) {
+      values[i] = value;
+    }
+
+  }
+
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java Tue Aug  4 00:06:46 2009
@@ -18,43 +18,31 @@
 package org.apache.mahout.cf.taste.impl.model;
 
 import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
 
 import java.io.Serializable;
 
 /** <p>A simple {@link Preference} encapsulating an item and preference value.</p> */
-public class GenericPreference implements SettableUserPreference, Serializable {
+public class GenericPreference implements Preference, Serializable {
 
-  private User user;
+  private final Comparable<?> userID;
   private final Comparable<?> itemID;
-  private double value;
+  private float value;
 
-  public GenericPreference(User user, Comparable<?> itemID, double value) {
-    if (itemID == null) {
-      throw new IllegalArgumentException("itemID is null");
+  public GenericPreference(Comparable<?> userID, Comparable<?> itemID, float value) {
+    if (userID == null || itemID == null) {
+      throw new IllegalArgumentException("userID or itemID is null");
     }
-    if (Double.isNaN(value)) {
+    if (Float.isNaN(value)) {
       throw new IllegalArgumentException("Invalid value: " + value);
     }
-    this.user = user;
+    this.userID = userID;
     this.itemID = itemID;
     this.value = value;
   }
 
   @Override
-  public User getUser() {
-    if (user == null) {
-      throw new IllegalStateException("User was never set");
-    }
-    return user;
-  }
-
-  @Override
-  public void setUser(User user) {
-    if (user == null) {
-      throw new IllegalArgumentException("user is null");
-    }
-    this.user = user;
+  public Comparable<?> getUserID() {
+    return userID;
   }
 
   @Override
@@ -63,13 +51,13 @@
   }
 
   @Override
-  public double getValue() {
+  public float getValue() {
     return value;
   }
 
   @Override
-  public void setValue(double value) {
-    if (Double.isNaN(value)) {
+  public void setValue(float value) {
+    if (Float.isNaN(value)) {
       throw new IllegalArgumentException("Invalid value: " + value);
     }
     this.value = value;
@@ -77,7 +65,7 @@
 
   @Override
   public String toString() {
-    return "GenericPreference[user: " + user + ", itemID:" + itemID + ", value:" + value + ']';
+    return "GenericPreference[userID: " + userID + ", itemID:" + itemID + ", value:" + value + ']';
   }
 
 }

Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java (from r800080, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreferenceArray.java)
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreferenceArray.java&r1=800080&r2=800634&rev=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreferenceArray.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java Tue Aug  4 00:06:46 2009
@@ -19,70 +19,208 @@
 
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
-import org.apache.mahout.cf.taste.model.User;
 
 import java.io.Serializable;
+import java.util.Iterator;
+import java.util.List;
 
 /**
- * This implementation maintains three parallel arrays, of {@link User}s, items, and values. The idea is to save
- * allocating {@link Preference} objects themselves. On a 64-bit virtual machine, this should save 12 bytes per element
- * (the overhead of an enclosing {@link Preference} object reference and object header).
+ * <p>This implementation maintains two parallel arrays, of item IDs and values. The idea is to save
+ * allocating {@link Preference} objects themselves. This saves the overhead of {@link Preference} objects
+ * and also avoids duplicating the user ID value.</p>
  *
- * This is not used yet.
+ * @see BooleanUserPreferenceArray
+ * @see GenericItemPreferenceArray
+ * @see GenericPreference
  */
-public final class GenericPreferenceArray implements PreferenceArray, Serializable {
+public final class GenericUserPreferenceArray implements PreferenceArray, Serializable {
 
-  private final User[] users;
-  private final Comparable<?>[] itemIDs;
-  private final double[] values;
-
-  public GenericPreferenceArray(int size) {
-    users = new User[size];
-    itemIDs = new Comparable<?>[size];
-    values = new double[size];
+  private static final int ITEM = 1;
+  private static final int VALUE = 2;
+  private static final int VALUE_REVERSED = 3;
+
+  private final Comparable<?>[] IDs;
+  private Comparable<?> id;
+  private final float[] values;
+
+  public GenericUserPreferenceArray(int size) {
+    if (size < 1) {
+      throw new IllegalArgumentException("size is less than 1");
+    }
+    this.IDs = new Comparable<?>[size];
+    values = new float[size];
+  }
+
+  public int length() {
+    return IDs.length;
+  }
+
+  public GenericUserPreferenceArray(List<Preference> prefs) {
+    this(prefs.size());
+    for (int i = 0; i < prefs.size(); i++) {
+      Preference pref = prefs.get(i);
+      IDs[i] = pref.getItemID();
+      values[i] = pref.getValue();
+    }
+    id = prefs.get(0).getUserID();
   }
 
   @Override
   public Preference get(int i) {
-    return new GenericPreference(users[i], itemIDs[i], values[i]);
+    return new PreferenceView(i);
   }
 
   @Override
   public void set(int i, Preference pref) {
-    users[i] = pref.getUser();
-    itemIDs[i] = pref.getItemID();
+    id = pref.getUserID();
+    IDs[i] = pref.getItemID();
     values[i] = pref.getValue();
   }
 
   @Override
-  public User getUser(int i) {
-    return users[i];
+  public Comparable<?> getUserID(int i) {
+    return id;
   }
 
   @Override
-  public void setUser(int i, User user) {
-    users[i] = user;
+  public void setUserID(int i, Comparable<?> userID) {
+    id = userID;
   }
 
   @Override
   public Comparable<?> getItemID(int i) {
-    return itemIDs[i];
+    return IDs[i];
   }
 
   @Override
   public void setItemID(int i, Comparable<?> itemID) {
-    itemIDs[i] = itemID;
+    IDs[i] = itemID;
   }
 
-
   @Override
-  public double getValue(int i) {
+  public float getValue(int i) {
     return values[i];
   }
 
   @Override
-  public void setValue(int i, double value) {
+  public void setValue(int i, float value) {
     values[i] = value;
   }
 
-}
+  @Override
+  public void sortByUser() {
+  }
+
+  @Override
+  public void sortByItem() {
+    selectionSort(ITEM);
+  }
+
+  @Override
+  public void sortByValue() {
+    selectionSort(VALUE);
+  }
+
+  @Override
+  public void sortByValueReversed() {
+    selectionSort(VALUE_REVERSED);
+  }
+
+  private void selectionSort(int type) {
+    // I think this sort will prove to be too dumb, but, it's in place and OK for tiny, mostly sorted data
+    int max = length();
+    for (int i = 0; i < max; i++) {
+      int min = i;
+      for (int j = i + 1; j < max; j++) {
+        if (isLess(j, min, type)) {
+          min = j;
+        }
+      }
+      if (i != min) {
+        swap(i, min);
+      }
+    }
+  }
+
+  private boolean isLess(int i, int j, int type) {
+    switch (type) {
+      case ITEM:
+        return ((Comparable<Object>) IDs[i]).compareTo(IDs[j]) < 0;
+      case VALUE:
+        return values[i] < values[j];
+      case VALUE_REVERSED:
+        return values[i] >= values[j];
+      default:
+        throw new IllegalStateException();
+    }
+  }
+
+  private void swap(int i, int j) {
+    Comparable<?> temp1 = IDs[i];
+    float temp2 = values[i];
+    IDs[i] = IDs[j];
+    values[i] = values[j];
+    IDs[j] = temp1;
+    values[j] = temp2;
+  }
+
+  public GenericUserPreferenceArray clone() {
+    try {
+      return (GenericUserPreferenceArray) super.clone();
+    } catch (CloneNotSupportedException cnse) {
+      throw new AssertionError();
+    }
+  }
+
+  @Override
+  public Iterator<Preference> iterator() {
+    return new PreferenceArrayIterator();
+  }
+
+  private final class PreferenceArrayIterator implements Iterator<Preference> {
+    private int i = 0;
+    @Override
+    public boolean hasNext() {
+      return i < length();
+    }
+    @Override
+    public Preference next() {
+      return new PreferenceView(i++);
+    }
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  private final class PreferenceView implements Preference {
+
+    private final int i;
+
+    private PreferenceView(int i) {
+      this.i = i;
+    }
+
+    @Override
+    public Comparable<?> getUserID() {
+      return GenericUserPreferenceArray.this.getUserID(i);
+    }
+
+    @Override
+    public Comparable<?> getItemID() {
+      return GenericUserPreferenceArray.this.getItemID(i);
+    }
+
+    @Override
+    public float getValue() {
+      return values[i];
+    }
+
+    @Override
+    public void setValue(float value) {
+      values[i] = value;
+    }
+
+  }
+
+}
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Tue Aug  4 00:06:46 2009
@@ -21,20 +21,19 @@
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.FastMap;
 import org.apache.mahout.cf.taste.impl.common.FastSet;
-import org.apache.mahout.cf.taste.impl.common.FileLineIterable;
-import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
-import org.apache.mahout.cf.taste.impl.model.BooleanPreference;
+import org.apache.mahout.cf.taste.impl.common.FileLineIterator;
+import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
 import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
-import org.apache.mahout.cf.taste.impl.model.GenericUser;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
@@ -68,23 +67,25 @@
  * preference for an item, but no degree of preference), the caller can simply omit the third token in each line
  * altogether -- for example, "123,ABC".</p>
  *
+ * <p>Note that it's all-or-nothing -- all of the items in the file must express no preference, or they all must.
+ * These cannot be mixed. Put another way, there will always be the same number of delimiters on every line of the
+ * file!</p>
+ *
  * <p>This class is not intended for use with very large amounts of data (over, say, tens of millions of rows). For
  * that, a JDBC-backed {@link DataModel} and a database are more appropriate.</p>
  *
  * <p>It is possible and likely useful to subclass this class and customize its behavior to accommodate
- * application-specific needs and input formats. See {@link #processLine(String, Map)},
- * {@link #buildUser(String, List)} and {@link #buildPreference(User, Comparable, double)}.</p>
+ * application-specific needs and input formats. See {@link #processLine(String, Map, char)} and
+ * {@link #processLineWithoutID(String, Map, char)}.</p>
  */
 public class FileDataModel implements DataModel {
 
   private static final Logger log = LoggerFactory.getLogger(FileDataModel.class);
 
   private static final long MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute?
-  private static final char UNKNOWN_DELIMITER = '\0';
 
   private final File dataFile;
   private long lastModified;
-  private char delimiter;
   private boolean loaded;
   private DataModel delegate;
   private final ReentrantLock reloadLock;
@@ -107,8 +108,6 @@
       throw new FileNotFoundException(dataFile.toString());
     }
 
-    this.delimiter = UNKNOWN_DELIMITER;
-
     log.info("Creating FileDataModel for file " + dataFile);
 
     this.dataFile = dataFile.getAbsoluteFile();
@@ -125,22 +124,39 @@
     if (!reloadLock.isLocked()) {
       reloadLock.lock();
       try {
-        Map<String, List<Preference>> data = new FastMap<String, List<Preference>>();
-
-        processFile(dataFile, data);
-        for (File updateFile : findUpdateFiles()) {
-          processFile(updateFile, data);
-        }
-
-        delegate = new GenericDataModel(new UserIterableOverData(data));
+        delegate = buildModel();
         loaded = true;
-
+      } catch (IOException ioe) {
+        log.warn("Exception while reloading", ioe);
       } finally {
         reloadLock.unlock();
       }
     }
   }
 
+  private DataModel buildModel() throws IOException {
+    FileLineIterator iterator = new FileLineIterator(dataFile, false);
+    String firstLine = iterator.peek();
+    char delimiter = determineDelimiter(firstLine);
+    boolean hasPrefValues = firstLine.indexOf(',', firstLine.indexOf(',') + 1) >= 0;
+
+    if (hasPrefValues) {
+      Map<Comparable<?>, Collection<Preference>> data = new FastMap<Comparable<?>, Collection<Preference>>();
+      processFile(iterator, data, delimiter);
+      for (File updateFile : findUpdateFiles()) {
+        processFile(new FileLineIterator(updateFile, false), data, delimiter);
+      }
+      return new GenericDataModel(GenericDataModel.toPrefArrayValues(data, true));
+    } else {
+      Map<Comparable<?>, FastSet<Comparable<?>>> data = new FastMap<Comparable<?>, FastSet<Comparable<?>>>();
+      processFileWithoutID(iterator, data, delimiter);
+      for (File updateFile : findUpdateFiles()) {
+        processFileWithoutID(new FileLineIterator(updateFile, false), data, delimiter);
+      }
+      return new GenericBooleanPrefDataModel(data);
+    }
+  }
+
   /**
    * Finds update delta files in the same directory as the data file. This finds any file whose name starts the same way
    * as the data file (up to first period) but isn't the data file itself. For example, if the data file is
@@ -162,35 +178,32 @@
     return updateFiles;
   }
 
-  protected void processFile(File dataOrUpdateFile, Map<String, List<Preference>> data) {
+  private static char determineDelimiter(String line) {
+    if (line.indexOf(',') >= 0) {
+      return ',';
+    }
+    if (line.indexOf('\t') >= 0) {
+      return '\t';
+    }
+    throw new IllegalArgumentException("Did not find a delimiter in first line");
+  }
+
+  protected void processFile(FileLineIterator dataOrUpdateFileIterator,
+                             Map<Comparable<?>, Collection<Preference>> data,
+                             char delimiter) {
     log.info("Reading file info...");
     AtomicInteger count = new AtomicInteger();
-    for (String line : new FileLineIterable(dataOrUpdateFile, false)) {
+    while (dataOrUpdateFileIterator.hasNext()) {
+      String line = dataOrUpdateFileIterator.next();
       if (line.length() > 0) {
-        if (log.isDebugEnabled()) {
-          log.debug("Read line: {}", line);
-        }
-        if (delimiter == UNKNOWN_DELIMITER) {
-          delimiter = determineDelimiter(line);
-        }
-        processLine(line, data);
+        processLine(line, data, delimiter);
         int currentCount = count.incrementAndGet();
         if (currentCount % 100000 == 0) {
           log.info("Processed {} lines", currentCount);
         }
       }
     }
-    log.info("Read lines: " + count.get());
-  }
-
-  private static char determineDelimiter(String line) {
-    if (line.indexOf(',') >= 0) {
-      return ',';
-    }
-    if (line.indexOf('\t') >= 0) {
-      return '\t';
-    }
-    throw new IllegalArgumentException("Did not find a delimiter in first line");
+    log.info("Read lines: {}", count.get());
   }
 
   /**
@@ -199,49 +212,39 @@
    * determining which user and item the preference pertains to, the method should look to see if the data contains a
    * mapping for the user ID already, and if not, add an empty {@link List} of {@link Preference}s to the data.</p>
    *
-   * <p>The method should use {@link #buildPreference(User, Comparable, double)} to
-   * build {@link Preference} objects as needed.</p>
-   *
    * <p>Note that if the line is empty or begins with '#' it will be ignored as a comment.</p>
    *
    * @param line      line from input data file
    * @param data      all data read so far, as a mapping from user IDs to preferences
-   * @see #buildPreference(User, Comparable, double)
    */
-  protected void processLine(String line, Map<String, List<Preference>> data) {
+  protected void processLine(String line, Map<Comparable<?>, Collection<Preference>> data, char delimiter) {
 
     if (line.length() == 0 || line.charAt(0) == '#') {
       return;
     }
 
     int delimiterOne = line.indexOf((int) delimiter);
-    if (delimiterOne < 0) {
+    int delimiterTwo = line.indexOf((int) delimiter, delimiterOne + 1);
+    if (delimiterOne < 0 || delimiterTwo < 0) {
       throw new IllegalArgumentException("Bad line: " + line);
     }
-    int delimiterTwo = line.indexOf((int) delimiter, delimiterOne + 1);
 
     String userID = line.substring(0, delimiterOne);
-    String itemID;
-    String preferenceValueString;
-    if (delimiterTwo >= 0) {
-      itemID = line.substring(delimiterOne + 1, delimiterTwo);
-      preferenceValueString = line.substring(delimiterTwo + 1);
-    } else {
-      itemID = line.substring(delimiterOne + 1);
-      preferenceValueString = null;
-    }
+    String itemID = line.substring(delimiterOne + 1, delimiterTwo);
+    String preferenceValueString = line.substring(delimiterTwo + 1);
+
     if (transpose) {
       String tmp = userID;
       userID = itemID;
       itemID = tmp;
     }
-    List<Preference> prefs = data.get(userID);
+    Collection<Preference> prefs = data.get(userID);
     if (prefs == null) {
       prefs = new ArrayList<Preference>(2);
       data.put(userID, prefs);
     }
 
-    if (preferenceValueString != null && preferenceValueString.length() == 0) {
+    if (preferenceValueString.length() == 0) {
       // remove pref
       Iterator<Preference> prefsIterator = prefs.iterator();
       while (prefsIterator.hasNext()) {
@@ -252,14 +255,54 @@
         }
       }
     } else {
-      // add pref -- assume it does not already exist
-      if (preferenceValueString == null) {
-        prefs.add(new BooleanPreference(null, itemID));
-      } else {
-        double preferenceValue = Double.parseDouble(preferenceValueString);
-        prefs.add(buildPreference(null, itemID, preferenceValue));
+      float preferenceValue = Float.parseFloat(preferenceValueString);
+      prefs.add(new GenericPreference(userID, itemID, preferenceValue));
+    }
+  }
+
+  protected void processFileWithoutID(FileLineIterator dataOrUpdateFileIterator,
+                                      Map<Comparable<?>, FastSet<Comparable<?>>> data,
+                                      char delimiter) {
+    log.info("Reading file info...");
+    AtomicInteger count = new AtomicInteger();
+    while (dataOrUpdateFileIterator.hasNext()) {
+      String line = dataOrUpdateFileIterator.next();
+      if (line.length() > 0) {
+        processLineWithoutID(line, data, delimiter);
+        int currentCount = count.incrementAndGet();
+        if (currentCount % 100000 == 0) {
+          log.info("Processed {} lines", currentCount);
+        }
       }
     }
+    log.info("Read lines: {}", count.get());
+  }
+
+  protected void processLineWithoutID(String line, Map<Comparable<?>, FastSet<Comparable<?>>> data, char delimiter) {
+
+    if (line.length() == 0 || line.charAt(0) == '#') {
+      return;
+    }
+
+    int delimiterOne = line.indexOf((int) delimiter);
+    if (delimiterOne < 0) {
+      throw new IllegalArgumentException("Bad line: " + line);
+    }
+
+    String userID = line.substring(0, delimiterOne);
+    String itemID = line.substring(delimiterOne + 1);
+
+    if (transpose) {
+      String tmp = userID;
+      userID = itemID;
+      itemID = tmp;
+    }
+    FastSet<Comparable<?>> itemIDs = data.get(userID);
+    if (itemIDs == null) {
+      itemIDs = new FastSet<Comparable<?>>(2);
+      data.put(userID, itemIDs);
+    }
+    itemIDs.add(itemID);
   }
 
   private void checkLoaded() {
@@ -269,15 +312,20 @@
   }
 
   @Override
-  public Iterable<? extends User> getUsers() throws TasteException {
+  public Iterable<Comparable<?>> getUserIDs() throws TasteException {
     checkLoaded();
-    return delegate.getUsers();
+    return delegate.getUserIDs();
   }
 
   @Override
-  public User getUser(Comparable<?> id) throws TasteException {
+  public PreferenceArray getPreferencesFromUser(Comparable<?> userID) throws TasteException {
     checkLoaded();
-    return delegate.getUser(id);
+    return delegate.getPreferencesFromUser(userID);
+  }
+
+  @Override
+  public FastSet<Comparable<?>> getItemIDsFromUser(Comparable<?> userID) throws TasteException {
+    return delegate.getItemIDsFromUser(userID);
   }
 
   @Override
@@ -287,15 +335,14 @@
   }
 
   @Override
-  public Iterable<? extends Preference> getPreferencesForItem(Comparable<?> itemID) throws TasteException {
+  public PreferenceArray getPreferencesForItem(Comparable<?> itemID) throws TasteException {
     checkLoaded();
     return delegate.getPreferencesForItem(itemID);
   }
 
   @Override
-  public Preference[] getPreferencesForItemAsArray(Comparable<?> itemID) throws TasteException {
-    checkLoaded();
-    return delegate.getPreferencesForItemAsArray(itemID);
+  public Float getPreferenceValue(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+    return delegate.getPreferenceValue(userID, itemID);
   }
 
   @Override
@@ -322,12 +369,12 @@
    * reloaded from a file. This method should also be considered relatively slow.
    */
   @Override
-  public void setPreference(Comparable<?> userID, Comparable<?> itemID, double value) throws TasteException {
+  public void setPreference(Comparable<?> userID, Comparable<?> itemID, float value) throws TasteException {
     checkLoaded();
     delegate.setPreference(userID, itemID, value);
   }
 
-  /** See the warning at {@link #setPreference(Comparable, Comparable, double)}. */
+  /** See the warning at {@link #setPreference(Comparable, Comparable, float)}. */
   @Override
   public void removePreference(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
     checkLoaded();
@@ -347,86 +394,11 @@
     }
   }
 
-  /**
-   * Subclasses may override to return a different {@link User} implementation. The default implemenation always builds
-   * a new {@link GenericUser}. This may not be desirable; it may be better to return an existing {@link User} object in
-   * some applications rather than create a new object.
-   *
-   * @param id    user ID
-   * @param prefs user preferences
-   * @return {@link GenericUser} by default, or, a {@link BooleanPrefUser} if the prefs supplied are in fact {@link
-   *         BooleanPreference}s
-   */
-  protected User buildUser(String id, List<Preference> prefs) {
-    if (!prefs.isEmpty() && prefs.get(0) instanceof BooleanPreference) {
-      // If first is a BooleanPreference, assuming all are, so, want to use BooleanPrefUser
-      FastSet<Comparable<?>> itemIDs = new FastSet<Comparable<?>>(prefs.size());
-      for (Preference pref : prefs) {
-        itemIDs.add(pref.getItemID());
-      }
-      itemIDs.rehash();
-      return new BooleanPrefUser(id, itemIDs);
-    }
-    return new GenericUser(id, prefs);
-  }
-
-  /**
-   * Subclasses may override to return a different {@link Preference} implementation. The default implementation builds
-   * a new {@link GenericPreference}.
-   *
-   * @param user  {@link User} who expresses the preference
-   * @param itemID  preferred item
-   * @param value preference value
-   * @return {@link GenericPreference} by default
-   */
-  protected Preference buildPreference(User user, Comparable<?> itemID, double value) {
-    return new GenericPreference(user, itemID, value);
-  }
-
   @Override
   public String toString() {
     return "FileDataModel[dataFile:" + dataFile + ']';
   }
 
 
-  private final class UserIterableOverData implements Iterable<User> {
-    private final Map<String, List<Preference>> data;
-
-    private UserIterableOverData(Map<String, List<Preference>> data) {
-      this.data = data;
-    }
-
-    @Override
-    public Iterator<User> iterator() {
-      return new UserIteratorOverData(data.entrySet().iterator());
-    }
-  }
-
-  private final class UserIteratorOverData implements Iterator<User> {
-    private final Iterator<Map.Entry<String, List<Preference>>> dataIterator;
-
-    private UserIteratorOverData(Iterator<Map.Entry<String, List<Preference>>> dataIterator) {
-      this.dataIterator = dataIterator;
-    }
-
-    @Override
-    public boolean hasNext() {
-      return dataIterator.hasNext();
-    }
-
-    @Override
-    public User next() {
-      Map.Entry<String, List<Preference>> datum = dataIterator.next();
-      String key = datum.getKey();
-      List<Preference> value = datum.getValue();
-      dataIterator.remove();
-      return buildUser(key, value);
-    }
-
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-  }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java Tue Aug  4 00:06:46 2009
@@ -17,41 +17,33 @@
 
 package org.apache.mahout.cf.taste.impl.model.jdbc;
 
-import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.impl.common.FastSet;
 import org.apache.mahout.cf.taste.impl.common.IOUtils;
-import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
-import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
-import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
 import org.apache.mahout.cf.taste.impl.model.BooleanPreference;
 import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import javax.sql.DataSource;
 import java.sql.Connection;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.NoSuchElementException;
-
 
 public abstract class AbstractBooleanPrefJDBCDataModel extends AbstractJDBCDataModel {
 
-  private final String getUserSQL;
+  private static final Logger log = LoggerFactory.getLogger(AbstractBooleanPrefJDBCDataModel.class);
+
   private final String setPreferenceSQL;
-  private final String getUsersSQL;
-  private final String getPrefsForItemSQL;
 
   protected AbstractBooleanPrefJDBCDataModel(DataSource dataSource,
                                              String preferenceTable,
                                              String userIDColumn,
                                              String itemIDColumn,
                                              String preferenceColumn,
+                                             String getPreferenceSQL,
                                              String getUserSQL,
+                                             String getAllUsersSQL,
                                              String getNumItemsSQL,
                                              String getNumUsersSQL,
                                              String setPreferenceSQL,
@@ -66,7 +58,9 @@
         userIDColumn,
         itemIDColumn,
         preferenceColumn,
+        getPreferenceSQL,
         getUserSQL,
+        getAllUsersSQL,
         getNumItemsSQL,
         getNumUsersSQL,
         setPreferenceSQL,
@@ -76,74 +70,25 @@
         getPrefsForItemSQL,
         getNumPreferenceForItemSQL,
         getNumPreferenceForItemsSQL);
-    this.getUserSQL = getUserSQL;
     this.setPreferenceSQL = setPreferenceSQL;
-    this.getUsersSQL = getUsersSQL;
-    this.getPrefsForItemSQL = getPrefsForItemSQL;
-  }
-
-  /**
-   * @throws org.apache.mahout.cf.taste.common.NoSuchUserException
-   *          if there is no such user
-   */
-  @Override
-  public User getUser(Comparable<?> id) throws TasteException {
-
-    log.debug("Retrieving user ID '{}'", id);
-
-    Connection conn = null;
-    PreparedStatement stmt = null;
-    ResultSet rs = null;
-
-    try {
-      conn = getDataSource().getConnection();
-      stmt = conn.prepareStatement(getUserSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
-      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
-      stmt.setFetchSize(getFetchSize());
-      stmt.setObject(1, id);
-
-      log.debug("Executing SQL query: {}", getUserSQL);
-      rs = stmt.executeQuery();
-
-      FastSet<Comparable<?>> itemIDs = new FastSet<Comparable<?>>();
-      while (rs.next()) {
-        itemIDs.add((Comparable<?>) rs.getObject(1));
-      }
-
-      if (itemIDs.isEmpty()) {
-        throw new NoSuchUserException();
-      }
-
-      return buildUser(id, itemIDs);
-
-    } catch (SQLException sqle) {
-      log.warn("Exception while retrieving user", sqle);
-      throw new TasteException(sqle);
-    } finally {
-      IOUtils.quietClose(rs, stmt, conn);
-    }
-
   }
 
   @Override
-  public Iterable<? extends User> getUsers() throws TasteException {
-    log.debug("Retrieving all users...");
-    return new IteratorIterable<User>(new ResultSetUserIterator(getDataSource(), getUsersSQL));
+  protected Preference buildPreference(ResultSet rs) throws SQLException {
+    return new BooleanPreference((Comparable<?>) rs.getObject(1), (Comparable<?>) rs.getObject(2));
   }
 
   @Override
-  public void setPreference(Comparable<?> userID, Comparable<?> itemID, double value)
+  public void setPreference(Comparable<?> userID, Comparable<?> itemID, float value)
       throws TasteException {
     if (userID == null || itemID == null) {
       throw new IllegalArgumentException("userID or itemID is null");
     }
-    if (!Double.isNaN(value)) {
+    if (!Float.isNaN(value)) {
       throw new IllegalArgumentException("Invalid value: " + value);
     }
 
-    if (log.isDebugEnabled()) {
-      log.debug("Setting preference for user '" + userID + "', item '" + itemID);
-    }
+    log.debug("Setting preference for user {}, item {}", userID, itemID);
 
     Connection conn = null;
     PreparedStatement stmt = null;
@@ -165,152 +110,4 @@
     }
   }
 
-  @Override
-  protected List<? extends Preference> doGetPreferencesForItem(Comparable<?> itemID) throws TasteException {
-    log.debug("Retrieving preferences for item ID '{}'", itemID);
-    Connection conn = null;
-    PreparedStatement stmt = null;
-    ResultSet rs = null;
-    try {
-      conn = getDataSource().getConnection();
-      stmt = conn.prepareStatement(getPrefsForItemSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
-      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
-      stmt.setFetchSize(getFetchSize());
-      stmt.setObject(1, itemID);
-
-      log.debug("Executing SQL query: {}", getPrefsForItemSQL);
-      rs = stmt.executeQuery();
-      List<Preference> prefs = new ArrayList<Preference>();
-      while (rs.next()) {
-        Comparable<?> userID = (Comparable<?>) rs.getObject(2);
-        Preference pref = buildPreference(buildUser(userID, (FastSet<Comparable<?>>) null), itemID);
-        prefs.add(pref);
-      }
-      return prefs;
-    } catch (SQLException sqle) {
-      log.warn("Exception while retrieving prefs for item", sqle);
-      throw new TasteException(sqle);
-    } finally {
-      IOUtils.quietClose(rs, stmt, conn);
-    }
-  }
-
-  protected User buildUser(Comparable<?> id, FastSet<Comparable<?>> itemIDs) {
-    return new BooleanPrefUser(id, itemIDs);
-  }
-
-  protected Preference buildPreference(User user, Comparable<?> itemID) {
-    return new BooleanPreference(user, itemID);
-  }
-
-  private final class ResultSetUserIterator implements SkippingIterator<User> {
-
-    private final Connection connection;
-    private final Statement statement;
-    private final ResultSet resultSet;
-    private boolean closed;
-
-    private ResultSetUserIterator(DataSource dataSource, String getUsersSQL) throws TasteException {
-      try {
-        connection = dataSource.getConnection();
-        statement = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
-        statement.setFetchDirection(ResultSet.FETCH_FORWARD);
-        statement.setFetchSize(getFetchSize());
-        log.debug("Executing SQL query: {}", getUsersSQL);
-        resultSet = statement.executeQuery(getUsersSQL);
-        boolean anyResults = resultSet.next();
-        if (!anyResults) {
-          close();
-        }
-      } catch (SQLException sqle) {
-        close();
-        throw new TasteException(sqle);
-      }
-    }
-
-    @Override
-    public boolean hasNext() {
-      boolean nextExists = false;
-      if (!closed) {
-        try {
-          if (resultSet.isAfterLast()) {
-            close();
-          } else {
-            nextExists = true;
-          }
-        } catch (SQLException sqle) {
-          log.warn("Unexpected exception while accessing ResultSet; continuing...", sqle);
-          close();
-        }
-      }
-      return nextExists;
-    }
-
-    @Override
-    public User next() {
-
-      if (!hasNext()) {
-        throw new NoSuchElementException();
-      }
-
-      Comparable<?> currentUserID = null;
-      FastSet<Comparable<?>> itemIDs = new FastSet<Comparable<?>>();
-
-      try {
-        do {
-          Comparable<?> userID = (Comparable<?>) resultSet.getObject(2);
-          if (currentUserID == null) {
-            currentUserID = userID;
-          }
-          // Did we move on to a new user?
-          if (!userID.equals(currentUserID)) {
-            break;
-          }
-          // else add a new preference for the current user
-          itemIDs.add((Comparable<?>) resultSet.getObject(1));
-        } while (resultSet.next());
-      } catch (SQLException sqle) {
-        // No good way to handle this since we can't throw an exception
-        log.warn("Exception while iterating over users", sqle);
-        close();
-        throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
-      }
-
-      return buildUser(currentUserID, itemIDs);
-    }
-
-    /**
-     * @throws UnsupportedOperationException
-     */
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-
-    private void close() {
-      closed = true;
-      IOUtils.quietClose(resultSet, statement, connection);
-    }
-
-    @Override
-    public void skip(int n) {
-      if (n >= 1 && hasNext()) {
-        try {
-          int distinctUserNamesSeen = 0;
-          Object currentUserID = null;
-          do {
-            Comparable<?> userID = (Comparable<?>) resultSet.getObject(2);
-            if (!userID.equals(currentUserID)) {
-              distinctUserNamesSeen++;
-            }
-            currentUserID = userID;
-          } while (distinctUserNamesSeen <= n && resultSet.next());
-        } catch (SQLException sqle) {
-          log.warn("Exception while iterating over users", sqle);
-          close();
-        }
-      }
-    }
-  }
-
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Tue Aug  4 00:06:46 2009
@@ -21,17 +21,20 @@
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.Cache;
+import org.apache.mahout.cf.taste.impl.common.FastMap;
+import org.apache.mahout.cf.taste.impl.common.FastSet;
 import org.apache.mahout.cf.taste.impl.common.IOUtils;
 import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
 import org.apache.mahout.cf.taste.impl.common.Retriever;
 import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
 import org.apache.mahout.cf.taste.impl.common.jdbc.AbstractJDBCComponent;
+import org.apache.mahout.cf.taste.impl.model.GenericItemPreferenceArray;
 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
-import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.JDBCDataModel;
 import org.apache.mahout.cf.taste.model.Preference;
-import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,6 +47,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 import java.util.NoSuchElementException;
 
 /**
@@ -58,13 +62,6 @@
  * pooling, so make sure the {@link DataSource} it exposes is using pooling. Outside a J2EE container, you can use
  * packages like Jakarta's <a href="http://jakarta.apache.org/commons/dbcp/">DBCP</a> to create a {@link DataSource} on
  * top of your database whose {@link Connection}s are pooled.</p>
- *
- * <p>Also note: this default implementation assumes that the user and item ID keys are {@link String}s, for maximum
- * flexibility. You can override this behavior by subclassing an implementation and overriding {@link
- * {@link #buildUser(Comparable, List)}. If you don't, just make sure you use {@link String}s as IDs
- * throughout your code. If your IDs are really numeric, and you use, say, {@link Long} for IDs in the rest of your
- * code, you will run into subtle problems because the {@link Long} values won't be equal to or compare correctly to the
- * underlying {@link String} key values.</p>
  */
 public abstract class AbstractJDBCDataModel extends AbstractJDBCComponent implements JDBCDataModel {
 
@@ -80,7 +77,9 @@
   private final String userIDColumn;
   private final String itemIDColumn;
   private final String preferenceColumn;
+  private final String getPreferenceSQL;
   private final String getUserSQL;
+  private final String getAllUsersSQL;
   private final String getNumItemsSQL;
   private final String getNumUsersSQL;
   private final String setPreferenceSQL;
@@ -95,7 +94,9 @@
   private final Cache<Comparable<?>, Integer> itemPrefCounts;
 
   protected AbstractJDBCDataModel(DataSource dataSource,
+                                  String getPreferenceSQL,
                                   String getUserSQL,
+                                  String getAllUsersSQL,
                                   String getNumItemsSQL,
                                   String getNumUsersSQL,
                                   String setPreferenceSQL,
@@ -110,7 +111,9 @@
         DEFAULT_USER_ID_COLUMN,
         DEFAULT_ITEM_ID_COLUMN,
         DEFAULT_PREFERENCE_COLUMN,
+        getPreferenceSQL,
         getUserSQL,
+        getAllUsersSQL,
         getNumItemsSQL,
         getNumUsersSQL,
         setPreferenceSQL,
@@ -127,7 +130,9 @@
                                   String userIDColumn,
                                   String itemIDColumn,
                                   String preferenceColumn,
+                                  String getPreferenceSQL,
                                   String getUserSQL,
+                                  String getAllUsersSQL,
                                   String getNumItemsSQL,
                                   String getNumUsersSQL,
                                   String setPreferenceSQL,
@@ -147,6 +152,8 @@
 
     checkNotNullAndLog("dataSource", dataSource);
     checkNotNullAndLog("getUserSQL", getUserSQL);
+    checkNotNullAndLog("getAllUsersSQL", getAllUsersSQL);
+    checkNotNullAndLog("getPreferenceSQL", getPreferenceSQL);
     checkNotNullAndLog("getNumItemsSQL", getNumItemsSQL);
     checkNotNullAndLog("getNumUsersSQL", getNumUsersSQL);
     checkNotNullAndLog("setPreferenceSQL", setPreferenceSQL);
@@ -168,7 +175,9 @@
     this.preferenceColumn = preferenceColumn;
 
     this.dataSource = dataSource;
+    this.getPreferenceSQL = getPreferenceSQL;
     this.getUserSQL = getUserSQL;
+    this.getAllUsersSQL = getAllUsersSQL;
     this.getNumItemsSQL = getNumItemsSQL;
     this.getNumUsersSQL = getNumUsersSQL;
     this.setPreferenceSQL = setPreferenceSQL;
@@ -208,14 +217,14 @@
   }
 
   @Override
-  public Iterable<? extends User> getUsers() throws TasteException {
+  public Iterable<Comparable<?>> getUserIDs() throws TasteException {
     log.debug("Retrieving all users...");
-    return new IteratorIterable<User>(new ResultSetUserIterator(dataSource, getUsersSQL));
+    return new IteratorIterable<Comparable<?>>(new ResultSetIDIterator(getUsersSQL));
   }
 
   /** @throws NoSuchUserException if there is no such user */
   @Override
-  public User getUser(Comparable<?> id) throws TasteException {
+  public PreferenceArray getPreferencesFromUser(Comparable<?> id) throws TasteException {
 
     log.debug("Retrieving user ID '{}'", id);
 
@@ -223,8 +232,6 @@
     PreparedStatement stmt = null;
     ResultSet rs = null;
 
-    String idString = id.toString();
-
     try {
       conn = dataSource.getConnection();
       stmt = conn.prepareStatement(getUserSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
@@ -237,14 +244,14 @@
 
       List<Preference> prefs = new ArrayList<Preference>();
       while (rs.next()) {
-        addPreference(rs, prefs);
+        prefs.add(buildPreference(rs));
       }
 
       if (prefs.isEmpty()) {
         throw new NoSuchUserException();
       }
 
-      return buildUser(idString, prefs);
+      return new GenericUserPreferenceArray(prefs);
 
     } catch (SQLException sqle) {
       log.warn("Exception while retrieving user", sqle);
@@ -256,23 +263,183 @@
   }
 
   @Override
-  public Iterable<Comparable<?>> getItemIDs() throws TasteException {
-    log.debug("Retrieving all items...");
-    return new IteratorIterable<Comparable<?>>(new ResultSetItemIterator(dataSource, getItemsSQL));
+  public Map<Comparable<?>, PreferenceArray> exportWithPrefs() throws TasteException {
+    log.debug("Exporting all data");
+
+    Connection conn = null;
+    Statement stmt = null;
+    ResultSet rs = null;
+
+    Map<Comparable<?>, PreferenceArray> result = new FastMap<Comparable<?>, PreferenceArray>();
+
+    try {
+      conn = dataSource.getConnection();
+      stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(getFetchSize());
+
+      log.debug("Executing SQL query: {}", getAllUsersSQL);
+      rs = stmt.executeQuery(getAllUsersSQL);
+
+      Comparable<?> currentUserID = null;
+      List<Preference> currentPrefs = new ArrayList<Preference>();
+      while (rs.next()) {
+        Comparable<?> nextUserID = (Comparable<?>) rs.getObject(1);
+        if (currentUserID != null && !currentUserID.equals(nextUserID)) {
+          if (!currentPrefs.isEmpty()) {
+            result.put(currentUserID, new GenericUserPreferenceArray(currentPrefs));
+            currentPrefs.clear();
+          }
+        } else {
+          currentPrefs.add(buildPreference(rs));
+        }
+        currentUserID = nextUserID;
+      }
+      if (!currentPrefs.isEmpty()) {
+        result.put(currentUserID, new GenericUserPreferenceArray(currentPrefs));
+      }
+
+      return result;
+
+    } catch (SQLException sqle) {
+      log.warn("Exception while exporting all data", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.quietClose(rs, stmt, conn);
+
+    }
+  }
+
+  @Override
+  public Map<Comparable<?>, FastSet<Comparable<?>>> exportWithIDsOnly() throws TasteException {
+    log.debug("Exporting all data");
+
+    Connection conn = null;
+    Statement stmt = null;
+    ResultSet rs = null;
+
+    Map<Comparable<?>, FastSet<Comparable<?>>> result = new FastMap<Comparable<?>, FastSet<Comparable<?>>>();
+
+    try {
+      conn = dataSource.getConnection();
+      stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(getFetchSize());
+
+      log.debug("Executing SQL query: {}", getAllUsersSQL);
+      rs = stmt.executeQuery(getAllUsersSQL);
+
+      Comparable<?> currentUserID = null;
+      FastSet<Comparable<?>> currentItemIDs = new FastSet<Comparable<?>>(2);
+      while (rs.next()) {
+        Comparable<?> nextUserID = (Comparable<?>) rs.getObject(1);
+        if (currentUserID != null && !currentUserID.equals(nextUserID)) {
+          if (!currentItemIDs.isEmpty()) {
+            result.put(currentUserID, currentItemIDs);
+            currentItemIDs = new FastSet<Comparable<?>>(2);
+          }
+        } else {
+          currentItemIDs.add((Comparable<?>) rs.getObject(2));
+        }
+        currentUserID = nextUserID;
+      }
+      if (!currentItemIDs.isEmpty()) {
+        result.put(currentUserID, currentItemIDs);
+      }
+
+      return result;
+
+    } catch (SQLException sqle) {
+      log.warn("Exception while exporting all data", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.quietClose(rs, stmt, conn);
+
+    }
+  }
+
+  /** @throws NoSuchUserException if there is no such user */
+  @Override
+  public FastSet<Comparable<?>> getItemIDsFromUser(Comparable<?> id) throws TasteException {
+
+    log.debug("Retrieving items for user ID '{}'", id);
+
+    Connection conn = null;
+    PreparedStatement stmt = null;
+    ResultSet rs = null;
+
+    try {
+      conn = getDataSource().getConnection();
+      stmt = conn.prepareStatement(getUserSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(getFetchSize());
+      stmt.setObject(1, id);
+
+      log.debug("Executing SQL query: {}", getUserSQL);
+      rs = stmt.executeQuery();
+
+      FastSet<Comparable<?>> result = new FastSet<Comparable<?>>();
+      while (rs.next()) {
+        result.add((Comparable<?>) rs.getObject(1));
+      }
+
+      if (result.isEmpty()) {
+        throw new NoSuchUserException();
+      }
+
+      return result;
+
+    } catch (SQLException sqle) {
+      log.warn("Exception while retrieving item s", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.quietClose(rs, stmt, conn);
+    }
+
+  }
+
+  @Override
+  public Float getPreferenceValue(Comparable<?> userID, Comparable<?> itemID) throws TasteException {
+    log.debug("Retrieving preferences for item ID '{}'", itemID);
+    Connection conn = null;
+    PreparedStatement stmt = null;
+    ResultSet rs = null;
+    try {
+      conn = dataSource.getConnection();
+      stmt = conn.prepareStatement(getPreferenceSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(1);
+      stmt.setObject(1, userID);
+      stmt.setObject(2, itemID);
+
+      log.debug("Executing SQL query: {}", getPreferenceSQL);
+      rs = stmt.executeQuery();
+      if (rs.next()) {
+        return rs.getFloat(1);
+      } else {
+        return null;
+      }
+    } catch (SQLException sqle) {
+      log.warn("Exception while retrieving prefs for item", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.quietClose(rs, stmt, conn);
+    }
   }
 
   @Override
-  public Iterable<? extends Preference> getPreferencesForItem(Comparable<?> itemID) throws TasteException {
-    return doGetPreferencesForItem(itemID);
+  public Iterable<Comparable<?>> getItemIDs() throws TasteException {
+    log.debug("Retrieving all items...");
+    return new IteratorIterable<Comparable<?>>(new ResultSetIDIterator(getItemsSQL));
   }
 
   @Override
-  public Preference[] getPreferencesForItemAsArray(Comparable<?> itemID) throws TasteException {
-    List<? extends Preference> list = doGetPreferencesForItem(itemID);
-    return list.toArray(new Preference[list.size()]);
+  public PreferenceArray getPreferencesForItem(Comparable<?> itemID) throws TasteException {
+    List<Preference> list = doGetPreferencesForItem(itemID);
+    return new GenericItemPreferenceArray(list);
   }
 
-  protected List<? extends Preference> doGetPreferencesForItem(Comparable<?> itemID) throws TasteException {
+  protected List<Preference> doGetPreferencesForItem(Comparable<?> itemID) throws TasteException {
     log.debug("Retrieving preferences for item ID '{}'", itemID);
     Connection conn = null;
     PreparedStatement stmt = null;
@@ -288,10 +455,7 @@
       rs = stmt.executeQuery();
       List<Preference> prefs = new ArrayList<Preference>();
       while (rs.next()) {
-        double preference = rs.getDouble(1);
-        Comparable<?> userID = (Comparable<?>) rs.getObject(2);
-        Preference pref = buildPreference(buildUser(userID, null), itemID, preference);
-        prefs.add(pref);
+        prefs.add(buildPreference(rs));
       }
       return prefs;
     } catch (SQLException sqle) {
@@ -329,7 +493,7 @@
     }
     return length == 1 ?
         itemPrefCounts.get(itemIDs[0]) :
-        getNumThings("user preferring items", getNumPreferenceForItemsSQL, itemIDs);
+        getNumThings("user preferring items", getNumPreferenceForItemsSQL, (Object[]) itemIDs);
   }
 
 
@@ -361,18 +525,16 @@
   }
 
   @Override
-  public void setPreference(Comparable<?> userID, Comparable<?> itemID, double value)
+  public void setPreference(Comparable<?> userID, Comparable<?> itemID, float value)
       throws TasteException {
     if (userID == null || itemID == null) {
       throw new IllegalArgumentException("userID or itemID is null");
     }
-    if (Double.isNaN(value)) {
+    if (Float.isNaN(value)) {
       throw new IllegalArgumentException("Invalid value: " + value);
     }
 
-    if (log.isDebugEnabled()) {
-      log.debug("Setting preference for user '" + userID + "', item '" + itemID + "', value " + value);
-    }
+    log.debug("Setting preference for user {}, item {}", userID, itemID);    
 
     Connection conn = null;
     PreparedStatement stmt = null;
@@ -432,177 +594,32 @@
     itemPrefCounts.clear();
   }
 
-
-  private void addPreference(ResultSet rs, Collection<Preference> prefs)
-      throws SQLException {
-    Comparable<?> itemID = (Comparable<?>) rs.getObject(1);
-    double preferenceValue = rs.getDouble(2);
-    prefs.add(buildPreference(null, itemID, preferenceValue));
-  }
-
-  /**
-   * <p>Default implementation which returns a new {@link GenericUser} with {@link String} IDs. Subclasses may override
-   * to return a different {@link User} implementation.</p>
-   *
-   * @param id    user ID
-   * @param prefs user preferences
-   * @return {@link GenericUser} by default
-   */
-  protected User buildUser(Comparable<?> id, List<Preference> prefs) {
-    return new GenericUser(id, prefs);
-  }
-
-  /**
-   * Subclasses may override to return a different {@link Preference} implementation.
-   *
-   * @param user {@link User}
-   * @param itemID item ID
-   * @return {@link GenericPreference} by default
-   */
-  protected Preference buildPreference(User user, Comparable<?> itemID, double value) {
-    return new GenericPreference(user, itemID, value);
-  }
-
-  /**
-   * <p>An {@link java.util.Iterator} which returns {@link org.apache.mahout.cf.taste.model.User}s from a {@link
-   * java.sql.ResultSet}. This is a useful way to iterate over all user data since it does not require all data to be
-   * read into memory at once. It does however require that the DB connection be held open. Note that this class will
-   * only release database resources after {@link #hasNext()} has been called and has returned false; callers should
-   * make sure to "drain" the entire set of data to avoid tying up database resources.</p>
-   */
-  private final class ResultSetUserIterator implements SkippingIterator<User> {
-
-    private final Connection connection;
-    private final Statement statement;
-    private final ResultSet resultSet;
-    private boolean closed;
-
-    private ResultSetUserIterator(DataSource dataSource, String getUsersSQL) throws TasteException {
-      try {
-        connection = dataSource.getConnection();
-        statement = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
-        statement.setFetchDirection(ResultSet.FETCH_FORWARD);
-        statement.setFetchSize(getFetchSize());
-        log.debug("Executing SQL query: {}", getUsersSQL);
-        resultSet = statement.executeQuery(getUsersSQL);
-        boolean anyResults = resultSet.next();
-        if (!anyResults) {
-          close();
-        }
-      } catch (SQLException sqle) {
-        close();
-        throw new TasteException(sqle);
-      }
-    }
-
-    @Override
-    public boolean hasNext() {
-      boolean nextExists = false;
-      if (!closed) {
-        try {
-          if (resultSet.isAfterLast()) {
-            close();
-          } else {
-            nextExists = true;
-          }
-        } catch (SQLException sqle) {
-          log.warn("Unexpected exception while accessing ResultSet; continuing...", sqle);
-          close();
-        }
-      }
-      return nextExists;
-    }
-
-    @Override
-    public User next() {
-
-      if (!hasNext()) {
-        throw new NoSuchElementException();
-      }
-
-      Comparable<?> currentUserID = null;
-      List<Preference> prefs = new ArrayList<Preference>();
-
-      try {
-        do {
-          Comparable<?> userID = (Comparable<?>) resultSet.getObject(3);
-          if (currentUserID == null) {
-            currentUserID = userID;
-          }
-          // Did we move on to a new user?
-          if (!userID.equals(currentUserID)) {
-            break;
-          }
-          // else add a new preference for the current user
-          addPreference(resultSet, prefs);
-        } while (resultSet.next());
-      } catch (SQLException sqle) {
-        // No good way to handle this since we can't throw an exception
-        log.warn("Exception while iterating over users", sqle);
-        close();
-        throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
-      }
-
-      return buildUser(currentUserID, prefs);
-    }
-
-    /**
-     * @throws UnsupportedOperationException
-     */
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-
-    private void close() {
-      closed = true;
-      IOUtils.quietClose(resultSet, statement, connection);
-    }
-
-    @Override
-    public void skip(int n) {
-      if (n >= 1 && hasNext()) {
-        try {
-          int distinctUserNamesSeen = 0;
-          Object currentUserID = null;
-          do {
-            Comparable<?> userID = (Comparable<?>) resultSet.getObject(3);
-            if (!userID.equals(currentUserID)) {
-              distinctUserNamesSeen++;
-            }
-            currentUserID = userID;
-          } while (distinctUserNamesSeen <= n && resultSet.next());
-        } catch (SQLException sqle) {
-          log.warn("Exception while iterating over users", sqle);
-          close();
-        }
-      }
-    }
-
+  protected Preference buildPreference(ResultSet rs) throws SQLException {
+    return new GenericPreference((Comparable<?>) rs.getObject(1), (Comparable<?>) rs.getObject(2), rs.getFloat(3));
   }
 
   /**
-   * <p>An {@link java.util.Iterator} which returns items from a {@link
-   * java.sql.ResultSet}. This is a useful way to iterate over all user data since it does not require all data to be
+   * <p>An {@link java.util.Iterator} which returns items from a {@link ResultSet}.
+   * This is a useful way to iterate over all user data since it does not require all data to be
    * read into memory at once. It does however require that the DB connection be held open. Note that this class will
    * only release database resources after {@link #hasNext()} has been called and has returned <code>false</code>;
    * callers should make sure to "drain" the entire set of data to avoid tying up database resources.</p>
    */
-  private final class ResultSetItemIterator implements SkippingIterator<Comparable<?>> {
+  private final class ResultSetIDIterator implements SkippingIterator<Comparable<?>> {
 
     private final Connection connection;
     private final Statement statement;
     private final ResultSet resultSet;
     private boolean closed;
 
-    private ResultSetItemIterator(DataSource dataSource, String getItemsSQL) throws TasteException {
+    private ResultSetIDIterator(String sql) throws TasteException {
       try {
         connection = dataSource.getConnection();
         statement = connection.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
         statement.setFetchDirection(ResultSet.FETCH_FORWARD);
         statement.setFetchSize(getFetchSize());
-        log.debug("Executing SQL query: {}", getItemsSQL);
-        resultSet = statement.executeQuery(getItemsSQL);
+        log.debug("Executing SQL query: {}", sql);
+        resultSet = statement.executeQuery(sql);
         boolean anyResults = resultSet.next();
         if (!anyResults) {
           close();
@@ -639,12 +656,12 @@
       }
 
       try {
-        Comparable<?> itemID = (Comparable<?>) resultSet.getObject(1);
+        Comparable<?> ID = (Comparable<?>) resultSet.getObject(1);
         resultSet.next();
-        return itemID;
+        return ID;
       } catch (SQLException sqle) {
         // No good way to handle this since we can't throw an exception
-        log.warn("Exception while iterating over items", sqle);
+        log.warn("Exception while iterating", sqle);
         close();
         throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
       }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java Tue Aug  4 00:06:46 2009
@@ -26,8 +26,8 @@
 import javax.sql.DataSource;
 import java.io.PrintWriter;
 import java.sql.Connection;
-import java.sql.SQLException;
 import java.sql.ResultSet;
+import java.sql.SQLException;
 
 /** <p>A wrapper {@link DataSource} which pools connections.</p> */
 public final class ConnectionPoolDataSource implements DataSource {

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java Tue Aug  4 00:06:46 2009
@@ -38,7 +38,9 @@
 public final class GenericJDBCDataModel extends AbstractJDBCDataModel {
 
   public static final String DATA_SOURCE_KEY = "dataSource";
+  public static final String GET_PREFERENCE_SQL_KEY = "getPreferenceSQL";
   public static final String GET_USER_SQL_KEY = "getUserSQL";
+  public static final String GET_ALL_USERS_SQL_KEY = "getAllUsersSQL";
   public static final String GET_NUM_USERS_SQL_KEY = "getNumUsersSQL";
   public static final String GET_NUM_ITEMS_SQL_KEY = "getNumItemsSQL";
   public static final String SET_PREFERENCE_SQL_KEY = "setPreferenceSQL";
@@ -58,7 +60,9 @@
    */
   public GenericJDBCDataModel(Properties props) throws TasteException {
     super(lookupDataSource(props.getProperty(DATA_SOURCE_KEY)),
+        props.getProperty(GET_PREFERENCE_SQL_KEY),
         props.getProperty(GET_USER_SQL_KEY),
+        props.getProperty(GET_ALL_USERS_SQL_KEY),        
         props.getProperty(GET_NUM_USERS_SQL_KEY),
         props.getProperty(GET_NUM_ITEMS_SQL_KEY),
         props.getProperty(SET_PREFERENCE_SQL_KEY),

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java Tue Aug  4 00:06:46 2009
@@ -97,8 +97,12 @@
         userIDColumn,
         itemIDColumn,
         NO_SUCH_COLUMN,
+        // getPreferenceSQL
+        "SELECT 1 FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND " + itemIDColumn + "=?",
         // getUserSQL
-        "SELECT " + itemIDColumn + " FROM " + preferenceTable + " WHERE " + userIDColumn + "=?",
+        "SELECT " + userIDColumn + ", " + itemIDColumn + " FROM " + preferenceTable + " WHERE " + userIDColumn + "=?",
+        // getAllUsersSQL
+        "SELECT " + userIDColumn + ", " + itemIDColumn + " FROM " + preferenceTable + " ORDER BY " + userIDColumn,
         // getNumItemsSQL
         "SELECT COUNT(DISTINCT " + itemIDColumn + ") FROM " + preferenceTable,
         // getNumUsersSQL
@@ -108,11 +112,11 @@
         // removePreference SQL
         "DELETE FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND " + itemIDColumn + "=?",
         // getUsersSQL
-        "SELECT " + itemIDColumn + ", " + userIDColumn + " FROM " + preferenceTable + " ORDER BY " + userIDColumn,
+        "SELECT DISTINCT " + userIDColumn + " FROM " + preferenceTable + " ORDER BY " + userIDColumn,
         // getItemsSQL
         "SELECT DISTINCT " + itemIDColumn + " FROM " + preferenceTable + " ORDER BY " + itemIDColumn,
         // getPrefsForItemSQL
-        "SELECT " + userIDColumn + " FROM " +
+        "SELECT " + userIDColumn + ", " + itemIDColumn + " FROM " +
             preferenceTable + " WHERE " + itemIDColumn + "=? ORDER BY " + userIDColumn,
         // getNumPreferenceForItemSQL
         "SELECT COUNT(1) FROM " + preferenceTable + " WHERE " + itemIDColumn + "=?",

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java?rev=800634&r1=800633&r2=800634&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java Tue Aug  4 00:06:46 2009
@@ -38,7 +38,7 @@
  * </table>
  *
  * <p><code>preference</code> must have a type compatible
- * with the Java <code>double</code> type. <code>user_id</code> and <code>item_id</code> should be an integer or
+ * with the Java <code>float</code> type. <code>user_id</code> and <code>item_id</code> should be an integer or
  * string type (INT, LONGINT, VARCHAR). For example, the following command sets up a suitable table in MySQL,
  * complete with primary key and indexes:</p>
  *
@@ -145,9 +145,15 @@
         userIDColumn,
         itemIDColumn,
         preferenceColumn,
+        // getPreferenceSQL
+        "SELECT " + preferenceColumn + " FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND " +
+            itemIDColumn + "=?",
         // getUserSQL
-        "SELECT " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable +
+        "SELECT " + userIDColumn + ", " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable +
             " WHERE " + userIDColumn + "=? ORDER BY " + itemIDColumn,
+        // getAllUsersSQL
+        "SELECT " + userIDColumn + ", " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable +
+            " ORDER BY " + userIDColumn + ", " + itemIDColumn,
         // getNumItemsSQL
         "SELECT COUNT(DISTINCT " + itemIDColumn + ") FROM " + preferenceTable,
         // getNumUsersSQL
@@ -158,12 +164,11 @@
         // removePreference SQL
         "DELETE FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND " + itemIDColumn + "=?",
         // getUsersSQL
-        "SELECT " + itemIDColumn + ", " + preferenceColumn + ", " + userIDColumn + " FROM " +
-            preferenceTable + " ORDER BY " + userIDColumn + ", " + itemIDColumn,
+        "SELECT DISTINCT " + userIDColumn + " FROM " + preferenceTable + " ORDER BY " + userIDColumn,
         // getItemsSQL
         "SELECT DISTINCT " + itemIDColumn + " FROM " + preferenceTable + " ORDER BY " + itemIDColumn,
         // getPrefsForItemSQL
-        "SELECT " + preferenceColumn + ", " + userIDColumn + " FROM " +
+        "SELECT " + userIDColumn + ", " + itemIDColumn + ", " + preferenceColumn + " FROM " +
             preferenceTable + " WHERE " + itemIDColumn + "=? ORDER BY " + userIDColumn,
         // getNumPreferenceForItemSQL
         "SELECT COUNT(1) FROM " + preferenceTable + " WHERE " + itemIDColumn + "=?",



Mime
View raw message