mahout-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject svn commit: r1207820 - /mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
Date Tue, 29 Nov 2011 10:55:43 GMT
Author: srowen
Date: Tue Nov 29 10:55:42 2011
New Revision: 1207820

URL: http://svn.apache.org/viewvc?rev=1207820&view=rev
Log:
MAHOUT-901 probable fixes to the knn recommender

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java?rev=1207820&r1=1207819&r2=1207820&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/knn/KnnItemBasedRecommender.java
Tue Nov 29 10:55:42 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.impl.recommender.knn;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.mahout.cf.taste.common.TasteException;
@@ -41,6 +42,8 @@ import org.apache.mahout.common.LongPair
  */
 public final class KnnItemBasedRecommender extends GenericItemBasedRecommender {
   
+  private static final double BETA = 500.0;
+
   private final Optimizer optimizer;
   private final int neighborhoodSize;
   
@@ -71,57 +74,109 @@ public final class KnnItemBasedRecommend
     return TopItems.getTopItems(howMany, possibleItemIDs, null, estimator);
   }
   
-  private double[] getInterpolations(long itemID, long userID, long[] itemNeighborhood) throws
TasteException {
+  private double[] getInterpolations(long itemID, 
+                                     long userID, 
+                                     long[] itemNeighborhood, 
+                                     List<Long> usersRatedNeighborhood) throws TasteException
{
+    
+    int length = 0;
+    for (int i = 0; i < itemNeighborhood.length; i++) {
+      if (itemNeighborhood[i] == itemID) {
+        itemNeighborhood[i] = -1;
+        length = itemNeighborhood.length - 1;
+        break;
+      }
+    }
     
-    int k = itemNeighborhood.length;
+    int k = length;
     double[][] aMatrix = new double[k][k];
     double[] b = new double[k];
     int i = 0;
     
     DataModel dataModel = getDataModel();
     
-    int numUsers = getDataModel().getNumUsers();
+    int numUsers = usersRatedNeighborhood.size();
     for (long iitem : itemNeighborhood) {
-      PreferenceArray iPrefs = getDataModel().getPreferencesForItem(iitem);
-      int iSize = iPrefs.length();
+      if (iitem == -1) {
+        break;
+      }
       int j = 0;
+      double value = 0.0;
       for (long jitem : itemNeighborhood) {
-        double value = 0.0;
-        for (int pi = 0; pi < iSize; pi++) {
-          long v = iPrefs.getUserID(pi);
-          if (v == userID) {
-            continue;
-          }
-          Float pj = dataModel.getPreferenceValue(userID, jitem);
-          if (pj != null) {
-            value += iPrefs.getValue(pi) * pj;
-          }
+      if (jitem == -1) {
+        continue;
+      }
+      for (long user : usersRatedNeighborhood) {
+        float prefVJ = dataModel.getPreferenceValue(user, iitem);
+        float prefVK = dataModel.getPreferenceValue(user, jitem);
+          value += prefVJ * prefVK;
         }
-        aMatrix[i][j] = value / numUsers;
+        aMatrix[i][j] = value/numUsers;
         j++;
       }
       i++;
     }
     
-    PreferenceArray iPrefs = getDataModel().getPreferencesForItem(itemID);
-    int iSize = iPrefs.length();
     i = 0;
     for (long jitem : itemNeighborhood) {
+      if (jitem == -1) {
+        break;
+      }
       double value = 0.0;
-      for (int pi = 0; pi < iSize; pi++) {
-        long v = iPrefs.getUserID(pi);
-        if (v == userID) {
-          continue;
-        }
-        Float pj = dataModel.getPreferenceValue(userID, jitem);
-        if (pj != null) {
-          value += iPrefs.getValue(pi) * pj;
-        }
+      for (long user : usersRatedNeighborhood) {
+        float prefVJ = dataModel.getPreferenceValue(user, jitem);
+        float prefVI = dataModel.getPreferenceValue(user, itemID);
+        value += prefVJ * prefVI;
       }
       b[i] = value / numUsers;
       i++;
     }
     
+    // Find the larger diagonal and calculate the average
+    double avgDiagonal = 0.0;
+    if (k > 1) {
+      double diagonalA = 0.0;
+      double diagonalB = 0.0;
+      for (i = 0; i < k; i++) {
+        diagonalA += aMatrix[i][i];
+      }
+      for (i = k - 1; i >= 0; i--) {
+        for (int j = 0; j < k; j++) {
+          diagonalB += aMatrix[i--][j];
+        }
+      }
+      avgDiagonal = Math.max(diagonalA, diagonalB) / k;
+    }
+    // Calculate the average of non-diagonal values
+    double avgMatrixA = 0.0;
+    double avgVectorB = 0.0;
+    for (i = 0; i < k; i++) {
+      for (int j = 0; j < k; j++) {
+        if (i != j || k <= 1) {
+          avgMatrixA += aMatrix[i][j];
+        }
+      }
+      avgVectorB += b[i];
+    }
+    if (k > 1) {
+      avgMatrixA /= k * k - k;
+    }
+    avgVectorB /= k;
+
+    double numUsersPlusBeta = numUsers + BETA;
+    for (i = 0; i < k; i++) {
+      for (int j = 0; j < k; j++) {
+        double average;
+        if (i == j && k > 1) {
+          average = avgDiagonal;
+        } else {
+          average = avgMatrixA;
+        }
+        aMatrix[i][j] = (numUsers * aMatrix[i][j] + BETA * average) / numUsersPlusBeta;
+      }
+      b[i] = (numUsers * b[i] + BETA * avgVectorB) / numUsersPlusBeta;
+    }
+
     return optimizer.optimize(aMatrix, b);
   }
   
@@ -139,13 +194,41 @@ public final class KnnItemBasedRecommend
     
     List<RecommendedItem> mostSimilar = mostSimilarItems(itemID, possibleItemIDs.iterator(),
       neighborhoodSize, null);
-    long[] theNeighborhood = new long[mostSimilar.size()];
+    long[] theNeighborhood = new long[mostSimilar.size() + 1];
+    theNeighborhood[0] = -1;
+  
+    List<Long> usersRatedNeighborhood = new ArrayList<Long>();
     int nOffset = 0;
     for (RecommendedItem rec : mostSimilar) {
       theNeighborhood[nOffset++] = rec.getItemID();
     }
     
-    double[] weights = getInterpolations(itemID, theUserID, theNeighborhood);
+    if (!mostSimilar.isEmpty()) {
+      theNeighborhood[mostSimilar.size()] = itemID;
+      for (int i = 0; i < theNeighborhood.length; i++) {
+        PreferenceArray usersNeighborhood = dataModel.getPreferencesForItem(theNeighborhood[i]);
+        int size1 = usersRatedNeighborhood.isEmpty() ? usersNeighborhood.length() : usersRatedNeighborhood.size();
+        for (int j = 0; j < size1; j++) {
+          if (i == 0) {
+            usersRatedNeighborhood.add(usersNeighborhood.getUserID(j));
+          } else {
+            if (j >= usersRatedNeighborhood.size()) {
+              break;
+            }
+            long index = usersRatedNeighborhood.get(j);
+            if (!usersNeighborhood.hasPrefWithUserID(index) || index == theUserID) {
+              usersRatedNeighborhood.remove(index);
+              j--;
+            }
+          }
+        }
+      }
+    }
+
+    double[] weights = null;
+    if (!mostSimilar.isEmpty()) {
+      weights = getInterpolations(itemID, theUserID, theNeighborhood, usersRatedNeighborhood);
+    }
     
     int i = 0;
     double preference = 0.0;
@@ -155,8 +238,9 @@ public final class KnnItemBasedRecommend
       Float pref = dataModel.getPreferenceValue(theUserID, jitem);
       
       if (pref != null) {
-        preference += pref * weights[i];
-        totalSimilarity += weights[i];
+        double weight = weights[i];
+        preference += pref * weight;
+        totalSimilarity += weight;
       }
       i++;
       



Mime
View raw message