spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pwend...@apache.org
Subject [1/9] git commit: Added a method to enable bulk prediction
Date Wed, 08 Jan 2014 00:57:19 GMT
Updated Branches:
  refs/heads/master 6ccf8ce70 -> b2e690f83


Added a method to enable bulk prediction


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/67f937ec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/67f937ec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/67f937ec

Branch: refs/heads/master
Commit: 67f937ec222c5a7db5286c0af0ec6f9c482d2af6
Parents: 0475ca8
Author: Hossein Falaki <falaki@gmail.com>
Authored: Fri Jan 3 15:34:16 2014 -0800
Committer: Hossein Falaki <falaki@gmail.com>
Committed: Fri Jan 3 15:34:16 2014 -0800

----------------------------------------------------------------------
 .../MatrixFactorizationModel.scala              | 24 +++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/67f937ec/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
index af43d89..bc13a66 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala
@@ -20,7 +20,9 @@ package org.apache.spark.mllib.recommendation
 import org.apache.spark.rdd.RDD
 import org.apache.spark.SparkContext._
 
+
 import org.jblas._
+import java.nio.{ByteOrder, ByteBuffer}
 
 /**
  * Model representing the result of matrix factorization.
@@ -44,6 +46,26 @@ class MatrixFactorizationModel(
     userVector.dot(productVector)
   }
 
-  // TODO: Figure out what good bulk prediction methods would look like.
+  /**
+    * Predict the ratings of many (user, product) pairs in bulk.
+    * The output RDD has one element for each element of the input RDD (including all
+    * duplicates), except for pairs whose user or product is missing from the training set.
+    *
+    * @param usersProducts  RDD of (user, product) pairs.
+    * @return RDD of Ratings; each rating is the dot product of the user and product features.
+    */
+  def predict(usersProducts: RDD[(Int, Int)]): RDD[Rating] = {
+    val users = userFeatures.join(usersProducts).map{  // attach user features, keyed by user
+      case (user, (uFeatures, product)) => (product, (user, uFeatures))  // re-key by product
+    }
+    users.join(productFeatures).map {  // attach product features, keyed by product
+      case (product, ((user, uFeatures), pFeatures)) =>
+        val userVector = new DoubleMatrix(uFeatures)
+        val productVector = new DoubleMatrix(pFeatures)
+        Rating(user, product, userVector.dot(productVector))
+    }
+  }
+
+  // TODO: Figure out what other good bulk prediction methods would look like.
   // Probably want a way to get the top users for a product or vice-versa.
 }


Mime
View raw message