spark-reviews mailing list archives

From srowen <...@git.apache.org>
Subject [GitHub] spark pull request #20362: [Spark-22886][ML][TESTS] ML test for structured s...
Date Thu, 22 Feb 2018 18:15:49 GMT
Github user srowen commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20362#discussion_r170046857
  
    --- Diff: mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala ---
    @@ -413,34 +411,36 @@ class ALSSuite
           .setSeed(0)
         val alpha = als.getAlpha
         val model = als.fit(training.toDF())
    -    val predictions = model.transform(test.toDF()).select("rating", "prediction").rdd.map {
    -      case Row(rating: Float, prediction: Float) =>
    -        (rating.toDouble, prediction.toDouble)
    +    testTransformerByGlobalCheckFunc[Rating[Int]](test.toDF(), model, "rating", "prediction") {
    +        case rows: Seq[Row] =>
    +          val predictions = rows.map(row => (row.getFloat(0).toDouble, row.getFloat(1).toDouble))
    +
    +          val rmse =
    +            if (implicitPrefs) {
    +              // TODO: Use a better (rank-based?) evaluation metric for implicit feedback.
    +              // We limit the ratings and the predictions to interval [0, 1] and compute the
    +              // weighted RMSE with the confidence scores as weights.
    +              val (totalWeight, weightedSumSq) = predictions.map { case (rating, prediction) =>
    +                val confidence = 1.0 + alpha * math.abs(rating)
    +                val rating01 = math.max(math.min(rating, 1.0), 0.0)
    +                val prediction01 = math.max(math.min(prediction, 1.0), 0.0)
    +                val err = prediction01 - rating01
    +                (confidence, confidence * err * err)
    +              }.reduce[(Double, Double)] { case ((c0, e0), (c1, e1)) =>
    +                (c0 + c1, e0 + e1)
    +              }
    +              math.sqrt(weightedSumSq / totalWeight)
    +            } else {
    +              val errorSquares = predictions.map { case (rating, prediction) =>
    +                val err = rating - prediction
    +                err * err
    +              }
    +              val mse = errorSquares.sum / errorSquares.length
    +              math.sqrt(mse)
    +            }
    +          logInfo(s"Test RMSE is $rmse.")
    +          assert(rmse < targetRMSE)
         }
    -    val rmse =
    --- End diff --
    
    Is this change really just a move, or did something else change as well?
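    
    For readers skimming the diff: the implicit-feedback branch above computes a confidence-weighted RMSE after clipping ratings and predictions to [0, 1]. Below is a minimal standalone sketch of that arithmetic; the object/method names and the sample (rating, prediction) pairs are illustrative only and are not part of the test suite.
    
    // Minimal sketch of the confidence-weighted RMSE used for implicit feedback.
    // Works on plain (rating, prediction) pairs; the sample values are made up.
    object WeightedRmseSketch {
      def weightedRmse(pairs: Seq[(Double, Double)], alpha: Double): Double = {
        val (totalWeight, weightedSumSq) = pairs.map { case (rating, prediction) =>
          val confidence = 1.0 + alpha * math.abs(rating)
          val rating01 = math.max(math.min(rating, 1.0), 0.0)         // clip to [0, 1]
          val prediction01 = math.max(math.min(prediction, 1.0), 0.0) // clip to [0, 1]
          val err = prediction01 - rating01
          (confidence, confidence * err * err)
        }.reduce[(Double, Double)] { case ((c0, e0), (c1, e1)) => (c0 + c1, e0 + e1) }
        math.sqrt(weightedSumSq / totalWeight)
      }
    
      def main(args: Array[String]): Unit = {
        val sample = Seq((0.9, 0.8), (0.2, 0.1), (1.5, 0.95)) // (rating, prediction)
        println(f"weighted RMSE = ${weightedRmse(sample, alpha = 1.0)}%.4f")
      }
    }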


---


