spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dbt...@apache.org
Subject spark git commit: [SPARK-8913] [ML] Simplify LogisticRegression suite to use Vector-to-Vector comparison
Date Fri, 10 Jul 2015 02:08:36 GMT
Repository: spark
Updated Branches:
  refs/heads/master 0e78e40c0 -> 272730466


[SPARK-8913] [ML] Simplify LogisticRegression suite to use Vector-to-Vector comparison

Clean up tests from SPARK-8700.

Author: Holden Karau <holden@pigscanfly.ca>

Closes #7335 from holdenk/SPARK-8913-cleanup-tests-from-SPARK-8700-logistic-regression-r2-really-logistic-regression-this-time
and squashes the following commits:

e5e2c5f [Holden Karau] Simplify LogisticRegression suite to use Vector <-> Vector comparisons
instead of comparing element by element


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27273046
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27273046
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27273046

Branch: refs/heads/master
Commit: 2727304660663fcf1e41f7b666978c1443262e4e
Parents: 0e78e40
Author: Holden Karau <holden@pigscanfly.ca>
Authored: Thu Jul 9 19:08:33 2015 -0700
Committer: DB Tsai <dbt@netflix.com>
Committed: Thu Jul 9 19:08:33 2015 -0700

----------------------------------------------------------------------
 .../LogisticRegressionSuite.scala               | 135 ++++++-------------
 1 file changed, 39 insertions(+), 96 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/27273046/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 27253c1..b7dd447 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -234,20 +234,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.7996864
      */
     val interceptR = 2.8366423
-    val weightsR = Array(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
+    val weightsR = Vectors.dense(-0.5895848, 0.8931147, -0.3925051, -0.7996864)
 
     assert(model1.intercept ~== interceptR relTol 1E-3)
-    assert(model1.weights(0) ~== weightsR(0) relTol 1E-3)
-    assert(model1.weights(1) ~== weightsR(1) relTol 1E-3)
-    assert(model1.weights(2) ~== weightsR(2) relTol 1E-3)
-    assert(model1.weights(3) ~== weightsR(3) relTol 1E-3)
+    assert(model1.weights ~= weightsR relTol 1E-3)
 
     // Without regularization, with or without standardization will converge to the same solution.
     assert(model2.intercept ~== interceptR relTol 1E-3)
-    assert(model2.weights(0) ~== weightsR(0) relTol 1E-3)
-    assert(model2.weights(1) ~== weightsR(1) relTol 1E-3)
-    assert(model2.weights(2) ~== weightsR(2) relTol 1E-3)
-    assert(model2.weights(3) ~== weightsR(3) relTol 1E-3)
+    assert(model2.weights ~= weightsR relTol 1E-3)
   }
 
   test("binary logistic regression without intercept without regularization") {
@@ -277,20 +271,14 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.7407946
      */
     val interceptR = 0.0
-    val weightsR = Array(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
+    val weightsR = Vectors.dense(-0.3534996, 1.2964482, -0.3571741, -0.7407946)
 
     assert(model1.intercept ~== interceptR relTol 1E-3)
-    assert(model1.weights(0) ~== weightsR(0) relTol 1E-2)
-    assert(model1.weights(1) ~== weightsR(1) relTol 1E-2)
-    assert(model1.weights(2) ~== weightsR(2) relTol 1E-3)
-    assert(model1.weights(3) ~== weightsR(3) relTol 1E-3)
+    assert(model1.weights ~= weightsR relTol 1E-2)
 
     // Without regularization, with or without standardization should converge to the same solution.
     assert(model2.intercept ~== interceptR relTol 1E-3)
-    assert(model2.weights(0) ~== weightsR(0) relTol 1E-2)
-    assert(model2.weights(1) ~== weightsR(1) relTol 1E-2)
-    assert(model2.weights(2) ~== weightsR(2) relTol 1E-3)
-    assert(model2.weights(3) ~== weightsR(3) relTol 1E-3)
+    assert(model2.weights ~= weightsR relTol 1E-2)
   }
 
   test("binary logistic regression with intercept with L1 regularization") {
@@ -321,13 +309,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.02481551
      */
     val interceptR1 = -0.05627428
-    val weightsR1 = Array(0.0, 0.0, -0.04325749, -0.02481551)
+    val weightsR1 = Vectors.dense(0.0, 0.0, -0.04325749, -0.02481551)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-2)
-    assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3)
-    assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
-    assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2)
-    assert(model1.weights(3) ~== weightsR1(3) relTol 2E-2)
+    assert(model1.weights ~= weightsR1 absTol 2E-2)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -349,13 +334,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5       .
      */
     val interceptR2 = 0.3722152
-    val weightsR2 = Array(0.0, 0.0, -0.1665453, 0.0)
+    val weightsR2 = Vectors.dense(0.0, 0.0, -0.1665453, 0.0)
 
     assert(model2.intercept ~== interceptR2 relTol 1E-2)
-    assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
-    assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
-    assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
-    assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
+    assert(model2.weights ~= weightsR2 absTol 1E-3)
   }
 
   test("binary logistic regression without intercept with L1 regularization") {
@@ -387,13 +369,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.03891782
      */
     val interceptR1 = 0.0
-    val weightsR1 = Array(0.0, 0.0, -0.05189203, -0.03891782)
+    val weightsR1 = Vectors.dense(0.0, 0.0, -0.05189203, -0.03891782)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights(0) ~== weightsR1(0) absTol 1E-3)
-    assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
-    assert(model1.weights(2) ~== weightsR1(2) relTol 1E-2)
-    assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2)
+    assert(model1.weights ~= weightsR1 absTol 1E-3)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -415,13 +394,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5       .
      */
     val interceptR2 = 0.0
-    val weightsR2 = Array(0.0, 0.0, -0.08420782, 0.0)
+    val weightsR2 = Vectors.dense(0.0, 0.0, -0.08420782, 0.0)
 
-    assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
-    assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
-    assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
-    assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
+    assert(model2.intercept ~== interceptR2 absTol 1E-3)
+    assert(model2.weights ~= weightsR2 absTol 1E-3)
   }
 
   test("binary logistic regression with intercept with L2 regularization") {
@@ -452,13 +428,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.10062872
      */
     val interceptR1 = 0.15021751
-    val weightsR1 = Array(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
+    val weightsR1 = Vectors.dense(-0.07251837, 0.10724191, -0.04865309, -0.10062872)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights(0) ~== weightsR1(0) relTol 1E-3)
-    assert(model1.weights(1) ~== weightsR1(1) relTol 1E-3)
-    assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
-    assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
+    assert(model1.weights ~= weightsR1 relTol 1E-3)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -480,13 +453,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.06266838
      */
     val interceptR2 = 0.48657516
-    val weightsR2 = Array(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
+    val weightsR2 = Vectors.dense(-0.05155371, 0.02301057, -0.11482896, -0.06266838)
 
     assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights(0) ~== weightsR2(0) relTol 1E-3)
-    assert(model2.weights(1) ~== weightsR2(1) relTol 1E-3)
-    assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3)
-    assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3)
+    assert(model2.weights ~= weightsR2 relTol 1E-3)
   }
 
   test("binary logistic regression without intercept with L2 regularization") {
@@ -518,13 +488,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.09799775
      */
     val interceptR1 = 0.0
-    val weightsR1 = Array(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
+    val weightsR1 = Vectors.dense(-0.06099165, 0.12857058, -0.04708770, -0.09799775)
 
-    assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights(0) ~== weightsR1(0) relTol 1E-2)
-    assert(model1.weights(1) ~== weightsR1(1) relTol 1E-2)
-    assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
-    assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
+    assert(model1.intercept ~== interceptR1 absTol 1E-3)
+    assert(model1.weights ~= weightsR1 relTol 1E-2)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -546,13 +513,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.053314311
      */
     val interceptR2 = 0.0
-    val weightsR2 = Array(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
+    val weightsR2 = Vectors.dense(-0.005679651, 0.048967094, -0.093714016, -0.053314311)
 
-    assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights(0) ~== weightsR2(0) relTol 1E-2)
-    assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2)
-    assert(model2.weights(2) ~== weightsR2(2) relTol 1E-3)
-    assert(model2.weights(3) ~== weightsR2(3) relTol 1E-3)
+    assert(model2.intercept ~== interceptR2 absTol 1E-3)
+    assert(model2.weights ~= weightsR2 relTol 1E-2)
   }
 
   test("binary logistic regression with intercept with ElasticNet regularization") {
@@ -583,13 +547,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.15458796
      */
     val interceptR1 = 0.57734851
-    val weightsR1 = Array(-0.05310287, 0.0, -0.08849250, -0.15458796)
+    val weightsR1 = Vectors.dense(-0.05310287, 0.0, -0.08849250, -0.15458796)
 
     assert(model1.intercept ~== interceptR1 relTol 6E-3)
-    assert(model1.weights(0) ~== weightsR1(0) relTol 5E-3)
-    assert(model1.weights(1) ~== weightsR1(1) absTol 1E-3)
-    assert(model1.weights(2) ~== weightsR1(2) relTol 5E-3)
-    assert(model1.weights(3) ~== weightsR1(3) relTol 1E-3)
+    assert(model1.weights ~== weightsR1 absTol 5E-3)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -611,13 +572,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.05350074
      */
     val interceptR2 = 0.51555993
-    val weightsR2 = Array(0.0, 0.0, -0.18807395, -0.05350074)
+    val weightsR2 = Vectors.dense(0.0, 0.0, -0.18807395, -0.05350074)
 
     assert(model2.intercept ~== interceptR2 relTol 6E-3)
-    assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
-    assert(model2.weights(1) ~== weightsR2(1) absTol 1E-3)
-    assert(model2.weights(2) ~== weightsR2(2) relTol 5E-3)
-    assert(model2.weights(3) ~== weightsR2(3) relTol 1E-2)
+    assert(model2.weights ~= weightsR2 absTol 1E-3)
   }
 
   test("binary logistic regression without intercept with ElasticNet regularization") {
@@ -649,13 +607,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5     -0.142534158
      */
     val interceptR1 = 0.0
-    val weightsR1 = Array(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
+    val weightsR1 = Vectors.dense(-0.001005743, 0.072577857, -0.081203769, -0.142534158)
 
     assert(model1.intercept ~== interceptR1 relTol 1E-3)
-    assert(model1.weights(0) ~== weightsR1(0) absTol 1E-2)
-    assert(model1.weights(1) ~== weightsR1(1) absTol 1E-2)
-    assert(model1.weights(2) ~== weightsR1(2) relTol 1E-3)
-    assert(model1.weights(3) ~== weightsR1(3) relTol 1E-2)
+    assert(model1.weights ~= weightsR1 absTol 1E-2)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -677,13 +632,10 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5       .
      */
     val interceptR2 = 0.0
-    val weightsR2 = Array(0.0, 0.03345223, -0.11304532, 0.0)
+    val weightsR2 = Vectors.dense(0.0, 0.03345223, -0.11304532, 0.0)
 
-    assert(model2.intercept ~== interceptR2 relTol 1E-3)
-    assert(model2.weights(0) ~== weightsR2(0) absTol 1E-3)
-    assert(model2.weights(1) ~== weightsR2(1) relTol 1E-2)
-    assert(model2.weights(2) ~== weightsR2(2) relTol 1E-2)
-    assert(model2.weights(3) ~== weightsR2(3) absTol 1E-3)
+    assert(model2.intercept ~== interceptR2 absTol 1E-3)
+    assert(model2.weights ~= weightsR2 absTol 1E-3)
   }
 
   test("binary logistic regression with intercept with strong L1 regularization") {
@@ -717,19 +669,13 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        }}}
      */
     val interceptTheory = math.log(histogram(1).toDouble / histogram(0).toDouble)
-    val weightsTheory = Array(0.0, 0.0, 0.0, 0.0)
+    val weightsTheory = Vectors.dense(0.0, 0.0, 0.0, 0.0)
 
     assert(model1.intercept ~== interceptTheory relTol 1E-5)
-    assert(model1.weights(0) ~== weightsTheory(0) absTol 1E-6)
-    assert(model1.weights(1) ~== weightsTheory(1) absTol 1E-6)
-    assert(model1.weights(2) ~== weightsTheory(2) absTol 1E-6)
-    assert(model1.weights(3) ~== weightsTheory(3) absTol 1E-6)
+    assert(model1.weights ~= weightsTheory absTol 1E-6)
 
     assert(model2.intercept ~== interceptTheory relTol 1E-5)
-    assert(model2.weights(0) ~== weightsTheory(0) absTol 1E-6)
-    assert(model2.weights(1) ~== weightsTheory(1) absTol 1E-6)
-    assert(model2.weights(2) ~== weightsTheory(2) absTol 1E-6)
-    assert(model2.weights(3) ~== weightsTheory(3) absTol 1E-6)
+    assert(model2.weights ~= weightsTheory absTol 1E-6)
 
     /*
        Using the following R code to load the data and train the model using glmnet package.
@@ -750,12 +696,9 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
        data.V5       .
      */
     val interceptR = -0.248065
-    val weightsR = Array(0.0, 0.0, 0.0, 0.0)
+    val weightsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
 
     assert(model1.intercept ~== interceptR relTol 1E-5)
-    assert(model1.weights(0) ~== weightsR(0) absTol 1E-6)
-    assert(model1.weights(1) ~== weightsR(1) absTol 1E-6)
-    assert(model1.weights(2) ~== weightsR(2) absTol 1E-6)
-    assert(model1.weights(3) ~== weightsR(3) absTol 1E-6)
+    assert(model1.weights ~= weightsR absTol 1E-6)
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message