spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jkbrad...@apache.org
Subject spark git commit: [SPARK-9454] Change LDASuite tests to use vector comparisons
Date Thu, 30 Jul 2015 21:09:03 GMT
Repository: spark
Updated Branches:
  refs/heads/master 1abf7dc16 -> 89cda69ec


[SPARK-9454] Change LDASuite tests to use vector comparisons

jkbradley Changes the current hacky string-comparison for vector compares.

Author: Feynman Liang <fliang@databricks.com>

Closes #7775 from feynmanliang/SPARK-9454-ldasuite-vector-compare and squashes the following
commits:

bd91a82 [Feynman Liang] Remove println
905c76e [Feynman Liang] Fix string compare in distributed EM
2f24c13 [Feynman Liang] Improve LDASuite tests


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/89cda69e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/89cda69e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/89cda69e

Branch: refs/heads/master
Commit: 89cda69ecd5ef942a68ad13fc4e1f4184010f087
Parents: 1abf7dc
Author: Feynman Liang <fliang@databricks.com>
Authored: Thu Jul 30 14:08:59 2015 -0700
Committer: Joseph K. Bradley <joseph@databricks.com>
Committed: Thu Jul 30 14:08:59 2015 -0700

----------------------------------------------------------------------
 .../spark/mllib/clustering/LDASuite.scala       | 33 +++++++++-----------
 1 file changed, 14 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/89cda69e/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index d74482d..c43e1e5 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -83,21 +83,14 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
     assert(model.topicsMatrix === localModel.topicsMatrix)
 
     // Check: topic summaries
-    //  The odd decimal formatting and sorting is a hack to do a robust comparison.
-    val roundedTopicSummary = model.describeTopics().map { case (terms, termWeights) =>
-      // cut values to 3 digits after the decimal place
-      terms.zip(termWeights).map { case (term, weight) =>
-        ("%.3f".format(weight).toDouble, term.toInt)
-      }
-    }.sortBy(_.mkString(""))
-    val roundedLocalTopicSummary = localModel.describeTopics().map { case (terms, termWeights)
=>
-      // cut values to 3 digits after the decimal place
-      terms.zip(termWeights).map { case (term, weight) =>
-        ("%.3f".format(weight).toDouble, term.toInt)
-      }
-    }.sortBy(_.mkString(""))
-    roundedTopicSummary.zip(roundedLocalTopicSummary).foreach { case (t1, t2) =>
-      assert(t1 === t2)
+    val topicSummary = model.describeTopics().map { case (terms, termWeights) =>
+      Vectors.sparse(tinyVocabSize, terms, termWeights)
+    }.sortBy(_.toString)
+    val localTopicSummary = localModel.describeTopics().map { case (terms, termWeights) =>
+      Vectors.sparse(tinyVocabSize, terms, termWeights)
+    }.sortBy(_.toString)
+    topicSummary.zip(localTopicSummary).foreach { case (topics, topicsLocal) =>
+      assert(topics ~== topicsLocal absTol 0.01)
     }
 
     // Check: per-doc topic distributions
@@ -197,10 +190,12 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
 
     // verify the result, Note this generate the identical result as
     // [[https://github.com/Blei-Lab/onlineldavb]]
-    val topic1 = op.getLambda(0, ::).inner.toArray.map("%.4f".format(_)).mkString(", ")
-    val topic2 = op.getLambda(1, ::).inner.toArray.map("%.4f".format(_)).mkString(", ")
-    assert("1.1101, 1.2076, 1.3050, 0.8899, 0.7924, 0.6950" == topic1)
-    assert("0.8899, 0.7924, 0.6950, 1.1101, 1.2076, 1.3050" == topic2)
+    val topic1: Vector = Vectors.fromBreeze(op.getLambda(0, ::).t)
+    val topic2: Vector = Vectors.fromBreeze(op.getLambda(1, ::).t)
+    val expectedTopic1 = Vectors.dense(1.1101, 1.2076, 1.3050, 0.8899, 0.7924, 0.6950)
+    val expectedTopic2 = Vectors.dense(0.8899, 0.7924, 0.6950, 1.1101, 1.2076, 1.3050)
+    assert(topic1 ~== expectedTopic1 absTol 0.01)
+    assert(topic2 ~== expectedTopic2 absTol 0.01)
   }
 
   test("OnlineLDAOptimizer with toy data") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message