spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jkbrad...@apache.org
Subject spark git commit: [SPARK-7452] [MLLIB] fix bug in topBykey and update test
Date Fri, 08 May 2015 03:55:25 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 05454fd8a -> 28d423870


[SPARK-7452] [MLLIB] fix bug in topBykey and update test

the toArray function of the BoundedPriorityQueue does not necessarily preserve order. Add
a counter-example as the test, which would fail the original impl.

Author: Shuo Xiang <shuoxiangpub@gmail.com>

Closes #5990 from coderxiang/topbykey-test and squashes the following commits:

98804c9 [Shuo Xiang] fix bug in topBykey and update test

(cherry picked from commit 92f8f803a68e0c16771e9793098c6d76dfdf99af)
Signed-off-by: Joseph K. Bradley <joseph@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28d42387
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28d42387
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28d42387

Branch: refs/heads/branch-1.4
Commit: 28d423870869b968054289625a869948022c8aff
Parents: 05454fd
Author: Shuo Xiang <shuoxiangpub@gmail.com>
Authored: Thu May 7 20:55:08 2015 -0700
Committer: Joseph K. Bradley <joseph@databricks.com>
Committed: Thu May 7 20:55:19 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala     | 2 +-
 .../apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala    | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/28d42387/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
index 5af55aa..1b93e2d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctions.scala
@@ -46,7 +46,7 @@ class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends
Se
       combOp = (queue1, queue2) => {
         queue1 ++= queue2
       }
-    ).mapValues(_.toArray.reverse)  // This is an min-heap, so we reverse the order.
+    ).mapValues(_.toArray.sorted(ord.reverse))  // This is an min-heap, so we reverse the
order.
   }
 }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/28d42387/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
index cb8fe4d..57216e8 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/rdd/MLPairRDDFunctionsSuite.scala
@@ -24,13 +24,14 @@ import org.apache.spark.mllib.rdd.MLPairRDDFunctions._
 
 class MLPairRDDFunctionsSuite extends FunSuite with MLlibTestSparkContext {
   test("topByKey") {
-    val topMap = sc.parallelize(Array((1, 1), (1, 2), (3, 2), (3, 7), (5, 1), (3, 5)), 2)
-      .topByKey(2)
+    val topMap = sc.parallelize(Array((1, 7), (1, 3), (1, 6), (1, 1), (1, 2), (3, 2), (3,
7), (5,
+      1), (3, 5)), 2)
+      .topByKey(5)
       .collectAsMap()
 
     assert(topMap.size === 3)
-    assert(topMap(1) === Array(2, 1))
-    assert(topMap(3) === Array(7, 5))
+    assert(topMap(1) === Array(7, 6, 3, 2, 1))
+    assert(topMap(3) === Array(7, 5, 2))
     assert(topMap(5) === Array(1))
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message