spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sro...@apache.org
Subject spark git commit: [SPARK-16369][MLLIB] tallSkinnyQR of RowMatrix should aware of empty partition
Date Fri, 08 Jul 2016 13:24:12 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 221a4a7fb -> 8c8180605


[SPARK-16369][MLLIB] tallSkinnyQR of RowMatrix should aware of empty partition

## What changes were proposed in this pull request?

tallSkinnyQR of RowMatrix should be aware of empty partitions, which can cause an exception
in the Breeze QR decomposition.

See the [archived dev mail](https://mail-archives.apache.org/mod_mbox/spark-dev/201510.mbox/%3CCAF7ADNrycvPL3qX-VZJhq4OYmiUUhoscut_tkOm63Cm18iK1tQ%40mail.gmail.com%3E)
for more details.

## How was this patch tested?

Scala unit test.

Author: Xusen Yin <yinxusen@gmail.com>

Closes #14049 from yinxusen/SPARK-16369.

(cherry picked from commit 255d74fe4a0db2cc842177ec735bbde07c7c8732)
Signed-off-by: Sean Owen <sowen@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c818060
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c818060
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c818060

Branch: refs/heads/branch-2.0
Commit: 8c8180605b0faa1f6223af9c4ffbbdb1c81486c4
Parents: 221a4a7
Author: Xusen Yin <yinxusen@gmail.com>
Authored: Fri Jul 8 14:23:57 2016 +0100
Committer: Sean Owen <sowen@cloudera.com>
Committed: Fri Jul 8 14:24:07 2016 +0100

----------------------------------------------------------------------
 .../spark/mllib/linalg/distributed/RowMatrix.scala |  5 +++--
 .../mllib/linalg/distributed/RowMatrixSuite.scala  | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/8c818060/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 1c94479..ec32e37 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -537,7 +537,7 @@ class RowMatrix @Since("1.0.0") (
   def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = {
     val col = numCols().toInt
     // split rows horizontally into smaller matrices, and compute QR for each of them
-    val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows =>
+    val blockQRs = rows.retag(classOf[Vector]).glom().filter(_.length != 0).map { partRows =>
       val bdm = BDM.zeros[Double](partRows.length, col)
       var i = 0
       partRows.foreach { row =>
@@ -548,10 +548,11 @@ class RowMatrix @Since("1.0.0") (
     }
 
     // combine the R part from previous results vertically into a tall matrix
-    val combinedR = blockQRs.treeReduce{ (r1, r2) =>
+    val combinedR = blockQRs.treeReduce { (r1, r2) =>
       val stackedR = BDM.vertcat(r1, r2)
       breeze.linalg.qr.reduced(stackedR).r
     }
+
     val finalR = Matrices.fromBreeze(combinedR.toDenseMatrix)
     val finalQ = if (computeQ) {
       try {

http://git-wip-us.apache.org/repos/asf/spark/blob/8c818060/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
index 7c4c6d8..7c9e14f 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala
@@ -28,6 +28,7 @@ import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors}
 import org.apache.spark.mllib.random.RandomRDDs
 import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext}
+import org.apache.spark.mllib.util.TestingUtils._
 
 class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
 
@@ -281,6 +282,22 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
       assert(cov(i, j) === cov(j, i))
     }
   }
+
+  test("QR decomposition should aware of empty partition (SPARK-16369)") {
+    val mat: RowMatrix = new RowMatrix(sc.parallelize(denseData, 1))
+    val qrResult = mat.tallSkinnyQR(true)
+
+    val matWithEmptyPartition = new RowMatrix(sc.parallelize(denseData, 8))
+    val qrResult2 = matWithEmptyPartition.tallSkinnyQR(true)
+
+    assert(qrResult.Q.numCols() === qrResult2.Q.numCols(), "Q matrix ncol not match")
+    assert(qrResult.Q.numRows() === qrResult2.Q.numRows(), "Q matrix nrow not match")
+    qrResult.Q.rows.collect().zip(qrResult2.Q.rows.collect())
+      .foreach(x => assert(x._1 ~== x._2 relTol 1E-8, "Q matrix not match"))
+
+    qrResult.R.toArray.zip(qrResult2.R.toArray)
+      .foreach(x => assert(x._1 ~== x._2 relTol 1E-8, "R matrix not match"))
+  }
 }
 
 class RowMatrixClusterSuite extends SparkFunSuite with LocalClusterSparkContext {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message