Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 9DEA7200B38 for ; Fri, 8 Jul 2016 15:24:14 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 9C90A160A77; Fri, 8 Jul 2016 13:24:14 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id E6FB6160A5A for ; Fri, 8 Jul 2016 15:24:13 +0200 (CEST) Received: (qmail 52991 invoked by uid 500); 8 Jul 2016 13:24:12 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 52982 invoked by uid 99); 8 Jul 2016 13:24:12 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 08 Jul 2016 13:24:12 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A5BA8DFFF8; Fri, 8 Jul 2016 13:24:12 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: srowen@apache.org To: commits@spark.apache.org Message-Id: <049e9807c96e42868f6183a1346230b2@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-16369][MLLIB] tallSkinnyQR of RowMatrix should aware of empty partition Date: Fri, 8 Jul 2016 13:24:12 +0000 (UTC) archived-at: Fri, 08 Jul 2016 13:24:14 -0000 Repository: spark Updated Branches: refs/heads/branch-2.0 221a4a7fb -> 8c8180605 [SPARK-16369][MLLIB] tallSkinnyQR of RowMatrix should aware of empty partition ## What changes were proposed in this pull request? tallSkinnyQR of RowMatrix should aware of empty partition, which could cause exception from Breeze qr decomposition. See the [archived dev mail](https://mail-archives.apache.org/mod_mbox/spark-dev/201510.mbox/%3CCAF7ADNrycvPL3qX-VZJhq4OYmiUUhoscut_tkOm63Cm18iK1tQmail.gmail.com%3E) for more details. ## How was this patch tested? Scala unit test. Author: Xusen Yin Closes #14049 from yinxusen/SPARK-16369. (cherry picked from commit 255d74fe4a0db2cc842177ec735bbde07c7c8732) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c818060 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c818060 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c818060 Branch: refs/heads/branch-2.0 Commit: 8c8180605b0faa1f6223af9c4ffbbdb1c81486c4 Parents: 221a4a7 Author: Xusen Yin Authored: Fri Jul 8 14:23:57 2016 +0100 Committer: Sean Owen Committed: Fri Jul 8 14:24:07 2016 +0100 ---------------------------------------------------------------------- .../spark/mllib/linalg/distributed/RowMatrix.scala | 5 +++-- .../mllib/linalg/distributed/RowMatrixSuite.scala | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/8c818060/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 1c94479..ec32e37 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -537,7 +537,7 @@ class RowMatrix @Since("1.0.0") ( def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = { val col = numCols().toInt // split rows horizontally into smaller matrices, and compute QR for each of them - val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows => + val blockQRs = rows.retag(classOf[Vector]).glom().filter(_.length != 0).map { partRows => val bdm = BDM.zeros[Double](partRows.length, col) var i = 0 partRows.foreach { row => @@ -548,10 +548,11 @@ class RowMatrix @Since("1.0.0") ( } // combine the R part from previous results vertically into a tall matrix - val combinedR = blockQRs.treeReduce{ (r1, r2) => + val combinedR = blockQRs.treeReduce { (r1, r2) => val stackedR = BDM.vertcat(r1, r2) breeze.linalg.qr.reduced(stackedR).r } + val finalR = Matrices.fromBreeze(combinedR.toDenseMatrix) val finalQ = if (computeQ) { try { http://git-wip-us.apache.org/repos/asf/spark/blob/8c818060/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 7c4c6d8..7c9e14f 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors} import org.apache.spark.mllib.random.RandomRDDs import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} +import org.apache.spark.mllib.util.TestingUtils._ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { @@ -281,6 +282,22 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(cov(i, j) === cov(j, i)) } } + + test("QR decomposition should aware of empty partition (SPARK-16369)") { + val mat: RowMatrix = new RowMatrix(sc.parallelize(denseData, 1)) + val qrResult = mat.tallSkinnyQR(true) + + val matWithEmptyPartition = new RowMatrix(sc.parallelize(denseData, 8)) + val qrResult2 = matWithEmptyPartition.tallSkinnyQR(true) + + assert(qrResult.Q.numCols() === qrResult2.Q.numCols(), "Q matrix ncol not match") + assert(qrResult.Q.numRows() === qrResult2.Q.numRows(), "Q matrix nrow not match") + qrResult.Q.rows.collect().zip(qrResult2.Q.rows.collect()) + .foreach(x => assert(x._1 ~== x._2 relTol 1E-8, "Q matrix not match")) + + qrResult.R.toArray.zip(qrResult2.R.toArray) + .foreach(x => assert(x._1 ~== x._2 relTol 1E-8, "R matrix not match")) + } } class RowMatrixClusterSuite extends SparkFunSuite with LocalClusterSparkContext { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org