Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 9C294200B41 for ; Thu, 7 Jul 2016 12:28:10 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 9AB17160A68; Thu, 7 Jul 2016 10:28:10 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id E2C3F160A59 for ; Thu, 7 Jul 2016 12:28:09 +0200 (CEST) Received: (qmail 90452 invoked by uid 500); 7 Jul 2016 10:28:09 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 90443 invoked by uid 99); 7 Jul 2016 10:28:09 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 07 Jul 2016 10:28:09 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id B02DBE03CE; Thu, 7 Jul 2016 10:28:08 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: srowen@apache.org To: commits@spark.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix Date: Thu, 7 Jul 2016 10:28:08 +0000 (UTC) archived-at: Thu, 07 Jul 2016 10:28:10 -0000 Repository: spark Updated Branches: refs/heads/master 986b25140 -> 4c6f00d09 [SPARK-16372][MLLIB] Retag RDD to tallSkinnyQR of RowMatrix ## What changes were proposed in this pull request? 
The following Java code fails because of type erasure: ```Java JavaRDD<Vector> rows = jsc.parallelize(...); RowMatrix mat = new RowMatrix(rows.rdd()); QRDecomposition<RowMatrix, Matrix> result = mat.tallSkinnyQR(true); ``` We should use retag to restore the type to prevent the following exception: ```Java java.lang.ClassCastException: [Ljava.lang.Object; cannot be cast to [Lorg.apache.spark.mllib.linalg.Vector; ``` ## How was this patch tested? Java unit test Author: Xusen Yin Closes #14051 from yinxusen/SPARK-16372. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c6f00d0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c6f00d0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c6f00d0 Branch: refs/heads/master Commit: 4c6f00d09c016dfc1d2de6e694dff219c9027fa0 Parents: 986b251 Author: Xusen Yin Authored: Thu Jul 7 11:28:04 2016 +0100 Committer: Sean Owen Committed: Thu Jul 7 11:28:04 2016 +0100 ---------------------------------------------------------------------- .../spark/mllib/api/python/PythonMLLibAPI.scala | 2 +- .../mllib/linalg/distributed/RowMatrix.scala | 2 +- .../linalg/distributed/JavaRowMatrixSuite.java | 44 ++++++++++++++++++++ 3 files changed, 46 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/4c6f00d0/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index f4819f7..a80cca7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -1127,7 +1127,7 @@ private[python] class PythonMLLibAPI extends Serializable { * Wrapper around 
RowMatrix constructor. */ def createRowMatrix(rows: JavaRDD[Vector], numRows: Long, numCols: Int): RowMatrix = { - new RowMatrix(rows.rdd.retag(classOf[Vector]), numRows, numCols) + new RowMatrix(rows.rdd, numRows, numCols) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/4c6f00d0/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index cd5209d..1c94479 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -537,7 +537,7 @@ class RowMatrix @Since("1.0.0") ( def tallSkinnyQR(computeQ: Boolean = false): QRDecomposition[RowMatrix, Matrix] = { val col = numCols().toInt // split rows horizontally into smaller matrices, and compute QR for each of them - val blockQRs = rows.glom().map { partRows => + val blockQRs = rows.retag(classOf[Vector]).glom().map { partRows => val bdm = BDM.zeros[Double](partRows.length, col) var i = 0 partRows.foreach { row => http://git-wip-us.apache.org/repos/asf/spark/blob/4c6f00d0/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java ---------------------------------------------------------------------- diff --git a/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java new file mode 100644 index 0000000..c01af40 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/mllib/linalg/distributed/JavaRowMatrixSuite.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.linalg.distributed; + +import java.util.Arrays; + +import org.junit.Test; + +import org.apache.spark.SharedSparkSession; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.mllib.linalg.Matrix; +import org.apache.spark.mllib.linalg.QRDecomposition; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.Vectors; + +public class JavaRowMatrixSuite extends SharedSparkSession { + + @Test + public void rowMatrixQRDecomposition() { + Vector v1 = Vectors.dense(1.0, 10.0, 100.0); + Vector v2 = Vectors.dense(2.0, 20.0, 200.0); + Vector v3 = Vectors.dense(3.0, 30.0, 300.0); + + JavaRDD rows = jsc.parallelize(Arrays.asList(v1, v2, v3), 1); + RowMatrix mat = new RowMatrix(rows.rdd()); + + QRDecomposition result = mat.tallSkinnyQR(true); + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org