Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 9D563200B3C for ; Wed, 29 Jun 2016 04:53:13 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 9BD96160A6C; Wed, 29 Jun 2016 02:53:13 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id E39AA160A56 for ; Wed, 29 Jun 2016 04:53:12 +0200 (CEST) Received: (qmail 24260 invoked by uid 500); 29 Jun 2016 02:53:12 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 24250 invoked by uid 99); 29 Jun 2016 02:53:11 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 29 Jun 2016 02:53:11 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A1701E08FE; Wed, 29 Jun 2016 02:53:11 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: meng@apache.org To: commits@spark.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-16245][ML] model loading backward compatibility for ml.feature.PCA Date: Wed, 29 Jun 2016 02:53:11 +0000 (UTC) archived-at: Wed, 29 Jun 2016 02:53:13 -0000 Repository: spark Updated Branches: refs/heads/master 363bcedee -> 0df5ce1bc [SPARK-16245][ML] model loading backward compatibility for ml.feature.PCA ## What changes were proposed in this pull request? Add model-loading backward compatibility for ml.feature.PCA, so that models saved by Spark 1.6 can be loaded. ## How was this patch tested? Existing unit tests and a manual test of loading models saved by Spark 1.6. 
Author: Yanbo Liang Closes #13937 from yanboliang/spark-16245. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0df5ce1b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0df5ce1b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0df5ce1b Branch: refs/heads/master Commit: 0df5ce1bc1387a58b33cd185008f4022bd3dcc69 Parents: 363bced Author: Yanbo Liang Authored: Tue Jun 28 19:53:07 2016 -0700 Committer: Xiangrui Meng Committed: Tue Jun 28 19:53:07 2016 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/ml/feature/PCA.scala | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/0df5ce1b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 72167b5..ef8b085 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -206,24 +206,22 @@ object PCAModel extends MLReadable[PCAModel] { override def load(path: String): PCAModel = { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) - // explainedVariance field is not present in Spark <= 1.6 - val versionRegex = "([0-9]+)\\.([0-9]+).*".r - val hasExplainedVariance = metadata.sparkVersion match { - case versionRegex(major, minor) => - major.toInt >= 2 || (major.toInt == 1 && minor.toInt > 6) - case _ => false - } + val versionRegex = "([0-9]+)\\.(.+)".r + val versionRegex(major, _) = metadata.sparkVersion val dataPath = new Path(path, "data").toString - val model = if (hasExplainedVariance) { + val model = if (major.toInt >= 2) { val Row(pc: 
DenseMatrix, explainedVariance: DenseVector) = sparkSession.read.parquet(dataPath) .select("pc", "explainedVariance") .head() new PCAModel(metadata.uid, pc, explainedVariance) } else { - val Row(pc: DenseMatrix) = sparkSession.read.parquet(dataPath).select("pc").head() - new PCAModel(metadata.uid, pc, Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector]) + // pc field is the old matrix format in Spark <= 1.6 + // explainedVariance field is not present in Spark <= 1.6 + val Row(pc: OldDenseMatrix) = sparkSession.read.parquet(dataPath).select("pc").head() + new PCAModel(metadata.uid, pc.asML, + Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector]) } DefaultParamsReader.getAndSetParams(model, metadata) model --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org