spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From m...@apache.org
Subject spark git commit: [SPARK-16245][ML] model loading backward compatibility for ml.feature.PCA
Date Wed, 29 Jun 2016 02:53:11 GMT
Repository: spark
Updated Branches:
  refs/heads/master 363bcedee -> 0df5ce1bc


[SPARK-16245][ML] model loading backward compatibility for ml.feature.PCA

## What changes were proposed in this pull request?
model loading backward compatibility for ml.feature.PCA.

## How was this patch tested?
existing ut and manual test for loading models saved by Spark 1.6.

Author: Yanbo Liang <ybliang8@gmail.com>

Closes #13937 from yanboliang/spark-16245.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0df5ce1b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0df5ce1b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0df5ce1b

Branch: refs/heads/master
Commit: 0df5ce1bc1387a58b33cd185008f4022bd3dcc69
Parents: 363bced
Author: Yanbo Liang <ybliang8@gmail.com>
Authored: Tue Jun 28 19:53:07 2016 -0700
Committer: Xiangrui Meng <meng@databricks.com>
Committed: Tue Jun 28 19:53:07 2016 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/ml/feature/PCA.scala   | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0df5ce1b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
index 72167b5..ef8b085 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala
@@ -206,24 +206,22 @@ object PCAModel extends MLReadable[PCAModel] {
     override def load(path: String): PCAModel = {
       val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
 
-      // explainedVariance field is not present in Spark <= 1.6
-      val versionRegex = "([0-9]+)\\.([0-9]+).*".r
-      val hasExplainedVariance = metadata.sparkVersion match {
-        case versionRegex(major, minor) =>
-          major.toInt >= 2 || (major.toInt == 1 && minor.toInt > 6)
-        case _ => false
-      }
+      val versionRegex = "([0-9]+)\\.(.+)".r
+      val versionRegex(major, _) = metadata.sparkVersion
 
       val dataPath = new Path(path, "data").toString
-      val model = if (hasExplainedVariance) {
+      val model = if (major.toInt >= 2) {
         val Row(pc: DenseMatrix, explainedVariance: DenseVector) =
           sparkSession.read.parquet(dataPath)
             .select("pc", "explainedVariance")
             .head()
         new PCAModel(metadata.uid, pc, explainedVariance)
       } else {
-        val Row(pc: DenseMatrix) = sparkSession.read.parquet(dataPath).select("pc").head()
-        new PCAModel(metadata.uid, pc, Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector])
+        // pc field is the old matrix format in Spark <= 1.6
+        // explainedVariance field is not present in Spark <= 1.6
+        val Row(pc: OldDenseMatrix) = sparkSession.read.parquet(dataPath).select("pc").head()
+        new PCAModel(metadata.uid, pc.asML,
+          Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector])
       }
       DefaultParamsReader.getAndSetParams(model, metadata)
       model


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message