spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wenc...@apache.org
Subject spark git commit: [SPARK-22538][ML] SQLTransformer should not unpersist possibly cached input dataset
Date Fri, 17 Nov 2017 16:43:58 GMT
Repository: spark
Updated Branches:
  refs/heads/master 7d039e0c0 -> fccb337f9


[SPARK-22538][ML] SQLTransformer should not unpersist possibly cached input dataset

## What changes were proposed in this pull request?

`SQLTransformer.transform` unpersists the input dataset when dropping the temporary view. We
should not change the input dataset's cache status.

## How was this patch tested?

Added test.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #19772 from viirya/SPARK-22538.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fccb337f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fccb337f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fccb337f

Branch: refs/heads/master
Commit: fccb337f9d1e44a83cfcc00ce33eae1fad367695
Parents: 7d039e0
Author: Liang-Chi Hsieh <viirya@gmail.com>
Authored: Fri Nov 17 17:43:40 2017 +0100
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Fri Nov 17 17:43:40 2017 +0100

----------------------------------------------------------------------
 .../org/apache/spark/ml/feature/SQLTransformer.scala    |  3 ++-
 .../apache/spark/ml/feature/SQLTransformerSuite.scala   | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fccb337f/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
index 62c1972..0fb1d8c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala
@@ -70,7 +70,8 @@ class SQLTransformer @Since("1.6.0") (@Since("1.6.0") override val uid: String)
     dataset.createOrReplaceTempView(tableName)
     val realStatement = $(statement).replace(tableIdentifier, tableName)
     val result = dataset.sparkSession.sql(realStatement)
-    dataset.sparkSession.catalog.dropTempView(tableName)
+    // Call SessionCatalog.dropTempView to avoid unpersisting the possibly cached dataset.
+    dataset.sparkSession.sessionState.catalog.dropTempView(tableName)
     result
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/fccb337f/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
index 753f890..673a146 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/SQLTransformerSuite.scala
@@ -22,6 +22,7 @@ import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.util.DefaultReadWriteTest
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.types.{LongType, StructField, StructType}
+import org.apache.spark.storage.StorageLevel
 
 class SQLTransformerSuite
   extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
@@ -60,4 +61,15 @@ class SQLTransformerSuite
     val expected = StructType(Seq(StructField("id1", LongType, nullable = false)))
     assert(outputSchema === expected)
   }
+
+  test("SPARK-22538: SQLTransformer should not unpersist given dataset") {
+    val df = spark.range(10)
+    df.cache()
+    df.count()
+    assert(df.storageLevel != StorageLevel.NONE)
+    new SQLTransformer()
+      .setStatement("SELECT id + 1 AS id1 FROM __THIS__")
+      .transform(df)
+    assert(df.storageLevel != StorageLevel.NONE)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message