spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From l...@apache.org
Subject spark git commit: [SPARK-5821] [SQL] JSON CTAS command should throw error message when delete path failure
Date Sat, 21 Mar 2015 03:23:37 GMT
Repository: spark
Updated Branches:
  refs/heads/master 937c1e550 -> e5d2c37c6


[SPARK-5821] [SQL] JSON CTAS command should throw error message when delete path failure

When using "CREATE TEMPORARY TABLE AS SELECT" to create JSON table, we first delete the path
file or directory and then generate a new directory with the same name. But if only read permission
was granted, the delete failed.
Here we just throwing an error message to let users know what happened.
ParquetRelation2 may also hit this problem. I think to restrict JSONRelation and ParquetRelation2
must base on directory is more reasonable for access control. Maybe I can do it in follow
up works.

Author: Yanbo Liang <ybliang8@gmail.com>
Author: Yanbo Liang <yanbohappy@gmail.com>

Closes #4610 from yanboliang/jsonInsertImprovements and squashes the following commits:

c387fce [Yanbo Liang] fix typos
42d7fb6 [Yanbo Liang] add unittest & fix output format
46f0d9d [Yanbo Liang] Update JSONRelation.scala
e2df8d5 [Yanbo Liang] check path exisit when write
79f7040 [Yanbo Liang] Update JSONRelation.scala
e4bc229 [Yanbo Liang] Update JSONRelation.scala
5a42d83 [Yanbo Liang] JSONRelation CTAS should check if delete is successful


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e5d2c37c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e5d2c37c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e5d2c37c

Branch: refs/heads/master
Commit: e5d2c37c68ac00a57c2542e62d1c5b4ca267c89e
Parents: 937c1e5
Author: Yanbo Liang <ybliang8@gmail.com>
Authored: Sat Mar 21 11:23:28 2015 +0800
Committer: Cheng Lian <lian@databricks.com>
Committed: Sat Mar 21 11:23:28 2015 +0800

----------------------------------------------------------------------
 .../apache/spark/sql/json/JSONRelation.scala    | 36 ++++++++++++++++----
 .../sql/sources/CreateTableAsSelectSuite.scala  | 25 +++++++++++++-
 2 files changed, 53 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e5d2c37c/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala
index b1e363d..f4c99b4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala
@@ -68,9 +68,23 @@ private[sql] class DefaultSource
       mode match {
         case SaveMode.Append =>
           sys.error(s"Append mode is not supported by ${this.getClass.getCanonicalName}")
-        case SaveMode.Overwrite =>
-          fs.delete(filesystemPath, true)
+        case SaveMode.Overwrite => {
+          var success: Boolean = false
+          try {
+            success = fs.delete(filesystemPath, true)
+          } catch {
+            case e: IOException =>
+              throw new IOException(
+                s"Unable to clear output directory ${filesystemPath.toString} prior"
+                  + s" to writing to JSON table:\n${e.toString}")
+          }
+          if (!success) {
+            throw new IOException(
+              s"Unable to clear output directory ${filesystemPath.toString} prior"
+                + s" to writing to JSON table.")
+          }
           true
+        }
         case SaveMode.ErrorIfExists =>
           sys.error(s"path $path already exists.")
         case SaveMode.Ignore => false
@@ -114,13 +128,21 @@ private[sql] case class JSONRelation(
     val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
 
     if (overwrite) {
-      try {
-        fs.delete(filesystemPath, true)
-      } catch {
-        case e: IOException =>
+      if (fs.exists(filesystemPath)) {
+        var success: Boolean = false
+        try {
+          success = fs.delete(filesystemPath, true)
+        } catch {
+          case e: IOException =>
+            throw new IOException(
+              s"Unable to clear output directory ${filesystemPath.toString} prior"
+                + s" to writing to JSON table:\n${e.toString}")
+        }
+        if (!success) {
           throw new IOException(
             s"Unable to clear output directory ${filesystemPath.toString} prior"
-              + s" to INSERT OVERWRITE a JSON table:\n${e.toString}")
+              + s" to writing to JSON table.")
+        }
       }
       // Write the data.
       data.toJSON.saveAsTextFile(path)

http://git-wip-us.apache.org/repos/asf/spark/blob/e5d2c37c/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
index 2975a7f..20a23b3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.sources
 
-import java.io.File
+import java.io.{IOException, File}
 
 import org.apache.spark.sql.AnalysisException
 import org.scalatest.BeforeAndAfterAll
@@ -62,6 +62,29 @@ class CreateTableAsSelectSuite extends DataSourceTest with BeforeAndAfterAll
{
     dropTempTable("jsonTable")
   }
 
+  test("CREATE TEMPORARY TABLE AS SELECT based on the file without write permission") {
+    val childPath = new File(path.toString, "child")
+    path.mkdir()
+    childPath.createNewFile()
+    path.setWritable(false)
+
+    val e = intercept[IOException] {
+      sql(
+        s"""
+           |CREATE TEMPORARY TABLE jsonTable
+           |USING org.apache.spark.sql.json.DefaultSource
+           |OPTIONS (
+           |  path '${path.toString}'
+           |) AS
+           |SELECT a, b FROM jt
+        """.stripMargin)
+      sql("SELECT a, b FROM jsonTable").collect()
+    }
+    assert(e.getMessage().contains("Unable to clear output directory"))
+
+    path.setWritable(true)
+  }
+
   test("create a table, drop it and create another one with the same name") {
     sql(
       s"""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message