spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lix...@apache.org
Subject spark git commit: [SPARK-22601][SQL] Data load is getting displayed successful on providing non existing nonlocal file path
Date Fri, 01 Dec 2017 04:45:35 GMT
Repository: spark
Updated Branches:
  refs/heads/master dc365422b -> 16adaf634


[SPARK-22601][SQL] Data load is getting displayed successful on providing non existing nonlocal
file path

## What changes were proposed in this pull request?
When a user tries to load data from a non-existent HDFS file path, the system does not validate it,
and the load command completes successfully.
This is misleading to the user. Validation already exists for the scenario of a non-existent
local file path. This PR adds validation for the scenario of a non-existent HDFS file path.
## How was this patch tested?
A unit test has been added to verify the issue; snapshots were also added after verifying the fix
in a Spark YARN cluster.

Author: sujith71955 <sujithchacko.2010@gmail.com>

Closes #19823 from sujith71955/master_LoadComand_Issue.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/16adaf63
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/16adaf63
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/16adaf63

Branch: refs/heads/master
Commit: 16adaf634bcca3074b448d95e72177eefdf50069
Parents: dc36542
Author: sujith71955 <sujithchacko.2010@gmail.com>
Authored: Thu Nov 30 20:45:30 2017 -0800
Committer: gatorsmile <gatorsmile@gmail.com>
Committed: Thu Nov 30 20:45:30 2017 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/command/tables.scala     | 9 ++++++++-
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala  | 9 +++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/16adaf63/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index c9f6e57..c42e6c3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -340,7 +340,7 @@ case class LoadDataCommand(
         uri
       } else {
         val uri = new URI(path)
-        if (uri.getScheme() != null && uri.getAuthority() != null) {
+        val hdfsUri = if (uri.getScheme() != null && uri.getAuthority() != null)
{
           uri
         } else {
           // Follow Hive's behavior:
@@ -380,6 +380,13 @@ case class LoadDataCommand(
           }
           new URI(scheme, authority, absolutePath, uri.getQuery(), uri.getFragment())
         }
+        val hadoopConf = sparkSession.sessionState.newHadoopConf()
+        val srcPath = new Path(hdfsUri)
+        val fs = srcPath.getFileSystem(hadoopConf)
+        if (!fs.exists(srcPath)) {
+          throw new AnalysisException(s"LOAD DATA input path does not exist: $path")
+        }
+        hdfsUri
       }
 
     if (partition.nonEmpty) {

http://git-wip-us.apache.org/repos/asf/spark/blob/16adaf63/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 9063ef0..6c11905 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -2141,4 +2141,13 @@ class HiveDDLSuite
       }
     }
   }
+
+  test("load command for non local invalid path validation") {
+    withTable("tbl") {
+      sql("CREATE TABLE tbl(i INT, j STRING)")
+      val e = intercept[AnalysisException](
+        sql("load data inpath '/doesnotexist.csv' into table tbl"))
+      assert(e.message.contains("LOAD DATA input path does not exist"))
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message