spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yh...@apache.org
Subject spark git commit: [SPARK-15257][SQL] Require CREATE EXTERNAL TABLE to specify LOCATION
Date Wed, 11 May 2016 22:31:09 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 b1e14d9bf -> 4e56857ca


[SPARK-15257][SQL] Require CREATE EXTERNAL TABLE to specify LOCATION

## What changes were proposed in this pull request?

Before:
```sql
-- uses warehouse dir anyway
CREATE EXTERNAL TABLE my_tab
-- doesn't actually delete the data
DROP TABLE my_tab
```
After:
```sql
-- no location is provided, throws exception
CREATE EXTERNAL TABLE my_tab
-- creates an external table using that location
CREATE EXTERNAL TABLE my_tab LOCATION '/path/to/something'
-- doesn't delete the data, which is expected
DROP TABLE my_tab
```

## How was this patch tested?

New test in `DDLCommandSuite`

Author: Andrew Or <andrew@databricks.com>

Closes #13032 from andrewor14/create-external-table-location.

(cherry picked from commit 8881765ac7ac6ba6fe9ef0d0a669c08cca58ed93)
Signed-off-by: Yin Huai <yhuai@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e56857c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e56857c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e56857c

Branch: refs/heads/branch-2.0
Commit: 4e56857ca0c3d61cc257ffd3bd1c59ed44c862d7
Parents: b1e14d9
Author: Andrew Or <andrew@databricks.com>
Authored: Wed May 11 15:30:53 2016 -0700
Committer: Yin Huai <yhuai@databricks.com>
Committed: Wed May 11 15:31:05 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/SparkSqlParser.scala    |  4 ++++
 .../sql/execution/command/DDLCommandSuite.scala | 20 ++++++++++++++++++--
 .../hive/execution/HiveCompatibilitySuite.scala |  6 ++++--
 .../spark/sql/hive/HiveDDLCommandSuite.scala    |  9 ++-------
 .../sql/hive/execution/HiveCommandSuite.scala   |  2 +-
 .../sql/hive/execution/HiveTableScanSuite.scala |  2 +-
 .../spark/sql/hive/execution/HiveUDFSuite.scala |  2 +-
 7 files changed, 31 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4e56857c/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 87e6f90..a51665f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -780,6 +780,10 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder {
       .getOrElse(EmptyStorageFormat)
     val rowStorage = Option(ctx.rowFormat).map(visitRowFormat).getOrElse(EmptyStorageFormat)
     val location = Option(ctx.locationSpec).map(visitLocationSpec)
+    // If we are creating an EXTERNAL table, then the LOCATION field is required
+    if (external && location.isEmpty) {
+      throw operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION",
ctx)
+    }
     val storage = CatalogStorageFormat(
       locationUri = location,
       inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat),

http://git-wip-us.apache.org/repos/asf/spark/blob/4e56857c/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
index a728ac3..fa8dabf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala
@@ -18,7 +18,8 @@
 package org.apache.spark.sql.execution.command
 
 import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.catalyst.catalog.{FunctionResource, FunctionResourceType}
+import org.apache.spark.sql.catalyst.catalog.{CatalogTableType, FunctionResource}
+import org.apache.spark.sql.catalyst.catalog.FunctionResourceType
 import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.catalyst.plans.PlanTest
 import org.apache.spark.sql.catalyst.plans.logical.Project
@@ -35,7 +36,7 @@ class DDLCommandSuite extends PlanTest {
       parser.parsePlan(sql)
     }
     assert(e.getMessage.toLowerCase.contains("operation not allowed"))
-    containsThesePhrases.foreach { p => assert(e.getMessage.toLowerCase.contains(p)) }
+    containsThesePhrases.foreach { p => assert(e.getMessage.toLowerCase.contains(p.toLowerCase))
}
   }
 
   test("create database") {
@@ -211,6 +212,21 @@ class DDLCommandSuite extends PlanTest {
     comparePlans(parsed4, expected4)
   }
 
+  test("create external table - location must be specified") {
+    assertUnsupported(
+      sql = "CREATE EXTERNAL TABLE my_tab",
+      containsThesePhrases = Seq("create external table", "location"))
+    val query = "CREATE EXTERNAL TABLE my_tab LOCATION '/something/anything'"
+    parser.parsePlan(query) match {
+      case ct: CreateTable =>
+        assert(ct.table.tableType == CatalogTableType.EXTERNAL)
+        assert(ct.table.storage.locationUri == Some("/something/anything"))
+      case other =>
+        fail(s"Expected to parse ${classOf[CreateTable].getClass.getName} from query," +
+          s"got ${other.getClass.getName}: $query")
+    }
+  }
+
   // ALTER TABLE table_name RENAME TO new_table_name;
   // ALTER VIEW view_name RENAME TO new_view_name;
   test("alter table/view: rename table/view") {

http://git-wip-us.apache.org/repos/asf/spark/blob/4e56857c/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index f89a847..54fb440 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -508,7 +508,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter
{
     // These tests use EXPLAIN FORMATTED, which is not supported
     "input4",
     "join0",
-    "plan_json"
+    "plan_json",
+
+    // This test uses CREATE EXTERNAL TABLE without specifying LOCATION
+    "alter2"
   )
 
   /**
@@ -521,7 +524,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter
{
     "add_partition_no_whitelist",
     "add_partition_with_whitelist",
     "alias_casted_column",
-    "alter2",
     "alter_partition_with_whitelist",
     "alter_rename_partition",
     "ambiguous_col",

http://git-wip-us.apache.org/repos/asf/spark/blob/4e56857c/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
index 3d74235..538e218 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDDLCommandSuite.scala
@@ -352,9 +352,10 @@ class HiveDDLCommandSuite extends PlanTest {
   }
 
   test("create table - external") {
-    val query = "CREATE EXTERNAL TABLE tab1 (id int, name string)"
+    val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'"
     val (desc, _) = extractTableDesc(query)
     assert(desc.tableType == CatalogTableType.EXTERNAL)
+    assert(desc.storage.locationUri == Some("/path/to/nowhere"))
   }
 
   test("create table - if not exists") {
@@ -454,12 +455,6 @@ class HiveDDLCommandSuite extends PlanTest {
     assert(e2.getMessage.contains("Operation not allowed"))
   }
 
-  test("create table - location") {
-    val query = "CREATE TABLE my_table (id int, name string) LOCATION '/path/to/mars'"
-    val (desc, _) = extractTableDesc(query)
-    assert(desc.storage.locationUri == Some("/path/to/mars"))
-  }
-
   test("create table - properties") {
     val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')"
     val (desc, _) = extractTableDesc(query)

http://git-wip-us.apache.org/repos/asf/spark/blob/4e56857c/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
index 1eed5b6..8225bd6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
@@ -35,7 +35,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
 
     sql(
       """
-        |CREATE EXTERNAL TABLE parquet_tab2 (c1 INT, c2 STRING)
+        |CREATE TABLE parquet_tab2 (c1 INT, c2 STRING)
         |STORED AS PARQUET
         |TBLPROPERTIES('prop1Key'="prop1Val", '`prop2Key`'="prop2Val")
       """.stripMargin)

http://git-wip-us.apache.org/repos/asf/spark/blob/4e56857c/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index b0c0dcb..8c9c37f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -65,7 +65,7 @@ class HiveTableScanSuite extends HiveComparisonTest {
     TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
     TestHive.sql(
       """
-        CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
+        CREATE TABLE timestamp_query_null (time TIMESTAMP,id INT)
         ROW FORMAT DELIMITED
         FIELDS TERMINATED BY ','
         LINES TERMINATED BY '\n'

http://git-wip-us.apache.org/repos/asf/spark/blob/4e56857c/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
index dd4321d..51d537d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala
@@ -72,7 +72,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils
{
   test("hive struct udf") {
     sql(
       """
-      |CREATE EXTERNAL TABLE hiveUDFTestTable (
+      |CREATE TABLE hiveUDFTestTable (
       |   pair STRUCT<id: INT, value: INT>
       |)
       |PARTITIONED BY (partition STRING)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message