spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wenc...@apache.org
Subject spark git commit: [SPARK-17432][SQL] PreprocessDDL should respect case sensitivity when checking duplicated columns
Date Thu, 08 Sep 2016 11:42:00 GMT
Repository: spark
Updated Branches:
  refs/heads/master b230fb92a -> 3ced39df3


[SPARK-17432][SQL] PreprocessDDL should respect case sensitivity when checking duplicated
columns

## What changes were proposed in this pull request?

In `PreprocessDDL` we will check if table columns are duplicated. However, this check ignores
the case sensitivity config (it is always case-sensitive) and leads to different results between `HiveExternalCatalog`
and `InMemoryCatalog`. `HiveExternalCatalog` will throw an exception because the hive metastore is
always case-insensitive, while `InMemoryCatalog` is fine.

This PR fixes it.

## How was this patch tested?

a new test in DDLSuite

Author: Wenchen Fan <wenchen@databricks.com>

Closes #14994 from cloud-fan/check-dup.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ced39df
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ced39df
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ced39df

Branch: refs/heads/master
Commit: 3ced39df32e52170d6954a2464f84e0c9f307423
Parents: b230fb9
Author: Wenchen Fan <wenchen@databricks.com>
Authored: Thu Sep 8 19:41:49 2016 +0800
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Thu Sep 8 19:41:49 2016 +0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/datasources/rules.scala    | 7 ++++++-
 .../org/apache/spark/sql/execution/command/DDLSuite.scala     | 7 +++++++
 2 files changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3ced39df/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 5b96206..fbf4063 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -97,7 +97,12 @@ case class PreprocessDDL(conf: SQLConf) extends Rule[LogicalPlan] {
     //   * sort columns' type must be orderable.
     case c @ CreateTable(tableDesc, mode, query) if c.childrenResolved =>
       val schema = if (query.isDefined) query.get.schema else tableDesc.schema
-      checkDuplication(schema.map(_.name), "table definition of " + tableDesc.identifier)
+      val columnNames = if (conf.caseSensitiveAnalysis) {
+        schema.map(_.name)
+      } else {
+        schema.map(_.name.toLowerCase)
+      }
+      checkDuplication(columnNames, "table definition of " + tableDesc.identifier)
 
       val partitionColsChecked = checkPartitionColumns(schema, tableDesc)
       val bucketColsChecked = checkBucketColumns(schema, partitionColsChecked)

http://git-wip-us.apache.org/repos/asf/spark/blob/3ced39df/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index fd35c98..05f826a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -371,6 +371,13 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach
{
       sql("CREATE TABLE tbl(a int, a string) USING json")
     }
     assert(e.message == "Found duplicate column(s) in table definition of `tbl`: a")
+
+    withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
+      val e2 = intercept[AnalysisException] {
+        sql("CREATE TABLE tbl(a int, A string) USING json")
+      }
+      assert(e2.message == "Found duplicate column(s) in table definition of `tbl`: a")
+    }
   }
 
   test("create table - partition column names not in table definition") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message