spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From l...@apache.org
Subject spark git commit: [SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables
Date Tue, 26 Jul 2016 10:46:19 GMT
Repository: spark
Updated Branches:
  refs/heads/master 4c9695598 -> a2abb583c


[SPARK-16663][SQL] desc table should be consistent between data source and hive serde tables

## What changes were proposed in this pull request?

Currently there are 2 inconsistencies:

1. For a data source table, we only print the partition names; for a Hive table, we also print the
partition schema. After this PR, we will always print the schema.
2. If a column doesn't have a comment, a data source table will print an empty string, while a Hive
table will print null. After this PR, we will always print null.

## How was this patch tested?

new test in `HiveDDLSuite`

Author: Wenchen Fan <wenchen@databricks.com>

Closes #14302 from cloud-fan/minor3.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2abb583
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2abb583
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2abb583

Branch: refs/heads/master
Commit: a2abb583caaec9a2cecd5d65b05d172fc096c125
Parents: 4c96955
Author: Wenchen Fan <wenchen@databricks.com>
Authored: Tue Jul 26 18:46:12 2016 +0800
Committer: Cheng Lian <lian@databricks.com>
Committed: Tue Jul 26 18:46:12 2016 +0800

----------------------------------------------------------------------
 .../spark/sql/execution/command/tables.scala    | 12 ++++----
 .../apache/spark/sql/sources/DDLTestSuite.scala | 30 ++++++++++----------
 .../sql/hive/MetastoreDataSourcesSuite.scala    |  2 +-
 .../spark/sql/hive/execution/HiveDDLSuite.scala | 30 +++++++++++++++-----
 .../sql/hive/execution/HiveQuerySuite.scala     |  4 +--
 5 files changed, 47 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index c6daa95..8263380 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -439,11 +439,12 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended:
Boolean, isF
 
   private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit
= {
     if (DDLUtils.isDatasourceTable(table)) {
-      val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
-      if (partCols.nonEmpty) {
+      val userSpecifiedSchema = DDLUtils.getSchemaFromTableProperties(table)
+      val partColNames = DDLUtils.getPartitionColumnsFromTableProperties(table)
+      for (schema <- userSpecifiedSchema if partColNames.nonEmpty) {
         append(buffer, "# Partition Information", "", "")
-        append(buffer, s"# ${output.head.name}", "", "")
-        partCols.foreach(col => append(buffer, col, "", ""))
+        append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
+        describeSchema(StructType(partColNames.map(schema(_))), buffer)
       }
     } else {
       if (table.partitionColumns.nonEmpty) {
@@ -525,8 +526,7 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean,
isF
 
   private def describeSchema(schema: StructType, buffer: ArrayBuffer[Row]): Unit = {
     schema.foreach { column =>
-      val comment = column.getComment().getOrElse("")
-      append(buffer, column.name, column.dataType.simpleString, comment)
+      append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
index d0ad319..e535d4d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DDLTestSuite.scala
@@ -97,21 +97,21 @@ class DDLTestSuite extends DataSourceTest with SharedSQLContext {
       "describe ddlPeople",
       Seq(
         Row("intType", "int", "test comment test1"),
-        Row("stringType", "string", ""),
-        Row("dateType", "date", ""),
-        Row("timestampType", "timestamp", ""),
-        Row("doubleType", "double", ""),
-        Row("bigintType", "bigint", ""),
-        Row("tinyintType", "tinyint", ""),
-        Row("decimalType", "decimal(10,0)", ""),
-        Row("fixedDecimalType", "decimal(5,1)", ""),
-        Row("binaryType", "binary", ""),
-        Row("booleanType", "boolean", ""),
-        Row("smallIntType", "smallint", ""),
-        Row("floatType", "float", ""),
-        Row("mapType", "map<string,string>", ""),
-        Row("arrayType", "array<string>", ""),
-        Row("structType", "struct<f1:string,f2:int>", "")
+        Row("stringType", "string", null),
+        Row("dateType", "date", null),
+        Row("timestampType", "timestamp", null),
+        Row("doubleType", "double", null),
+        Row("bigintType", "bigint", null),
+        Row("tinyintType", "tinyint", null),
+        Row("decimalType", "decimal(10,0)", null),
+        Row("fixedDecimalType", "decimal(5,1)", null),
+        Row("binaryType", "binary", null),
+        Row("booleanType", "boolean", null),
+        Row("smallIntType", "smallint", null),
+        Row("floatType", "float", null),
+        Row("mapType", "map<string,string>", null),
+        Row("arrayType", "array<string>", null),
+        Row("structType", "struct<f1:string,f2:int>", null)
       ))
 
   test("SPARK-7686 DescribeCommand should have correct physical plan output attributes")
{

http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 22f8c0f..111fb8b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -748,7 +748,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with
TestHiv
       assert(schema === actualSchema)
 
       // Checks the DESCRIBE output.
-      checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", "") :: Nil)
+      checkAnswer(sql("DESCRIBE spark6655"), Row("int", "int", null) :: Nil)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index fb5c994..d15e11a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -431,6 +431,22 @@ class HiveDDLSuite
     }
   }
 
+  test("desc table for Hive table - partitioned table") {
+    withTable("tbl") {
+      sql("CREATE TABLE tbl(a int) PARTITIONED BY (b int)")
+
+      assert(sql("DESC tbl").collect().containsSlice(
+        Seq(
+          Row("a", "int", null),
+          Row("b", "int", null),
+          Row("# Partition Information", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("b", "int", null)
+        )
+      ))
+    }
+  }
+
   test("desc table for data source table using Hive Metastore") {
     assume(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive")
     val tabName = "tab1"
@@ -621,7 +637,7 @@ class HiveDDLSuite
 
           val desc = sql("DESC FORMATTED t1").collect().toSeq
 
-          assert(desc.contains(Row("id", "bigint", "")))
+          assert(desc.contains(Row("id", "bigint", null)))
         }
       }
     }
@@ -638,13 +654,13 @@ class HiveDDLSuite
 
       assert(formattedDesc.containsSlice(
         Seq(
-          Row("a", "bigint", ""),
-          Row("b", "bigint", ""),
-          Row("c", "bigint", ""),
-          Row("d", "bigint", ""),
+          Row("a", "bigint", null),
+          Row("b", "bigint", null),
+          Row("c", "bigint", null),
+          Row("d", "bigint", null),
           Row("# Partition Information", "", ""),
-          Row("# col_name", "", ""),
-          Row("d", "", ""),
+          Row("# col_name", "data_type", "comment"),
+          Row("d", "bigint", null),
           Row("", "", ""),
           Row("# Detailed Table Information", "", ""),
           Row("Database:", "default", "")

http://git-wip-us.apache.org/repos/asf/spark/blob/a2abb583/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index f8c55ec..31283b9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -834,8 +834,8 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
 
     assertResult(
       Array(
-        Row("a", "int", ""),
-        Row("b", "string", ""))
+        Row("a", "int", null),
+        Row("b", "string", null))
     ) {
       sql("DESCRIBE test_describe_commands2")
         .select('col_name, 'data_type, 'comment)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message