spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From andrewo...@apache.org
Subject spark git commit: [SPARK-14506][SQL] HiveClientImpl's toHiveTable misses a table property for external tables
Date Sun, 10 Apr 2016 06:32:24 GMT
Repository: spark
Updated Branches:
  refs/heads/master aea30a1a9 -> 3fb09afd5


[SPARK-14506][SQL] HiveClientImpl's toHiveTable misses a table property for external tables

## What changes were proposed in this pull request?

For an external table's metadata (in Hive's representation), its table type needs to be EXTERNAL_TABLE.
Also, there needs to be a field called EXTERNAL set in the table property with a value of
TRUE (for a MANAGED_TABLE it will be FALSE) based on https://github.com/apache/hive/blob/release-1.2.1/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105.
HiveClientImpl's toHiveTable misses to set this table property.

## How was this patch tested?

Added a new test.

Author: Yin Huai <yhuai@databricks.com>

Closes #12275 from yhuai/SPARK-14506.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fb09afd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fb09afd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fb09afd

Branch: refs/heads/master
Commit: 3fb09afd5e55b9a7a0a332273f09f984a78c3645
Parents: aea30a1
Author: Yin Huai <yhuai@databricks.com>
Authored: Sat Apr 9 23:32:17 2016 -0700
Committer: Andrew Or <andrew@databricks.com>
Committed: Sat Apr 9 23:32:17 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/catalog/CatalogTestCases.scala  |  9 +++++++++
 .../apache/spark/sql/hive/client/HiveClientImpl.scala  | 13 +++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3fb09afd/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
index fbcac09..0009438 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala
@@ -149,6 +149,15 @@ abstract class CatalogTestCases extends SparkFunSuite with BeforeAndAfterEach
{
   // Tables
   // --------------------------------------------------------------------------
 
+  test("the table type of an external table should be EXTERNAL_TABLE") {
+    val catalog = newBasicCatalog()
+    val table =
+      newTable("external_table1", "db2").copy(tableType = CatalogTableType.EXTERNAL_TABLE)
+    catalog.createTable("db2", table, ignoreIfExists = false)
+    val actual = catalog.getTable("db2", "external_table1")
+    assert(actual.tableType === CatalogTableType.EXTERNAL_TABLE)
+  }
+
   test("drop table") {
     val catalog = newBasicCatalog()
     assert(catalog.listTables("db2").toSet == Set("tbl1", "tbl2"))

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb09afd/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index d0eb9dd..a037671 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -659,9 +659,18 @@ private[hive] class HiveClientImpl(
 
   private def toHiveTable(table: CatalogTable): HiveTable = {
     val hiveTable = new HiveTable(table.database, table.identifier.table)
+    // For EXTERNAL_TABLE/MANAGED_TABLE, we also need to set EXTERNAL field in
+    // the table properties accodringly. Otherwise, if EXTERNAL_TABLE is the table type
+    // but EXTERNAL field is not set, Hive metastore will change the type to
+    // MANAGED_TABLE (see
+    // metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java#L1095-L1105)
     hiveTable.setTableType(table.tableType match {
-      case CatalogTableType.EXTERNAL_TABLE => HiveTableType.EXTERNAL_TABLE
-      case CatalogTableType.MANAGED_TABLE => HiveTableType.MANAGED_TABLE
+      case CatalogTableType.EXTERNAL_TABLE =>
+        hiveTable.setProperty("EXTERNAL", "TRUE")
+        HiveTableType.EXTERNAL_TABLE
+      case CatalogTableType.MANAGED_TABLE =>
+        hiveTable.setProperty("EXTERNAL", "FALSE")
+        HiveTableType.MANAGED_TABLE
       case CatalogTableType.INDEX_TABLE => HiveTableType.INDEX_TABLE
       case CatalogTableType.VIRTUAL_VIEW => HiveTableType.VIRTUAL_VIEW
     })


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message