spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From andrewo...@apache.org
Subject spark git commit: [SPARK-14346][SQL] Lists unsupported Hive features in SHOW CREATE TABLE output
Date Thu, 19 May 2016 19:02:57 GMT
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 97fd9a09c -> 4f8639f9d


[SPARK-14346][SQL] Lists unsupported Hive features in SHOW CREATE TABLE output

## What changes were proposed in this pull request?

This PR is a follow-up of #13079. It replaces `hasUnsupportedFeatures: Boolean` in `CatalogTable`
with `unsupportedFeatures: Seq[String]`, which contains unsupported Hive features of the underlying
Hive table. In this way, we can accurately report all unsupported Hive features in the exception
message.

## How was this patch tested?

Updated existing test case to check exception message.

Author: Cheng Lian <lian@databricks.com>

Closes #13173 from liancheng/spark-14346-follow-up.

(cherry picked from commit 6ac1c3a040f88fae15c46acd73e7e3691f7d3619)
Signed-off-by: Andrew Or <andrew@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f8639f9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f8639f9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f8639f9

Branch: refs/heads/branch-2.0
Commit: 4f8639f9d4144a5e2b1eb24a98e4c752c2dc00b8
Parents: 97fd9a0
Author: Cheng Lian <lian@databricks.com>
Authored: Thu May 19 12:02:41 2016 -0700
Committer: Andrew Or <andrew@databricks.com>
Committed: Thu May 19 12:02:50 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/catalog/interface.scala   |  9 +++------
 .../spark/sql/execution/command/tables.scala     | 14 +++++++-------
 .../spark/sql/hive/client/HiveClientImpl.scala   | 19 ++++++++++++++-----
 .../spark/sql/hive/ShowCreateTableSuite.scala    |  8 +++++---
 4 files changed, 29 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4f8639f9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index 3fdd411..4a073d1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -80,11 +80,8 @@ case class CatalogTablePartition(
  * Note that Hive's metastore also tracks skewed columns. We should consider adding that
in the
  * future once we have a better understanding of how we want to handle skewed columns.
  *
- * @param hasUnsupportedFeatures is used to indicate whether all table metadata entries retrieved
- *        from the concrete underlying external catalog (e.g. Hive metastore) are supported
by
- *        Spark SQL. For example, if the underlying Hive table has skewed columns, this information
- *        can't be mapped to [[CatalogTable]] since Spark SQL doesn't handle skewed columns
for now.
- *        In this case `hasUnsupportedFeatures` is set to true. By default, it is false.
+ * @param unsupportedFeatures is a list of string descriptions of features that are used
by the
+ *        underlying table but not supported by Spark SQL yet.
  */
 case class CatalogTable(
     identifier: TableIdentifier,
@@ -102,7 +99,7 @@ case class CatalogTable(
     viewOriginalText: Option[String] = None,
     viewText: Option[String] = None,
     comment: Option[String] = None,
-    hasUnsupportedFeatures: Boolean = false) {
+    unsupportedFeatures: Seq[String] = Seq.empty) {
 
   // Verify that the provided columns are part of the schema
   private val colNames = schema.map(_.name).toSet

http://git-wip-us.apache.org/repos/asf/spark/blob/4f8639f9/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 1fc02d1..a347274 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -633,16 +633,16 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
   }
 
   private def showCreateHiveTable(metadata: CatalogTable): String = {
-    def reportUnsupportedError(): Unit = {
-      throw new UnsupportedOperationException(
+    def reportUnsupportedError(features: Seq[String]): Unit = {
+      throw new AnalysisException(
         s"Failed to execute SHOW CREATE TABLE against table ${metadata.identifier.quotedString},
" +
-          "because it contains table structure(s) (e.g. skewed columns) that Spark SQL doesn't
" +
-          "support yet."
+          "which is created by Hive and uses the following unsupported feature(s)\n" +
+          features.map(" - " + _).mkString("\n")
       )
     }
 
-    if (metadata.hasUnsupportedFeatures) {
-      reportUnsupportedError()
+    if (metadata.unsupportedFeatures.nonEmpty) {
+      reportUnsupportedError(metadata.unsupportedFeatures)
     }
 
     val builder = StringBuilder.newBuilder
@@ -651,7 +651,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) extends RunnableComman
       case EXTERNAL => " EXTERNAL TABLE"
       case VIEW => " VIEW"
       case MANAGED => " TABLE"
-      case INDEX => reportUnsupportedError()
+      case INDEX => reportUnsupportedError(Seq("index table"))
     }
 
     builder ++= s"CREATE$tableTypeString ${table.quotedString}"

http://git-wip-us.apache.org/repos/asf/spark/blob/4f8639f9/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index af2850d..0f0c1b0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -337,10 +337,19 @@ private[hive] class HiveClientImpl(
       val schema = h.getCols.asScala.map(fromHiveColumn) ++ partCols
 
       // Skew spec, storage handler, and bucketing info can't be mapped to CatalogTable (yet)
-      val hasUnsupportedFeatures =
-        !h.getSkewedColNames.isEmpty ||
-          h.getStorageHandler != null ||
-          !h.getBucketCols.isEmpty
+      val unsupportedFeatures = ArrayBuffer.empty[String]
+
+      if (!h.getSkewedColNames.isEmpty) {
+        unsupportedFeatures += "skewed columns"
+      }
+
+      if (h.getStorageHandler != null) {
+        unsupportedFeatures += "storage handler"
+      }
+
+      if (!h.getBucketCols.isEmpty) {
+        unsupportedFeatures += "bucketing"
+      }
 
       CatalogTable(
         identifier = TableIdentifier(h.getTableName, Option(h.getDbName)),
@@ -369,7 +378,7 @@ private[hive] class HiveClientImpl(
         properties = h.getParameters.asScala.toMap,
         viewOriginalText = Option(h.getViewOriginalText),
         viewText = Option(h.getViewExpandedText),
-        hasUnsupportedFeatures = hasUnsupportedFeatures)
+        unsupportedFeatures = unsupportedFeatures)
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4f8639f9/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
index 3b8068d..dedc8f5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ShowCreateTableSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.sql.hive
 
-import org.apache.spark.sql.QueryTest
+import org.apache.spark.sql.{AnalysisException, QueryTest}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
 import org.apache.spark.sql.hive.test.TestHiveSingleton
@@ -247,7 +247,7 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing
     }
   }
 
-  test("hive bucketing not supported") {
+  test("hive bucketing is not supported") {
     withTable("t1") {
       createRawHiveTable(
         s"""CREATE TABLE t1 (a INT, b STRING)
@@ -257,9 +257,11 @@ class ShowCreateTableSuite extends QueryTest with SQLTestUtils with TestHiveSing
          """.stripMargin
       )
 
-      intercept[UnsupportedOperationException] {
+      val cause = intercept[AnalysisException] {
         sql("SHOW CREATE TABLE t1")
       }
+
+      assert(cause.getMessage.contains(" - bucketing"))
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message