spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From marmb...@apache.org
Subject git commit: SPARK-3807: SparkSql does not work for tables created using custom serde
Date Mon, 13 Oct 2014 20:47:54 GMT
Repository: spark
Updated Branches:
  refs/heads/master 371321cad -> e6e37701f


SPARK-3807: SparkSql does not work for tables created using custom serde

SparkSql crashes on selecting tables using custom serde.

Example:
----------------

CREATE EXTERNAL TABLE table_name PARTITIONED BY ( a int) ROW FORMAT 'SERDE "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer"
with serdeproperties("serialization.format"="org.apache.thrift.protocol.TBinaryProtocol","serialization.class"="ser_class")
STORED AS SEQUENCEFILE;

The following exception is seen on running a query like 'select * from table_name limit 1':

ERROR CliDriver: org.apache.hadoop.hive.serde2.SerDeException: java.lang.NullPointerException
at org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer.initialize(ThriftDeserializer.java:68)
at org.apache.hadoop.hive.ql.plan.TableDesc.getDeserializer(TableDesc.java:80)
at org.apache.spark.sql.hive.execution.HiveTableScan.addColumnMetadataToConf(HiveTableScan.scala:86)
at org.apache.spark.sql.hive.execution.HiveTableScan.<init>(HiveTableScan.scala:100)
at org.apache.spark.sql.hive.HiveStrategies$HiveTableScans$$anonfun$14.apply(HiveStrategies.scala:188)
at org.apache.spark.sql.hive.HiveStrategies$HiveTableScans$$anonfun$14.apply(HiveStrategies.scala:188)
at org.apache.spark.sql.SQLContext$SparkPlanner.pruneFilterProject(SQLContext.scala:364)
at org.apache.spark.sql.hive.HiveStrategies$HiveTableScans$.apply(HiveStrategies.scala:184)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.apply(QueryPlanner.scala:59)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54)
at org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:280)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58)
at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
at org.apache.spark.sql.catalyst.planning.QueryPlanner.apply(QueryPlanner.scala:59)
at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:402)
at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:400)
at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:406)
at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:406)
at org.apache.spark.sql.hive.HiveContext$QueryExecution.stringResult(HiveContext.scala:406)
at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:59)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:291)
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:413)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:226)
at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(Unknown Source)
at java.lang.reflect.Method.invoke(Unknown Source)
at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:328)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.NullPointerException

Author: chirag <chirag.aggarwal@guavus.com>

Closes #2674 from chiragaggarwal/branch-1.1 and squashes the following commits:

370c31b [chirag] SPARK-3807: Add a test case to validate the fix.
1f26805 [chirag] SPARK-3807: SparkSql does not work for tables created using custom serde
(Incorporated Review Comments)
ba4bc0c [chirag] SPARK-3807: SparkSql does not work for tables created using custom serde
5c73b72 [chirag] SPARK-3807: SparkSql does not work for tables created using custom serde

(cherry picked from commit 925e22d3132b983a2fcee31e3878b680c7ff92da)
Signed-off-by: Michael Armbrust <michael@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e6e37701
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e6e37701
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e6e37701

Branch: refs/heads/master
Commit: e6e37701f12be82fa77dc28d825ddd36a1ab7594
Parents: 371321c
Author: chirag <chirag.aggarwal@guavus.com>
Authored: Mon Oct 13 13:47:26 2014 -0700
Committer: Michael Armbrust <michael@databricks.com>
Committed: Mon Oct 13 13:47:51 2014 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 2 +-
 .../org/apache/spark/sql/hive/execution/HiveTableScan.scala    | 6 +++++-
 .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala   | 3 +++
 3 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e6e37701/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index c5fee5e..75a1965 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -305,7 +305,7 @@ private[hive] case class MetastoreRelation
   val partitionKeys = hiveQlTable.getPartitionKeys.map(_.toAttribute)
 
   /** Non-partitionKey attributes */
-  val attributes = table.getSd.getCols.map(_.toAttribute)
+  val attributes = hiveQlTable.getCols.map(_.toAttribute) 
 
   val output = attributes ++ partitionKeys
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e6e37701/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
index a321475..5b83b77 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala
@@ -86,10 +86,14 @@ case class HiveTableScan(
     ColumnProjectionUtils.appendReadColumnIDs(hiveConf, neededColumnIDs)
     ColumnProjectionUtils.appendReadColumnNames(hiveConf, attributes.map(_.name))
 
+    val tableDesc = relation.tableDesc
+    val deserializer = tableDesc.getDeserializerClass.newInstance
+    deserializer.initialize(hiveConf, tableDesc.getProperties)
+
     // Specifies types and object inspectors of columns to be scanned.
     val structOI = ObjectInspectorUtils
       .getStandardObjectInspector(
-        relation.tableDesc.getDeserializer.getObjectInspector,
+        deserializer.getObjectInspector,
         ObjectInspectorCopyOption.JAVA)
       .asInstanceOf[StructObjectInspector]
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e6e37701/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 2829105..3e10077 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -802,6 +802,9 @@ class HiveQuerySuite extends HiveComparisonTest {
     clear()
   }
 
+  createQueryTest("select from thrift based table",
+    "SELECT * from src_thrift")
+  
   // Put tests that depend on specific Hive settings before these last two test,
   // since they modify /clear stuff.
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message