spark-commits mailing list archives

From marmb...@apache.org
Subject spark git commit: [SPARK-10113][SQL] Explicit error message for unsigned Parquet logical types
Date Thu, 12 Nov 2015 20:29:53 GMT
Repository: spark
Updated Branches:
  refs/heads/master 4fe99c72c -> f5a9526fe


[SPARK-10113][SQL] Explicit error message for unsigned Parquet logical types

Parquet supports some unsigned datatypes. However, since Spark does not support unsigned
datatypes, it should raise an exception with a clear message rather than one that merely
says the datatype is illegal.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #9646 from HyukjinKwon/SPARK-10113.
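
For illustration (not part of the commit itself), this is roughly how the change surfaces
in a spark-shell session where sqlContext is in scope; the file path and its schema are
assumptions for the sketch:

    // Hypothetical file for illustration: a Parquet file at /tmp/uint32.parquet whose
    // schema declares an unsigned column, e.g. `required int32 c (UINT_32)`.
    val path = "/tmp/uint32.parquet"

    try {
      sqlContext.read.parquet(path).printSchema()
    } catch {
      // With this patch the schema converter raises an AnalysisException whose message
      // reads along the lines of "Parquet type not supported: INT32 (UINT_32)".
      // Depending on where schema conversion runs, the exception may arrive wrapped,
      // so match on the message, as the new test below does.
      case t: Throwable =>
        assert(t.toString.contains("Parquet type not supported"))
    }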


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f5a9526f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f5a9526f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f5a9526f

Branch: refs/heads/master
Commit: f5a9526fec284cccd0755d190c91e8d9999f7877
Parents: 4fe99c7
Author: hyukjinkwon <gurwls223@gmail.com>
Authored: Thu Nov 12 12:29:50 2015 -0800
Committer: Michael Armbrust <michael@databricks.com>
Committed: Thu Nov 12 12:29:50 2015 -0800

----------------------------------------------------------------------
 .../parquet/CatalystSchemaConverter.scala       |  7 ++++++
 .../datasources/parquet/ParquetIOSuite.scala    | 24 ++++++++++++++++++++
 2 files changed, 31 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f5a9526f/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
index 7f3394c..f28a18e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystSchemaConverter.scala
@@ -108,6 +108,9 @@ private[parquet] class CatalystSchemaConverter(
     def typeString =
       if (originalType == null) s"$typeName" else s"$typeName ($originalType)"
 
+    def typeNotSupported() =
+      throw new AnalysisException(s"Parquet type not supported: $typeString")
+
     def typeNotImplemented() =
       throw new AnalysisException(s"Parquet type not yet supported: $typeString")
 
@@ -142,6 +145,9 @@ private[parquet] class CatalystSchemaConverter(
           case INT_32 | null => IntegerType
           case DATE => DateType
           case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT32)
+          case UINT_8 => typeNotSupported()
+          case UINT_16 => typeNotSupported()
+          case UINT_32 => typeNotSupported()
           case TIME_MILLIS => typeNotImplemented()
           case _ => illegalType()
         }
@@ -150,6 +156,7 @@ private[parquet] class CatalystSchemaConverter(
         originalType match {
           case INT_64 | null => LongType
           case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT64)
+          case UINT_64 => typeNotSupported()
           case TIMESTAMP_MILLIS => typeNotImplemented()
           case _ => illegalType()
         }
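
Taken together, the two hunks above leave the INT32 and INT64 branches of the converter
looking roughly like the condensed sketch below (a paraphrase of the diff context, not the
full method, which handles further primitive types):

    // Condensed view of CatalystSchemaConverter's primitive-type conversion after this patch.
    case INT32 =>
      originalType match {
        case INT_32 | null => IntegerType
        case DATE => DateType
        case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT32)
        case UINT_8 | UINT_16 | UINT_32 => typeNotSupported()  // unsigned: "not supported"
        case TIME_MILLIS => typeNotImplemented()               // signed: "not yet supported"
        case _ => illegalType()
      }

    case INT64 =>
      originalType match {
        case INT_64 | null => LongType
        case DECIMAL => makeDecimalType(MAX_PRECISION_FOR_INT64)
        case UINT_64 => typeNotSupported()
        case TIMESTAMP_MILLIS => typeNotImplemented()
        case _ => illegalType()
      }

The new typeNotSupported() helper says "not supported" while the existing
typeNotImplemented() says "not yet supported", which separates the unsigned types Spark
does not map at all from types that are simply unimplemented so far.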

http://git-wip-us.apache.org/repos/asf/spark/blob/f5a9526f/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
index 7274479..82a42d6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala
@@ -206,6 +206,30 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSQLContext {
     }
   }
 
+  test("SPARK-10113 Support for unsigned Parquet logical types") {
+    val parquetSchema = MessageTypeParser.parseMessageType(
+      """message root {
+        |  required int32 c(UINT_32);
+        |}
+      """.stripMargin)
+
+    withTempPath { location =>
+      val extraMetadata = Map.empty[String, String].asJava
+      val fileMetadata = new FileMetaData(parquetSchema, extraMetadata, "Spark")
+      val path = new Path(location.getCanonicalPath)
+      val footer = List(
+        new Footer(path, new ParquetMetadata(fileMetadata, Collections.emptyList()))
+      ).asJava
+
+      ParquetFileWriter.writeMetadataFile(sparkContext.hadoopConfiguration, path, footer)
+
+      val errorMessage = intercept[Throwable] {
+        sqlContext.read.parquet(path.toString).printSchema()
+      }.toString
+      assert(errorMessage.contains("Parquet type not supported"))
+    }
+  }
+
   test("compression codec") {
     def compressionCodecFor(path: String, codecName: String): String = {
       val codecs = for {
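
Note on the test's design: rather than writing actual data, it only writes a Parquet
metadata file (a footer with an empty block list) carrying the UINT_32 schema via
ParquetFileWriter.writeMetadataFile; reading the path back is enough to drive schema
conversion and hit the new error path, and matching on the message via intercept[Throwable]
keeps the assertion independent of any wrapping exception.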

