Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 2C122200B4B for ; Thu, 21 Jul 2016 16:08:42 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 2AA9D160A73; Thu, 21 Jul 2016 14:08:42 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 720D8160A72 for ; Thu, 21 Jul 2016 16:08:41 +0200 (CEST) Received: (qmail 69938 invoked by uid 500); 21 Jul 2016 14:08:40 -0000 Mailing-List: contact commits-help@spark.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list commits@spark.apache.org Received: (qmail 69929 invoked by uid 99); 21 Jul 2016 14:08:40 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 21 Jul 2016 14:08:40 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 880F8E1101; Thu, 21 Jul 2016 14:08:40 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: lian@apache.org To: commits@spark.apache.org Message-Id: <6abc9fdd8feb4531ba2cc0c357eac4c9@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: spark git commit: [SPARK-16632][SQL] Revert PR #14272: Respect Hive schema when merging parquet schema Date: Thu, 21 Jul 2016 14:08:40 +0000 (UTC) archived-at: Thu, 21 Jul 2016 14:08:42 -0000 Repository: spark Updated Branches: refs/heads/master 6203668d5 -> 69626addd [SPARK-16632][SQL] Revert PR #14272: Respect Hive schema when merging parquet schema ## What changes were proposed in this pull request? PR #14278 is a more general and simpler fix for SPARK-16632 than PR #14272. After merging #14278, we no longer need changes made in #14272. So here I revert them. This PR targets both master and branch-2.0. ## How was this patch tested? Existing tests. Author: Cheng Lian Closes #14300 from liancheng/revert-pr-14272. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69626add Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69626add Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69626add Branch: refs/heads/master Commit: 69626adddc0441a4834b70a32e2d95b11d69a219 Parents: 6203668 Author: Cheng Lian Authored: Thu Jul 21 22:08:34 2016 +0800 Committer: Cheng Lian Committed: Thu Jul 21 22:08:34 2016 +0800 ---------------------------------------------------------------------- .../parquet/ParquetReadSupport.scala | 18 --------- .../parquet/ParquetSchemaSuite.scala | 39 -------------------- 2 files changed, 57 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/69626add/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index 0bee874..8a2e0d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -26,8 +26,6 @@ import org.apache.parquet.hadoop.api.{InitContext, ReadSupport} import org.apache.parquet.hadoop.api.ReadSupport.ReadContext import org.apache.parquet.io.api.RecordMaterializer import org.apache.parquet.schema._ -import org.apache.parquet.schema.OriginalType._ -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ import org.apache.parquet.schema.Type.Repetition import org.apache.spark.internal.Logging @@ -123,12 +121,6 @@ private[parquet] object ParquetReadSupport { } private def clipParquetType(parquetType: Type, catalystType: DataType): Type = { - val primName = if (parquetType.isPrimitive()) { - parquetType.asPrimitiveType().getPrimitiveTypeName() - } else { - null - } - catalystType match { case t: ArrayType if !isPrimitiveCatalystType(t.elementType) => // Only clips array types with nested type as element type. @@ -143,16 +135,6 @@ private[parquet] object ParquetReadSupport { case t: StructType => clipParquetGroup(parquetType.asGroupType(), t) - case _: ByteType if primName == INT32 => - // SPARK-16632: Handle case where Hive stores bytes in a int32 field without specifying - // the original type. - Types.primitive(INT32, parquetType.getRepetition()).as(INT_8).named(parquetType.getName()) - - case _: ShortType if primName == INT32 => - // SPARK-16632: Handle case where Hive stores shorts in a int32 field without specifying - // the original type. - Types.primitive(INT32, parquetType.getRepetition()).as(INT_16).named(parquetType.getName()) - case _ => // UDTs and primitive types are not clipped. For UDTs, a clipped version might not be able // to be mapped to desired user-space types. So UDTs shouldn't participate schema merging. http://git-wip-us.apache.org/repos/asf/spark/blob/69626add/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 31ebec0..8a980a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -1581,43 +1581,4 @@ class ParquetSchemaSuite extends ParquetSchemaTest { | } |} """.stripMargin) - - testSchemaClipping( - "int32 parquet field with byte schema field", - - parquetSchema = - """message root { - | optional int32 value; - |} - """.stripMargin, - - catalystSchema = - new StructType() - .add("value", ByteType, nullable = true), - - expectedSchema = - """message root { - | optional int32 value (INT_8); - |} - """.stripMargin) - - testSchemaClipping( - "int32 parquet field with short schema field", - - parquetSchema = - """message root { - | optional int32 value; - |} - """.stripMargin, - - catalystSchema = - new StructType() - .add("value", ShortType, nullable = true), - - expectedSchema = - """message root { - | optional int32 value (INT_16); - |} - """.stripMargin) - } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org