Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id D4877200BE5 for ; Sat, 24 Dec 2016 18:36:42 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id D30EC160B2F; Sat, 24 Dec 2016 17:36:42 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 04025160B1E for ; Sat, 24 Dec 2016 18:36:41 +0100 (CET) Received: (qmail 58275 invoked by uid 500); 24 Dec 2016 17:36:41 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 58264 invoked by uid 99); 24 Dec 2016 17:36:41 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 24 Dec 2016 17:36:41 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 1C286DFB01; Sat, 24 Dec 2016 17:36:41 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sunchao@apache.org To: commits@hive.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-15499: Nested column pruning: don't prune paths when a SerDe is used only for serializing (Chao Sun, reviewed by Ferdinand Xu) Date: Sat, 24 Dec 2016 17:36:41 +0000 (UTC) archived-at: Sat, 24 Dec 2016 17:36:43 -0000 Repository: hive Updated Branches: refs/heads/master ab0f9cab3 -> ac68aed6e HIVE-15499: Nested column pruning: don't prune paths when a SerDe is used only for serializing (Chao Sun, reviewed by Ferdinand Xu) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: 
http://git-wip-us.apache.org/repos/asf/hive/commit/ac68aed6 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ac68aed6 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ac68aed6 Branch: refs/heads/master Commit: ac68aed6e1e7d253e589132ba8ac493b396c3408 Parents: ab0f9ca Author: Chao Sun Authored: Thu Dec 22 11:29:40 2016 -0800 Committer: Chao Sun Committed: Sat Dec 24 09:35:47 2016 -0800 ---------------------------------------------------------------------- .../hadoop/hive/ql/exec/FileSinkOperator.java | 16 +++++++- .../clientpositive/nested_column_pruning.q | 12 ++++++ .../clientpositive/nested_column_pruning.q.out | 41 ++++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java index 28d4789..3bbe92d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.plan.SkewedColumnPositionPair; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.stats.StatsCollectionContext; import org.apache.hadoop.hive.ql.stats.StatsPublisher; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.Serializer; @@ -355,7 +356,7 @@ public class FileSinkOperator extends TerminalOperator implements parent = Utilities.toTempPath(conf.getDirName()); statsFromRecordWriter = new boolean[numFiles]; serializer = (Serializer) 
conf.getTableInfo().getDeserializerClass().newInstance(); - serializer.initialize(hconf, conf.getTableInfo().getProperties()); + serializer.initialize(unsetNestedColumnPaths(hconf), conf.getTableInfo().getProperties()); outputClass = serializer.getSerializedClass(); if (isLogInfoEnabled) { @@ -1288,4 +1289,17 @@ public class FileSinkOperator extends TerminalOperator implements } return new String[] {fspKey, null}; } + + /** + * Check if nested column paths is set for 'conf'. + * If set, create a copy of 'conf' with this property unset. + */ + private Configuration unsetNestedColumnPaths(Configuration conf) { + if (conf.get(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR) != null) { + Configuration confCopy = new Configuration(conf); + confCopy.unset(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR); + return confCopy; + } + return conf; + } } http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/test/queries/clientpositive/nested_column_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q b/ql/src/test/queries/clientpositive/nested_column_pruning.q index 28b974e..b08b356 100644 --- a/ql/src/test/queries/clientpositive/nested_column_pruning.q +++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q @@ -1,4 +1,5 @@ set hive.fetch.task.conversion = none; +set hive.exec.dynamic.partition.mode = nonstrict; -- First, create source tables DROP TABLE IF EXISTS dummy; @@ -110,3 +111,14 @@ SELECT t1.s1.f3.f5, t2.s2.f8 FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2 ON t1.s1.f3.f4 = t2.s1.f6 WHERE t2.s2.f8.f9 == TRUE; + +-- Testing insert with aliases + +DROP TABLE IF EXISTS nested_tbl_3; +CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET; + +INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1; + +SELECT * FROM nested_tbl_3; 
http://git-wip-us.apache.org/repos/asf/hive/blob/ac68aed6/ql/src/test/results/clientpositive/nested_column_pruning.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/nested_column_pruning.q.out b/ql/src/test/results/clientpositive/nested_column_pruning.q.out index c501c6a..8d32df5 100644 --- a/ql/src/test/results/clientpositive/nested_column_pruning.q.out +++ b/ql/src/test/results/clientpositive/nested_column_pruning.q.out @@ -1091,3 +1091,44 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@nested_tbl_1 #### A masked pattern was here #### 5.0 {"f9":true,"f10":[10,11],"f11":{"key1":true,"key2":false}} +PREHOOK: query: -- Testing insert with aliases + +DROP TABLE IF EXISTS nested_tbl_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Testing insert with aliases + +DROP TABLE IF EXISTS nested_tbl_3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nested_tbl_3 +POSTHOOK: query: CREATE TABLE nested_tbl_3 (f1 boolean, f2 string) PARTITIONED BY (f3 int) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nested_tbl_3 +PREHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_1 +PREHOOK: Output: default@nested_tbl_3 +POSTHOOK: query: INSERT OVERWRITE TABLE nested_tbl_3 PARTITION(f3) +SELECT s1.f1 AS f1, S1.f2 AS f2, s1.f6 AS f3 +FROM nested_tbl_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_1 +POSTHOOK: Output: default@nested_tbl_3@f3=4 +POSTHOOK: Lineage: nested_tbl_3 PARTITION(f3=4).f1 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ] +POSTHOOK: Lineage: nested_tbl_3 
PARTITION(f3=4).f2 EXPRESSION [(nested_tbl_1)nested_tbl_1.FieldSchema(name:s1, type:struct<f1:boolean,f2:string,f3:struct<f4:int,f5:double>,f6:int>, comment:null), ] +PREHOOK: query: SELECT * FROM nested_tbl_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@nested_tbl_3 +PREHOOK: Input: default@nested_tbl_3@f3=4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM nested_tbl_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@nested_tbl_3 +POSTHOOK: Input: default@nested_tbl_3@f3=4 +#### A masked pattern was here #### +false foo 4