From: omalley@apache.org
To: commits@hive.apache.org
Reply-To: hive-dev@hive.apache.org
Date: Tue, 17 Nov 2015 20:19:48 -0000
Subject: [37/43] hive git commit: HIVE-12396 : BucketingSortingReduceSinkOptimizer may still throw IOB exception for duplicate columns (Ashutosh Chauhan via Jesus Camacho Rodriguez)

HIVE-12396 : BucketingSortingReduceSinkOptimizer may still throw IOB exception for duplicate columns (Ashutosh Chauhan via Jesus Camacho Rodriguez)

Signed-off-by: Ashutosh Chauhan

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0b8e7098
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0b8e7098
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0b8e7098

Branch: refs/heads/master-fixed
Commit: 0b8e70984d8ef44fb947101ff52d2d489befcf6f
Parents: fd54e59
Author: Ashutosh Chauhan
Authored: Thu Nov 12 11:28:33 2015 -0800
Committer: Owen O'Malley
Committed: Tue Nov 17 12:18:34 2015 -0800

----------------------------------------------------------------------
 .../BucketingSortingReduceSinkOptimizer.java    |  3 +
 .../clientpositive/insertoverwrite_bucket.q     | 12 +++
 .../clientpositive/insertoverwrite_bucket.q.out | 87 ++++++++++++++++++++
 3 files changed, 102 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/0b8e7098/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
index d5df34c..f59ab4b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
@@ -602,6 +602,9 @@ public class BucketingSortingReduceSinkOptimizer implements Transform {
       }
 
       for (int pos : sortPositions) {
+        if (pos >= selectDesc.getColList().size()) {
+          return null;
+        }
         ExprNodeDesc selectColList = selectDesc.getColList().get(pos);
         if (!(selectColList instanceof ExprNodeColumnDesc)) {
           return null;
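
As a standalone illustration (not part of the patch) of the failure mode the new check avoids: when the SELECT repeats a column, the pruned select column list ends up with fewer entries than the largest bucketing/sorting position, so the unguarded getColList().get(pos) throws an IndexOutOfBoundsException. A minimal sketch with plain java.util lists standing in for Hive's ExprNodeDesc objects (class and variable names here are hypothetical):

    import java.util.Arrays;
    import java.util.List;

    public class SortPositionBoundsDemo {
      public static void main(String[] args) {
        // "INSERT OVERWRITE TABLE temp2 SELECT change, change, num FROM temp1":
        // the duplicate column collapses to two distinct expressions (see the
        // Select Operator in the q.out below: outputColumnNames _col0, _col2),
        // while the sort key is still addressed as position 2.
        List<String> selectColList = Arrays.asList("change", "num");
        int[] sortPositions = {2};

        for (int pos : sortPositions) {
          // The check added by HIVE-12396: bail out of the optimization
          // instead of indexing past the end of the select column list.
          if (pos >= selectColList.size()) {
            System.out.println("skip optimization: sort position " + pos
                + " >= select list size " + selectColList.size());
            return;
          }
          // Without the check, this call throws IndexOutOfBoundsException.
          System.out.println(selectColList.get(pos));
        }
      }
    }
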
http://git-wip-us.apache.org/repos/asf/hive/blob/0b8e7098/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
index 5a10f94..50f9361 100644
--- a/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
+++ b/ql/src/test/queries/clientpositive/insertoverwrite_bucket.q
@@ -25,12 +25,24 @@ CREATE TABLE temp1
 )
 CLUSTERED BY (num) SORTED BY (num) INTO 4 BUCKETS;
 explain insert overwrite table temp1 select data, data from bucketinput;
+CREATE TABLE temp2
+(
+    create_ts STRING ,
+    change STRING,
+    num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS;
+explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1;
 set hive.auto.convert.sortmerge.join=true;
 set hive.optimize.bucketmapjoin = true;
 set hive.optimize.bucketmapjoin.sortedmerge = true;
 select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data);
 drop table temp1;
+drop table temp2;
 drop table buckettestinput;
 drop table buckettestoutput1;
 drop table buckettestoutput2;
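
The new test pairs a table whose SORTED BY column is the third column (temp2: create_ts, change, num) with a SELECT that repeats a column. A rough sketch (again hypothetical, not Hive code) of why the duplicate leaves a gap in the output column numbering, which is what the golden output below reflects (expressions change, num with outputColumnNames _col0, _col2):

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class DuplicateSelectColumnSlots {
      public static void main(String[] args) {
        // SELECT change, change, num FROM temp1
        List<String> selected = Arrays.asList("change", "change", "num");

        // Keep only the first slot of each distinct expression; slot numbers
        // are preserved, so the surviving columns are _col0 and _col2.
        Map<String, Integer> firstSlot = new LinkedHashMap<>();
        for (int i = 0; i < selected.size(); i++) {
          firstSlot.putIfAbsent(selected.get(i), i);
        }
        System.out.println(firstSlot); // {change=0, num=2}
      }
    }

With only two surviving expressions, any position of 2 or more falls outside the select column list, which is exactly the situation the bounds check above handles.
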
http://git-wip-us.apache.org/repos/asf/hive/blob/0b8e7098/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
index 4add20c..bfbe87b 100644
--- a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
+++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out
@@ -150,6 +150,85 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+PREHOOK: query: CREATE TABLE temp2
+(
+    create_ts STRING ,
+    change STRING,
+    num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@temp2
+POSTHOOK: query: CREATE TABLE temp2
+(
+    create_ts STRING ,
+    change STRING,
+    num STRING
+)
+CLUSTERED BY (create_ts) SORTED BY (num) INTO 4 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@temp2
+PREHOOK: query: explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+INSERT OVERWRITE TABLE temp2
+SELECT change, change,num
+FROM temp1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: temp1
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Select Operator
+              expressions: change (type: string), num (type: string)
+              outputColumnNames: _col0, _col2
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col2 (type: string)
+                sort order: +
+                Map-reduce partition columns: _col0 (type: string)
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                value expressions: _col0 (type: string)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: VALUE._col0 (type: string), VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.temp2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.temp2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
 PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@bucketoutput1
@@ -168,6 +247,14 @@ POSTHOOK: query: drop table temp1
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@temp1
 POSTHOOK: Output: default@temp1
+PREHOOK: query: drop table temp2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp2
+PREHOOK: Output: default@temp2
+POSTHOOK: query: drop table temp2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp2
+POSTHOOK: Output: default@temp2
 PREHOOK: query: drop table buckettestinput
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table buckettestinput