Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7CB351163E for ; Tue, 13 May 2014 16:24:35 +0000 (UTC) Received: (qmail 76148 invoked by uid 500); 13 May 2014 16:17:55 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 75968 invoked by uid 500); 13 May 2014 16:17:55 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 75822 invoked by uid 99); 13 May 2014 16:17:55 -0000 Received: from Unknown (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 13 May 2014 16:17:55 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 13 May 2014 16:17:52 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id C2DB22388999; Tue, 13 May 2014 16:17:32 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1594262 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/ test/results/clientpositive/ test/results/compiler/plan/ Date: Tue, 13 May 2014 16:17:32 -0000 To: commits@hive.apache.org From: hashutosh@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140513161732.C2DB22388999@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: hashutosh Date: Tue May 13 16:17:31 2014 New Revision: 1594262 URL: http://svn.apache.org/r1594262 Log: HIVE-7012 : Wrong RS de-duplication in the ReduceSinkDeDuplication Optimizer (Navis via Ashutosh Chauhan) Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java Tue May 13 16:17:31 2014 @@ -100,7 +100,7 @@ public class SimpleFetchAggregation impl for (AggregationDesc aggregation : cGBY.getConf().getAggregators()) { List parameters = aggregation.getParameters(); - aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, pGBY)); + aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, RS)); } pctx.setFetchTabledesc(tsDesc); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue May 13 16:17:31 2014 @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -2710,6 +2711,7 @@ public class SemanticAnalyzer extends Ba fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())), inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); + output.setColumnExprMap(Collections.emptyMap()); // disable backtracking return output; } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java Tue May 13 16:17:31 2014 @@ -225,11 +225,11 @@ public class ExprNodeDescUtils { private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator current, Operator terminal) throws SemanticException { Map mapping = current.getColumnExprMap(); - if (mapping == null || !mapping.containsKey(column.getColumn())) { + if (mapping == null) { return backtrack((ExprNodeDesc)column, current, terminal); } ExprNodeDesc mapped = mapping.get(column.getColumn()); - return backtrack(mapped, current, terminal); + return mapped == null ? null : backtrack(mapped, current, terminal); } public static Operator getSingleParent(Operator current, Operator terminal) Modified: hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q Tue May 13 16:17:31 2014 @@ -11,7 +11,7 @@ explain select key, sum(key) from (selec explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -- mGBY-RS-rGBY-RS -explain select key, sum(key) as value from src group by key order by key, value; +explain select key, sum(key) as value from src group by key order by key; -- RS-JOIN-mGBY-RS-rGBY explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; -- RS-JOIN-RS @@ -23,7 +23,7 @@ explain select key, count(distinct value select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key; select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -select key, sum(key) as value from src group by key order by key, value; +select key, sum(key) as value from src group by key order by key; select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; from (select key, value from src group by key, value) s select s.key group by s.key; @@ -36,7 +36,7 @@ explain select key, sum(key) from (selec explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -- RS-GBY-RS -explain select key, sum(key) as value from src group by key order by key, value; +explain select key, sum(key) as value from src group by key order by key; -- RS-JOIN-RS-GBY explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; -- RS-JOIN-RS @@ -48,7 +48,7 @@ explain select key, count(distinct value select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key; select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value); select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1); -select key, sum(key) as value from src group by key order by key, value; +select key, sum(key) as value from src group by key order by key; select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value; select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value; from (select key, value from src group by key, value) s select s.key group by s.key; Modified: hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out Tue May 13 16:17:31 2014 @@ -28,6 +28,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -48,8 +49,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) @@ -69,11 +70,35 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -419,6 +444,7 @@ where b.cc>1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: @@ -442,8 +468,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: bigint) - sort order: +- + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) @@ -458,20 +484,40 @@ STAGE PLANS: expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col1 > 1) (type: boolean) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: bigint) + sort order: +- + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reduce Operator Tree: + Extract + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col1 > 1) (type: boolean) + Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator Modified: hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out Tue May 13 16:17:31 2014 @@ -171,10 +171,10 @@ STAGE PLANS: limit: -1 PREHOOK: query: -- mGBY-RS-rGBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY POSTHOOK: query: -- mGBY-RS-rGBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -198,8 +198,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: double) - sort order: ++ + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) @@ -1467,11 +1467,11 @@ POSTHOOK: Input: default@src 96 96.0 NULL 97 194.0 NULL 98 196.0 NULL -PREHOOK: query: select key, sum(key) as value from src group by key order by key, value +PREHOOK: query: select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select key, sum(key) as value from src group by key order by key, value +POSTHOOK: query: select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### @@ -2655,10 +2655,10 @@ STAGE PLANS: limit: -1 PREHOOK: query: -- RS-GBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY POSTHOOK: query: -- RS-GBY-RS -explain select key, sum(key) as value from src group by key order by key, value +explain select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -2676,8 +2676,8 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: key (type: string), _col1 (type: double) - sort order: ++ + key expressions: key (type: string) + sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: @@ -3927,11 +3927,11 @@ POSTHOOK: Input: default@src 96 96.0 NULL 97 194.0 NULL 98 196.0 NULL -PREHOOK: query: select key, sum(key) as value from src group by key order by key, value +PREHOOK: query: select key, sum(key) as value from src group by key order by key PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select key, sum(key) as value from src group by key order by key, value +POSTHOOK: query: select key, sum(key) as value from src group by key order by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### Modified: hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml (original) +++ hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml Tue May 13 16:17:31 2014 @@ -384,6 +384,9 @@ + + + @@ -1028,6 +1031,9 @@ + + + Modified: hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml (original) +++ hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml Tue May 13 16:17:31 2014 @@ -592,6 +592,9 @@ + + + Modified: hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml?rev=1594262&r1=1594261&r2=1594262&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml (original) +++ hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml Tue May 13 16:17:31 2014 @@ -540,6 +540,9 @@ + + +