From hashut...@apache.org
Subject svn commit: r1594262 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/ test/queries/clientpositive/ test/results/clientpositive/ test/results/compiler/plan/
Date Tue, 13 May 2014 16:17:32 GMT
Author: hashutosh
Date: Tue May 13 16:17:31 2014
New Revision: 1594262

URL: http://svn.apache.org/r1594262
Log:
HIVE-7012 : Wrong RS de-duplication in the ReduceSinkDeDuplication Optimizer (Navis via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
    hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
    hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out
    hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
    hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml
    hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchAggregation.java Tue May 13 16:17:31 2014
@@ -100,7 +100,7 @@ public class SimpleFetchAggregation impl
 
       for (AggregationDesc aggregation : cGBY.getConf().getAggregators()) {
         List<ExprNodeDesc> parameters = aggregation.getParameters();
-        aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, pGBY));
+        aggregation.setParameters(ExprNodeDescUtils.backtrack(parameters, cGBY, RS));
       }
 
       pctx.setFetchTabledesc(tsDesc);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Tue May 13 16:17:31 2014
@@ -24,6 +24,7 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -2710,6 +2711,7 @@ public class SemanticAnalyzer extends Ba
             fetchFilesNotInLocalFilesystem(stripQuotes(trfm.getChild(execPos).getText())),
             inInfo, inRecordWriter, outInfo, outRecordReader, errRecordReader, errInfo),
         new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
+    output.setColumnExprMap(Collections.emptyMap());  // disable backtracking
 
     return output;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java Tue May 13 16:17:31 2014
@@ -225,11 +225,11 @@ public class ExprNodeDescUtils {
   private static ExprNodeDesc backtrack(ExprNodeColumnDesc column, Operator<?> current,
       Operator<?> terminal) throws SemanticException {
     Map<String, ExprNodeDesc> mapping = current.getColumnExprMap();
-    if (mapping == null || !mapping.containsKey(column.getColumn())) {
+    if (mapping == null) {
       return backtrack((ExprNodeDesc)column, current, terminal);
     }
     ExprNodeDesc mapped = mapping.get(column.getColumn());
-    return backtrack(mapped, current, terminal);
+    return mapped == null ? null : backtrack(mapped, current, terminal);
   }
 
   public static Operator<?> getSingleParent(Operator<?> current, Operator<?> terminal)
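
For context, a minimal standalone sketch (not Hive's operator API; the class and method names below are illustrative only) of what the ExprNodeDescUtils.backtrack change above does. Before the patch, a column missing from a non-null columnExprMap was passed through unchanged, letting the ReduceSinkDeDuplication optimizer trace expressions across operators where they do not actually exist; after the patch, only a null map passes the column through, while a lookup miss returns null so callers skip the de-duplication. The SemanticAnalyzer change above builds on this by setting Collections.emptyMap() on the script operator's output, which explicitly disables backtracking through it.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

public class BacktrackSketch {

  // Old behavior: a column absent from a non-null map fell through and was
  // returned as-is, so the optimizer could assume the key exists upstream.
  static String oldBacktrack(String column, Map<String, String> mapping) {
    if (mapping == null || !mapping.containsKey(column)) {
      return column;                 // pass the column through unchanged
    }
    return mapping.get(column);      // follow the mapping
  }

  // New behavior: only a null map passes the column through; a lookup miss
  // yields null, signalling that the expression cannot be traced back and
  // the RS de-duplication must be skipped instead of producing a wrong plan.
  static String newBacktrack(String column, Map<String, String> mapping) {
    if (mapping == null) {
      return column;
    }
    String mapped = mapping.get(column);
    return mapped == null ? null : mapped;
  }

  public static void main(String[] args) {
    Map<String, String> map = new HashMap<>();
    map.put("_col0", "key");

    System.out.println(oldBacktrack("_col1", map));                    // _col1 (unsafe pass-through)
    System.out.println(newBacktrack("_col1", map));                    // null  (lookup miss: stop)
    System.out.println(newBacktrack("_col1", null));                   // _col1 (no map: pass through)
    System.out.println(newBacktrack("_col1", Collections.emptyMap())); // null  (empty map: disabled)
  }
}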

Modified: hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/reduce_deduplicate_extended.q Tue May 13 16:17:31 2014
@@ -11,7 +11,7 @@ explain select key, sum(key) from (selec
 explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
 explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1);
 -- mGBY-RS-rGBY-RS
-explain select key, sum(key) as value from src group by key order by key, value;
+explain select key, sum(key) as value from src group by key order by key;
 -- RS-JOIN-mGBY-RS-rGBY
 explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 -- RS-JOIN-RS
@@ -23,7 +23,7 @@ explain select key, count(distinct value
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
 select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
 select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1);
-select key, sum(key) as value from src group by key order by key, value;
+select key, sum(key) as value from src group by key order by key;
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
 from (select key, value from src group by key, value) s select s.key group by s.key;
@@ -36,7 +36,7 @@ explain select key, sum(key) from (selec
 explain select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
 explain select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1);
 -- RS-GBY-RS
-explain select key, sum(key) as value from src group by key order by key, value;
+explain select key, sum(key) as value from src group by key order by key;
 -- RS-JOIN-RS-GBY
 explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 -- RS-JOIN-RS
@@ -48,7 +48,7 @@ explain select key, count(distinct value
 select key, sum(key) from (select * from src distribute by key sort by key, value) Q1 group by key;
 select key, sum(key), lower(value) from (select * from src order by key) Q1 group by key, lower(value);
 select key, sum(key), (X + 1) from (select key, (value + 1) as X from src order by key) Q1 group by key, (X + 1);
-select key, sum(key) as value from src group by key order by key, value;
+select key, sum(key) as value from src group by key order by key;
 select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key group by src.key, src.value;
 select src.key, src.value FROM src JOIN src1 ON src.key = src1.key order by src.key, src.value;
 from (select key, value from src group by key, value) s select s.key group by s.key;

Modified: hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ppd2.q.out Tue May 13 16:17:31 2014
@@ -28,6 +28,7 @@ where b.cc>1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -48,8 +49,8 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: bigint)
-                  sort order: +-
+                  key expressions: _col0 (type: string)
+                  sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: bigint)
@@ -69,11 +70,35 @@ STAGE PLANS:
              Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
                 table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: bigint)
+              sort order: +-
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: bigint)
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: string), _col1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -419,6 +444,7 @@ where b.cc>1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -442,8 +468,8 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
-                    key expressions: _col0 (type: string), _col1 (type: bigint)
-                    sort order: +-
+                    key expressions: _col0 (type: string)
+                    sort order: +
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: bigint)
@@ -458,20 +484,40 @@ STAGE PLANS:
             expressions: _col0 (type: string), _col1 (type: bigint)
             outputColumnNames: _col0, _col1
             Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (_col1 > 1) (type: boolean)
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: bigint)
+              sort order: +-
+              Map-reduce partition columns: _col0 (type: string)
+              Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: string), _col1 (type: bigint)
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE
+          Filter Operator
+            predicate: (_col1 > 1) (type: boolean)
+            Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: bigint)
+              outputColumnNames: _col0, _col1
               Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: _col0 (type: string), _col1 (type: bigint)
-                outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
                 Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 4 Data size: 801 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out Tue May 13 16:17:31 2014
@@ -171,10 +171,10 @@ STAGE PLANS:
       limit: -1
 
 PREHOOK: query: -- mGBY-RS-rGBY-RS
-explain select key, sum(key) as value from src group by key order by key, value
+explain select key, sum(key) as value from src group by key order by key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- mGBY-RS-rGBY-RS
-explain select key, sum(key) as value from src group by key order by key, value
+explain select key, sum(key) as value from src group by key order by key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -198,8 +198,8 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: double)
-                  sort order: ++
+                  key expressions: _col0 (type: string)
+                  sort order: +
                   Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: double)
@@ -1467,11 +1467,11 @@ POSTHOOK: Input: default@src
 96	96.0	NULL
 97	194.0	NULL
 98	196.0	NULL
-PREHOOK: query: select key, sum(key) as value from src group by key order by key, value
+PREHOOK: query: select key, sum(key) as value from src group by key order by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: select key, sum(key) as value from src group by key order by key, value
+POSTHOOK: query: select key, sum(key) as value from src group by key order by key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
@@ -2655,10 +2655,10 @@ STAGE PLANS:
       limit: -1
 
 PREHOOK: query: -- RS-GBY-RS
-explain select key, sum(key) as value from src group by key order by key, value
+explain select key, sum(key) as value from src group by key order by key
 PREHOOK: type: QUERY
 POSTHOOK: query: -- RS-GBY-RS
-explain select key, sum(key) as value from src group by key order by key, value
+explain select key, sum(key) as value from src group by key order by key
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -2676,8 +2676,8 @@ STAGE PLANS:
               outputColumnNames: key
               Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
-                key expressions: key (type: string), _col1 (type: double)
-                sort order: ++
+                key expressions: key (type: string)
+                sort order: +
                 Map-reduce partition columns: key (type: string)
                 Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
@@ -3927,11 +3927,11 @@ POSTHOOK: Input: default@src
 96	96.0	NULL
 97	194.0	NULL
 98	196.0	NULL
-PREHOOK: query: select key, sum(key) as value from src group by key order by key, value
+PREHOOK: query: select key, sum(key) as value from src group by key order by key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: select key, sum(key) as value from src group by key order by key, value
+POSTHOOK: query: select key, sum(key) as value from src group by key order by key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####

Modified: hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/input20.q.xml Tue May 13 16:17:31 2014
@@ -384,6 +384,9 @@
                 </object> 
                </void> 
               </void> 
+              <void property="columnExprMap"> 
+               <object id="Collections$EmptyMap0" class="java.util.Collections" method="emptyMap"/> 
+              </void> 
               <void property="conf"> 
                <object class="org.apache.hadoop.hive.ql.plan.ScriptDesc"> 
                 <void property="errRecordReaderClass"> 
@@ -1028,6 +1031,9 @@
                 </object> 
                </void> 
               </void> 
+              <void property="columnExprMap"> 
+               <object idref="Collections$EmptyMap0"/> 
+              </void> 
               <void property="conf"> 
                <object class="org.apache.hadoop.hive.ql.plan.ScriptDesc"> 
                 <void property="errRecordReaderClass"> 

Modified: hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/input4.q.xml Tue May 13 16:17:31 2014
@@ -592,6 +592,9 @@
                 </object> 
                </void> 
               </void> 
+              <void property="columnExprMap"> 
+               <object class="java.util.Collections" method="emptyMap"/> 
+              </void> 
               <void property="conf"> 
                <object class="org.apache.hadoop.hive.ql.plan.ScriptDesc"> 
                 <void property="errRecordReaderClass"> 

Modified: hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml?rev=1594262&r1=1594261&r2=1594262&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml (original)
+++ hive/trunk/ql/src/test/results/compiler/plan/input5.q.xml Tue May 13 16:17:31 2014
@@ -540,6 +540,9 @@
                 </object> 
                </void> 
               </void> 
+              <void property="columnExprMap"> 
+               <object class="java.util.Collections" method="emptyMap"/> 
+              </void> 
               <void property="conf"> 
                <object class="org.apache.hadoop.hive.ql.plan.ScriptDesc"> 
                 <void property="errRecordReaderClass"> 


