Mailing-List: contact commits-help@hive.apache.org; run by ezmlm
Precedence: bulk
Reply-To: hive-dev@hive.apache.org
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: harisankar@apache.org
To: commits@hive.apache.org
Date: Tue, 11 Aug 2015 01:00:41 -0000
Message-Id: <7675d9f0694b47d5a81fb862c41e53e1@git.apache.org>
Subject: [1/2] hive git commit: HIVE-11387: CBO: Calcite Operator To Hive
 Operator (Calcite Return Path) : fix reduce_deduplicate optimization
 (Pengcheng Xiong, reviewed by Jesus Camacho Rodriguez, Hari Subramaniyan)

Repository: hive
Updated Branches:
  refs/heads/master bddbd1da0 -> 538ae7036


http://git-wip-us.apache.org/repos/asf/hive/blob/538ae703/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 6960bee..32514ca 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -1855,8 +1855,7 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (GROUP, 2)
-        Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1956,7 +1955,7 @@ STAGE PLANS:
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                         sort order: +++
-                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                         tag: -1
                         auto parallelism: false
@@ -1968,22 +1967,6 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                  tag: -1
-                  value expressions: _col2 (type: int)
-                  auto parallelism: false
-            Execution mode: vectorized
-        Reducer 4 
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition

http://git-wip-us.apache.org/repos/asf/hive/blob/538ae703/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
index 1c49f52..9756b0c 100644
--- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out
@@ -6931,13 +6931,12 @@ Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
 
 Stage-0
    Fetch Operator
       limit:-1
       Stage-1
-         Reducer 5
+         Reducer 4
          File Output Operator [FS_14]
             compressed:true
             Statistics:Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
@@ -6948,52 +6947,42 @@ Stage-0
                PTF Operator [PTF_11]
                   Function definitions:[{"Input definition":{"type:":"WINDOWING"}},{"partition by:":"_col0","name:":"windowingtablefunction","order by:":"_col1"}]
                   Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
-                  Select Operator [SEL_10]
+                  Group By Operator [GBY_8]
+                  |  keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
                   |  outputColumnNames:["_col0","_col1","_col2"]
                   |  Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
-                  |<-Reducer 4 [SIMPLE_EDGE]
-                     Reduce Output Operator [RS_9]
-                        key expressions:_col0 (type: string), _col1 (type: string)
+                  |<-Reducer 3 [SIMPLE_EDGE]
+                     Reduce Output Operator [RS_7]
+                        key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: int)
                         Map-reduce partition columns:_col0 (type: string)
-                        sort order:++
+                        sort order:+++
                         Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions:_col2 (type: int)
-                        Group By Operator [GBY_8]
+                        Group By Operator [GBY_6]
                         |  keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
                         |  outputColumnNames:["_col0","_col1","_col2"]
                         |  Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
-                        |<-Reducer 3 [SIMPLE_EDGE]
-                           Reduce Output Operator [RS_7]
-                              key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: int)
-                              Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        |<-Reducer 2 [SIMPLE_EDGE]
+                           Reduce Output Operator [RS_5]
+                              key expressions:_col2 (type: string), _col1 (type: string), _col5 (type: int)
+                              Map-reduce partition columns:rand() (type: double)
                               sort order:+++
-                              Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
-                              Group By Operator [GBY_6]
-                              |  keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
-                              |  outputColumnNames:["_col0","_col1","_col2"]
-                              |  Statistics:Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
-                              |<-Reducer 2 [SIMPLE_EDGE]
-                                 Reduce Output Operator [RS_5]
-                                    key expressions:_col2 (type: string), _col1 (type: string), _col5 (type: int)
-                                    Map-reduce partition columns:rand() (type: double)
-                                    sort order:+++
-                                    Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                                    PTF Operator [PTF_3]
-                                       Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
-                                       Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                                       Select Operator [SEL_2]
-                                       |  outputColumnNames:["_col1","_col2","_col5"]
-                                       |  Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
-                                       |<-Map 1 [SIMPLE_EDGE]
-                                          Reduce Output Operator [RS_1]
-                                             key expressions:p_mfgr (type: string), p_name (type: string)
-                                             Map-reduce partition columns:p_mfgr (type: string)
-                                             sort order:++
-                                             Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
-                                             value expressions:p_size (type: int)
-                                             TableScan [TS_0]
-                                                alias:part
-                                                Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+                              Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                              PTF Operator [PTF_3]
+                                 Function definitions:[{"Input definition":{"type:":"TABLE"}},{"Partition table definition":{"partition by:":"_col2","name:":"noop","order by:":"_col1"}}]
+                                 Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                                 Select Operator [SEL_2]
+                                 |  outputColumnNames:["_col1","_col2","_col5"]
+                                 |  Statistics:Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
+                                 |<-Map 1 [SIMPLE_EDGE]
+                                    Reduce Output Operator [RS_1]
+                                       key expressions:p_mfgr (type: string), p_name (type: string)
+                                       Map-reduce partition columns:p_mfgr (type: string)
+                                       sort order:++
+                                       Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
+                                       value expressions:p_size (type: int)
+                                       TableScan [TS_0]
+                                          alias:part
+                                          Statistics:Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: COMPLETE
 
 PREHOOK: query: explain
 select abc.* 

http://git-wip-us.apache.org/repos/asf/hive/blob/538ae703/ql/src/test/results/clientpositive/tez/ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/ptf.q.out b/ql/src/test/results/clientpositive/tez/ptf.q.out
index 88d1a98..b134440 100644
--- a/ql/src/test/results/clientpositive/tez/ptf.q.out
+++ b/ql/src/test/results/clientpositive/tez/ptf.q.out
@@ -869,7 +869,6 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -915,7 +914,7 @@ STAGE PLANS:
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                         sort order: +++
-                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
         Reducer 3 
             Reduce Operator Tree:
@@ -924,18 +923,6 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col2 (type: int)
-        Reducer 4 
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition

http://git-wip-us.apache.org/repos/asf/hive/blob/538ae703/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
index c2e9b1a..2dad1e7 100644
--- a/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
@@ -1857,7 +1857,6 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1957,7 +1956,7 @@ STAGE PLANS:
                       Reduce Output Operator
                         key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                         sort order: +++
-                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                        Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                         tag: -1
                         auto parallelism: true
@@ -1969,22 +1968,6 @@ STAGE PLANS:
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col0 (type: string), _col1 (type: string)
-                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-                  tag: -1
-                  value expressions: _col2 (type: int)
-                  auto parallelism: true
-            Execution mode: vectorized
-        Reducer 4 
-            Needs Tagging: false
-            Reduce Operator Tree:
-              Select Operator
-                expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition

http://git-wip-us.apache.org/repos/asf/hive/blob/538ae703/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
index a38548f..05363e4 100644
--- a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
+++ b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out
@@ -447,10 +447,9 @@ WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING)
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-4
-  Stage-3 depends on stages: Stage-2
-  Stage-4 is a root stage
-  Stage-0 depends on stages: Stage-3
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -507,7 +506,7 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: bigint)
                   sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           TableScan
             Union
@@ -520,7 +519,7 @@ STAGE PLANS:
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: bigint)
                   sort order: ++
-                  Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
+                  Map-reduce partition columns: _col0 (type: string)
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Group By Operator
@@ -528,27 +527,6 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: string), _col1 (type: bigint)
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint)
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
           PTF Operator
             Function definitions:
                 Input definition
@@ -581,7 +559,7 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-4
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan

http://git-wip-us.apache.org/repos/asf/hive/blob/538ae703/ql/src/test/results/clientpositive/vectorized_ptf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/vectorized_ptf.q.out
index 79edb0e..e65a880 100644
--- a/ql/src/test/results/clientpositive/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_ptf.q.out
@@ -2001,8 +2001,7 @@ TOK_QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -2125,7 +2124,7 @@ STAGE PLANS:
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
               sort order: +++
-              Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+              Map-reduce partition columns: _col0 (type: string)
               Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
               tag: -1
               auto parallelism: false
@@ -2161,68 +2160,6 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            GlobalTableId: 0
-#### A masked pattern was here ####
-            NumFilesPerFileSink: 1
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                properties:
-                  columns _col0,_col1,_col2
-                  columns.types string,string,int
-                  escape.delim \
-                  serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-            TotalFiles: 1
-            GatherStats: false
-            MultiFileSpray: false
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            GatherStats: false
-            Reduce Output Operator
-              key expressions: _col0 (type: string), _col1 (type: string)
-              sort order: ++
-              Map-reduce partition columns: _col0 (type: string)
-              Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
-              tag: -1
-              value expressions: _col2 (type: int)
-              auto parallelism: false
-      Path -> Alias:
-#### A masked pattern was here ####
-      Path -> Partition:
-#### A masked pattern was here ####
-          Partition
-            base file name: -mr-10003
-            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-            properties:
-              columns _col0,_col1,_col2
-              columns.types string,string,int
-              escape.delim \
-              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-          
-              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-              properties:
-                columns _col0,_col1,_col2
-                columns.types string,string,int
-                escape.delim \
-                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-      Truncated Path -> Alias:
-#### A masked pattern was here ####
-      Needs Tagging: false
-      Reduce Operator Tree:
-        Select Operator
-          expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int)
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
           PTF Operator
             Function definitions:
                 Input definition