hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject hive git commit: HIVE-10855: Make HIVE-10568 work with Spark [Spark Branch] (Rui reviewed by Xuefu)
Date Sun, 02 Aug 2015 02:49:23 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 399168ccf -> b72766d5f


HIVE-10855: Make HIVE-10568 work with Spark [Spark Branch] (Rui reviewed by Xuefu)

Conflicts:

	ql/src/test/results/clientpositive/spark/auto_join18.q.out
	ql/src/test/results/clientpositive/spark/groupby_position.q.out
	ql/src/test/results/clientpositive/spark/join18.q.out


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b72766d5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b72766d5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b72766d5

Branch: refs/heads/branch-1
Commit: b72766d5f0a0bc34870a6bee9267e49c7a757c87
Parents: 399168c
Author: Rui Li <rui.li@intel.com>
Authored: Fri Jun 12 09:31:18 2015 +0800
Committer: xzhang <xzhang@xzdt>
Committed: Sat Aug 1 19:47:09 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  2 +-
 .../clientpositive/spark/auto_join32.q.out      | 24 ++++++++-------
 .../clientpositive/spark/limit_pushdown.q.out   | 31 +++++++++++---------
 .../spark/vector_count_distinct.q.out           | 30 +++++++++++++++----
 4 files changed, 56 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b72766d5/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c5b0afe..130d525 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -920,7 +920,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       //0. Distinct aggregate rewrite
       // Run this optimization early, since it is expanding the operator pipeline.
-      if (conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") &&
+      if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") &&
           conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) {
         // Its not clear, if this rewrite is always performant on MR, since extra map phase
         // introduced for 2nd MR job may offset gains of this multi-stage aggregation.

http://git-wip-us.apache.org/repos/asf/hive/blob/b72766d5/ql/src/test/results/clientpositive/spark/auto_join32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/auto_join32.q.out b/ql/src/test/results/clientpositive/spark/auto_join32.q.out
index e26e4a2..316792b 100644
--- a/ql/src/test/results/clientpositive/spark/auto_join32.q.out
+++ b/ql/src/test/results/clientpositive/spark/auto_join32.q.out
@@ -450,10 +450,9 @@ STAGE PLANS:
                           outputColumnNames: _col0, _col1
                           Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                           Group By Operator
-                            aggregations: count(DISTINCT _col1)
                             keys: _col0 (type: string), _col1 (type: string)
                             mode: hash
-                            outputColumnNames: _col0, _col1, _col2
+                            outputColumnNames: _col0, _col1
                             Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
                             Reduce Output Operator
                               key expressions: _col0 (type: string), _col1 (type: string)
@@ -463,18 +462,23 @@ STAGE PLANS:
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
-                aggregations: count(DISTINCT KEY._col1:0._col0)
-                keys: KEY._col0 (type: string)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Group By Operator
+                  aggregations: count(_col1)
+                  keys: _col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/b72766d5/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
index 1efa9e7..4a0306d 100644
--- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
@@ -476,35 +476,38 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(DISTINCT _col1)
                       keys: _col0 (type: tinyint), _col1 (type: double)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: double)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: tinyint)
                         Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
-                        TopN Hash Memory Usage: 0.3
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
-                aggregations: count(DISTINCT KEY._col1:0._col0)
-                keys: KEY._col0 (type: tinyint)
+                keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
-                Limit
-                  Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                  File Output Operator
-                    compressed: false
+                Group By Operator
+                  aggregations: count(_col1)
+                  keys: _col0 (type: tinyint)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                  Limit
+                    Number of rows: 20
                     Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator

http://git-wip-us.apache.org/repos/asf/hive/blob/b72766d5/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
index fecfe0a..aaea86d 100644
--- a/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out
@@ -1247,7 +1247,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1260,30 +1261,47 @@ STAGE PLANS:
                     outputColumnNames: _col0
                     Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(DISTINCT _col0)
                       keys: _col0 (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1
+                      outputColumnNames: _col0
                       Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 2000 Data size: 3504000 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
-                aggregations: count(DISTINCT KEY._col0:0._col0)
+                keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1000 Data size: 1752000 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(_col0)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator


Mime
View raw message