hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gop...@apache.org
Subject [49/82] [abbrv] hive git commit: HIVE-10677 : hive.exec.parallel=true has problem when it is used for analyze table column stats (Pengcheng Xiong via Ashutosh Chauhan)
Date Fri, 29 May 2015 00:50:56 GMT
HIVE-10677 : hive.exec.parallel=true has problem when it is used for analyze table column stats
(Pengcheng Xiong via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/d75a5414
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/d75a5414
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/d75a5414

Branch: refs/heads/llap
Commit: d75a5414552a23a6c07413deee6234bcb2b1ff74
Parents: d823fc8
Author: Pengcheng Xiong <pxiong@hortonworks.com>
Authored: Thu May 21 13:30:00 2015 -0700
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Sat May 23 12:34:35 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/TaskCompiler.java      |  4 +-
 .../clientpositive/exec_parallel_column_stats.q |  5 ++
 .../clientpositive/columnstats_partlvl.q.out    | 14 +++---
 .../clientpositive/columnstats_partlvl_dp.q.out |  8 +--
 .../clientpositive/columnstats_tbllvl.q.out     | 14 +++---
 .../clientpositive/compute_stats_date.q.out     |  2 +-
 .../display_colstats_tbllvl.q.out               |  6 +--
 .../exec_parallel_column_stats.q.out            | 51 ++++++++++++++++++++
 .../temp_table_display_colstats_tbllvl.q.out    |  6 +--
 9 files changed, 84 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index b4d5382..ba11e41 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -329,7 +329,9 @@ public abstract class TaskCompiler {
         colName, colType, isTblLevel);
     cStatsWork = new ColumnStatsWork(fetch, cStatsDesc);
     cStatsTask = (ColumnStatsTask) TaskFactory.get(cStatsWork, conf);
-    rootTasks.add(cStatsTask);
+    // This is a column stats task. According to the semantics, there should be
+    // only one MR task in rootTasks, so run this task after it.
+    rootTasks.get(0).addDependentTask(cStatsTask);
   }
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
new file mode 100644
index 0000000..ceacc24
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/exec_parallel_column_stats.q
@@ -0,0 +1,5 @@
+set hive.exec.parallel=true;
+
+explain analyze table src compute statistics for columns;
+
+analyze table src compute statistics for columns;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index 3c22d40..e0c4cfe 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -38,7 +38,7 @@ analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -105,7 +105,7 @@ TOK_ANALYZE
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -238,7 +238,7 @@ analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -305,7 +305,7 @@ TOK_ANALYZE
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -438,7 +438,7 @@ analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -518,7 +518,7 @@ analyze table Employee_Part  compute statistics for columns
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -600,7 +600,7 @@ analyze table Employee_Part  compute statistics for columns
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 18a6909..aaf9d91 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -76,7 +76,7 @@ analyze table Employee_Part partition (employeeSalary='4000.0', country) compute
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -149,7 +149,7 @@ analyze table Employee_Part partition (employeeSalary='2000.0') compute statisti
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -233,7 +233,7 @@ analyze table Employee_Part partition (employeeSalary) compute statistics for co
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -314,7 +314,7 @@ analyze table Employee_Part partition (employeeSalary,country) compute statistic
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
index 002823c..19283bb 100644
--- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
@@ -46,7 +46,7 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP,
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -104,7 +104,7 @@ TOK_ANALYZE
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -223,7 +223,7 @@ analyze table default.UserVisits_web_text_none compute statistics for columns
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -323,7 +323,7 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -447,7 +447,7 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns sour
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -506,7 +506,7 @@ TOK_ANALYZE
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -625,7 +625,7 @@ analyze table dummydb.UserVisits_in_dummy_db compute statistics for columns
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/compute_stats_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out
index 53cc107..b57a862 100644
--- a/ql/src/test/results/clientpositive/compute_stats_date.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out
@@ -56,7 +56,7 @@ analyze table tab_date compute statistics for columns fl_date
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
index 53f06d9..7c91248 100644
--- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -62,7 +62,7 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP,
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -120,7 +120,7 @@ TOK_ANALYZE
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -295,7 +295,7 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out
new file mode 100644
index 0000000..2e96bb6
--- /dev/null
+++ b/ql/src/test/results/clientpositive/exec_parallel_column_stats.q.out
@@ -0,0 +1,51 @@
+PREHOOK: query: explain analyze table src compute statistics for columns
+PREHOOK: type: QUERY
+POSTHOOK: query: explain analyze table src compute statistics for columns
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+  Stage-1 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-0
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: key, value
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  sort order: 
+                  value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-1
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src
+
+PREHOOK: query: analyze table src compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table src compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####

http://git-wip-us.apache.org/repos/asf/hive/blob/d75a5414/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
index 3f63cbb..cfa88ab 100644
--- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
@@ -70,7 +70,7 @@ analyze table UserVisits_web_text_none compute statistics for columns sourceIP,
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -128,7 +128,7 @@ TOK_ANALYZE
 
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0
@@ -303,7 +303,7 @@ analyze table empty_tab compute statistics for columns a,b,c,d,e
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-0 is a root stage
-  Stage-1 is a root stage
+  Stage-1 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-0


Mime
View raw message