hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From w...@apache.org
Subject [5/7] hive git commit: HIVE-17475: Disable mapjoin using hint (Deepak Jaiswal reviewed by Jason Dere)
Date Fri, 08 Sep 2017 22:35:58 GMT
HIVE-17475: Disable mapjoin using hint (Deepak Jaiswal reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a74107ef
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a74107ef
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a74107ef

Branch: refs/heads/hive-14535
Commit: a74107ef1dfacd88921dbac1c694641302af6044
Parents: 5e57a2a
Author: Jason Dere <jdere@hortonworks.com>
Authored: Fri Sep 8 14:03:27 2017 -0700
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Fri Sep 8 14:03:27 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../hive/ql/optimizer/ConvertJoinMapJoin.java   |   4 +-
 .../hadoop/hive/ql/parse/ParseContext.java      |  10 +
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  27 +
 .../test/queries/clientpositive/mapjoin_hint.q  |  69 ++
 .../clientpositive/llap/mapjoin_hint.q.out      | 643 +++++++++++++++++++
 6 files changed, 753 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 663c95b..d9e760f 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -542,6 +542,7 @@ minillaplocal.query.files=\
   load_dyn_part5.q,\
   lvj_mapjoin.q,\
   mapjoin_decimal.q,\
+  mapjoin_hint.q,\
   mapjoin_emit_interval.q,\
   mergejoin_3way.q,\
   mrr.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
index a2414f3..9412e5f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java
@@ -103,7 +103,9 @@ public class ConvertJoinMapJoin implements NodeProcessor {
     joinOp.getConf().setMemoryMonitorInfo(memoryMonitorInfo);
 
     TezBucketJoinProcCtx tezBucketJoinProcCtx = new TezBucketJoinProcCtx(context.conf);
-    if (!context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)) {
+    boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN)
&
+            !context.parseContext.getDisableMapJoin();
+    if (!hiveConvertJoin) {
       // we are just converting to a common merge join operator. The shuffle
       // join in map-reduce case.
       Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize);

http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index adf7f84..9ab42f2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -133,6 +133,8 @@ public class ParseContext {
           new HashMap<>();
 
   private Map<String, List<SemiJoinHint>> semiJoinHints;
+  private boolean disableMapJoin;
+
   public ParseContext() {
   }
 
@@ -705,4 +707,12 @@ public class ParseContext {
   public Map<String, List<SemiJoinHint>> getSemiJoinHints() {
     return semiJoinHints;
   }
+
+  public void setDisableMapJoin(boolean disableMapJoin) {
+    this.disableMapJoin = disableMapJoin;
+  }
+
+  public boolean getDisableMapJoin() {
+    return disableMapJoin;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index cebb0af..1c74779 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -9118,6 +9118,31 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   }
 
   /**
+   * disableMapJoinWithHint
+   * @param hints
+   * @return true if hint to disable hint is provided, else false
+   * @throws SemanticException
+   */
+  private boolean disableMapJoinWithHint(List<ASTNode> hints) throws SemanticException
{
+    if (hints == null || hints.size() == 0) return false;
+    for (ASTNode hintNode : hints) {
+      for (Node node : hintNode.getChildren()) {
+        ASTNode hint = (ASTNode) node;
+        if (hint.getChild(0).getType() != HintParser.TOK_MAPJOIN) continue;
+        Tree args = hint.getChild(1);
+        if (args.getChildCount() == 1) {
+          String text = args.getChild(0).getText();
+          if (text.equalsIgnoreCase("None")) {
+            // Hint to disable mapjoin.
+            return true;
+          }
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
    * Merges node to target
    */
   private void mergeJoins(QB qb, QBJoinTree node, QBJoinTree target, int pos, int[] tgtToNodeExprMap)
{
@@ -11377,6 +11402,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     // Set the semijoin hints in parse context
     pCtx.setSemiJoinHints(parseSemiJoinHint(getQB().getParseInfo().getHintList()));
+    // Set the mapjoin hint if it needs to be disabled.
+    pCtx.setDisableMapJoin(disableMapJoinWithHint(getQB().getParseInfo().getHintList()));
 
     // 5. Take care of view creation
     if (createVwDesc != null) {

http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/test/queries/clientpositive/mapjoin_hint.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mapjoin_hint.q b/ql/src/test/queries/clientpositive/mapjoin_hint.q
new file mode 100644
index 0000000..75bebfa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mapjoin_hint.q
@@ -0,0 +1,69 @@
+set hive.compute.query.using.stats=false;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.optimize.ppd=true;
+set hive.ppd.remove.duplicatefilters=true;
+set hive.tez.dynamic.partition.pruning=true;
+set hive.tez.dynamic.semijoin.reduction=true;
+set hive.optimize.metadataonly=false;
+set hive.optimize.index.filter=true;
+set hive.stats.autogather=true;
+set hive.tez.bigtable.minsize.semijoin.reduction=1;
+set hive.tez.min.bloom.filter.entries=1;
+set hive.stats.fetch.column.stats=true;
+
+-- Create Tables
+create table srcpart_date (key string, value string) partitioned by (ds string ) stored as
ORC;
+CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds string) STORED
as ORC;
+
+-- Add Partitions
+alter table srcpart_date add partition (ds = "2008-04-08");
+alter table srcpart_date add partition (ds = "2008-04-09");
+
+alter table srcpart_small add partition (ds = "2008-04-08");
+alter table srcpart_small add partition (ds = "2008-04-09");
+
+-- Load
+insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select key, value from
srcpart where ds = "2008-04-08";
+insert overwrite table srcpart_date partition (ds = "2008-04-09") select key, value from
srcpart where ds = "2008-04-09";
+insert overwrite table srcpart_small partition (ds = "2008-04-09") select key, value from
srcpart where ds = "2008-04-09" limit 20;
+
+
+analyze table srcpart_date compute statistics for columns;
+analyze table srcpart_small compute statistics for columns;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=100000000000;
+
+--HIVE-17475
+EXPLAIN select /*+ mapjoin(None)*/ count(*) from srcpart_date join srcpart_small on (srcpart_date.key
= srcpart_small.key1);
+EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key = srcpart_small.key1);
+
+
+-- Ensure that hint works even with CBO on, on a query with subquery.
+create table tnull(i int, c char(2));
+insert into tnull values(NULL, NULL), (NULL, NULL);
+
+create table tempty(c char(2));
+
+CREATE TABLE part_null(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+;
+
+LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into table part_null;
+
+insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE BRUSHED BRASS',
23, 'MED BAG',1464.48,'hely blith');
+
+explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name from part_null
where p_name is null);
+explain select * from part where p_name = (select p_name from part_null where p_name is null);

http://git-wip-us.apache.org/repos/asf/hive/blob/a74107ef/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
new file mode 100644
index 0000000..6a43f0d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_hint.q.out
@@ -0,0 +1,643 @@
+PREHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds string
) stored as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: create table srcpart_date (key string, value string) partitioned by (ds
string ) stored as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_date
+PREHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds
string) STORED as ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: CREATE TABLE srcpart_small(key1 STRING, value1 STRING) partitioned by (ds
string) STORED as ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srcpart_small
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_date
+POSTHOOK: query: alter table srcpart_date add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-08")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-08
+PREHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09")
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Output: default@srcpart_small
+POSTHOOK: query: alter table srcpart_small add partition (ds = "2008-04-09")
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-08" ) select
key, value from srcpart where ds = "2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-08).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_date partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_date PARTITION(ds=2008-04-09).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@srcpart_small@ds=2008-04-09
+POSTHOOK: query: insert overwrite table srcpart_small partition (ds = "2008-04-09") select
key, value from srcpart where ds = "2008-04-09" limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).key1 SIMPLE [(srcpart)srcpart.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: srcpart_small PARTITION(ds=2008-04-09).value1 SIMPLE [(srcpart)srcpart.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: analyze table srcpart_date compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_date
+PREHOOK: Input: default@srcpart_date@ds=2008-04-08
+PREHOOK: Input: default@srcpart_date@ds=2008-04-09
+PREHOOK: Output: default@srcpart_date
+PREHOOK: Output: default@srcpart_date@ds=2008-04-08
+PREHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_date compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_date
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_date@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_date
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_date@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: analyze table srcpart_small compute statistics for columns
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart_small
+PREHOOK: Input: default@srcpart_small@ds=2008-04-08
+PREHOOK: Input: default@srcpart_small@ds=2008-04-09
+PREHOOK: Output: default@srcpart_small
+PREHOOK: Output: default@srcpart_small@ds=2008-04-08
+PREHOOK: Output: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table srcpart_small compute statistics for columns
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart_small
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Input: default@srcpart_small@ds=2008-04-09
+POSTHOOK: Output: default@srcpart_small
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-08
+POSTHOOK: Output: default@srcpart_small@ds=2008-04-09
+#### A masked pattern was here ####
+PREHOOK: query: EXPLAIN select /*+ mapjoin(None)*/ count(*) from srcpart_date join srcpart_small
on (srcpart_date.key = srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select /*+ mapjoin(None)*/ count(*) from srcpart_date join srcpart_small
on (srcpart_date.key = srcpart_small.key1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Reducer 5 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min)
AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter))))
(type: boolean)
+                  Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (key is not null and (key BETWEEN DynamicValue(RS_7_srcpart_small_key1_min)
AND DynamicValue(RS_7_srcpart_small_key1_max) and in_bloom_filter(key, DynamicValue(RS_7_srcpart_small_key1_bloom_filter))))
(type: boolean)
+                    Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats:
PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column
stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column
stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column
stats: PARTIAL
+                      Select Operator
+                        expressions: _col0 (type: string)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column
stats: PARTIAL
+                        Group By Operator
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2
+                          Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column
stats: PARTIAL
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE
Column stats: PARTIAL
+                            value expressions: _col0 (type: string), _col1 (type: string),
_col2 (type: binary)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column stats:
PARTIAL
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
PARTIAL
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
PARTIAL
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2,
expectedEntries=40)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
PARTIAL
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats:
PARTIAL
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type:
binary)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key
= srcpart_small.key1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN select count(*) from srcpart_date join srcpart_small on (srcpart_date.key
= srcpart_small.key1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 3 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE Column
stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 2000 Data size: 174000 Basic stats: COMPLETE
Column stats: COMPLETE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col0 (type: string)
+                          1 _col0 (type: string)
+                        input vertices:
+                          1 Map 3
+                        Statistics: Num rows: 129 Data size: 1032 Basic stats: COMPLETE Column
stats: PARTIAL
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: PARTIAL
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column
stats: PARTIAL
+                            value expressions: _col0 (type: bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_small
+                  filterExpr: key1 is not null (type: boolean)
+                  Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats:
PARTIAL
+                  Filter Operator
+                    predicate: key1 is not null (type: boolean)
+                    Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column
stats: PARTIAL
+                    Select Operator
+                      expressions: key1 (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column
stats: PARTIAL
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column
stats: PARTIAL
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats:
PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: create table tnull(i int, c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tnull
+POSTHOOK: query: create table tnull(i int, c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tnull
+PREHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tnull
+POSTHOOK: query: insert into tnull values(NULL, NULL), (NULL, NULL)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tnull
+POSTHOOK: Lineage: tnull.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: tnull.i EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+PREHOOK: query: create table tempty(c char(2))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tempty
+POSTHOOK: query: create table tempty(c char(2))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tempty
+PREHOOK: query: CREATE TABLE part_null(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_null
+POSTHOOK: query: CREATE TABLE part_null(
+p_partkey INT,
+p_name STRING,
+p_mfgr STRING,
+p_brand STRING,
+p_type STRING,
+p_size INT,
+p_container STRING,
+p_retailprice DOUBLE,
+p_comment STRING
+)
+ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_null
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite into
table part_null
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@part_null
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny_nulls.txt' overwrite
into table part_null
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@part_null
+PREHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE
BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith')
+PREHOOK: type: QUERY
+PREHOOK: Output: default@part_null
+POSTHOOK: query: insert into part_null values(78487,NULL,'Manufacturer#6','Brand#52','LARGE
BRUSHED BRASS', 23, 'MED BAG',1464.48,'hely blith')
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@part_null
+POSTHOOK: Lineage: part_null.p_brand SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_comment SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col9,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_container SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col7,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_mfgr SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_partkey EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_retailprice EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col8,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_size EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6,
type:string, comment:), ]
+POSTHOOK: Lineage: part_null.p_type SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5,
type:string, comment:), ]
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer
2' is a cross product
+PREHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name
from part_null where p_name is null)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select /*+ mapjoin(None)*/ * from part where p_name = (select p_name
from part_null where p_name is null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 4
(CUSTOM_SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  filterExpr: (p_name = null) (type: boolean)
+                  Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (p_name = null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats:
COMPLETE
+                    Select Operator
+                      expressions: p_partkey (type: int), p_mfgr (type: string), p_brand
(type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice
(type: double), p_comment (type: string)
+                      outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7,
_col8
+                      Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column
stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column
stats: COMPLETE
+                        value expressions: _col0 (type: int), _col2 (type: string), _col3
(type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type:
double), _col8 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: part_null
+                  filterExpr: p_name is null (type: boolean)
+                  Statistics: Num rows: 32 Data size: 5888 Basic stats: COMPLETE Column stats:
NONE
+                  Filter Operator
+                    predicate: p_name is null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats:
NONE
+                    Select Operator
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column
stats: NONE
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column
stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column
stats: NONE
+                          value expressions: _col0 (type: bigint)
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column
stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                     Inner Join 0 to 2
+                keys:
+                  0 
+                  1 
+                  2 
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats:
NONE
+                Select Operator
+                  expressions: _col0 (type: int), null (type: string), _col2 (type: string),
_col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7
(type: double), _col8 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8
+                  Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats:
NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column stats:
NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats:
NONE
+                Filter Operator
+                  predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column
stats: NONE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[24][bigTable=?] in task 'Reducer 3' is a cross product
+PREHOOK: query: explain select * from part where p_name = (select p_name from part_null where
p_name is null)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from part where p_name = (select p_name from part_null
where p_name is null)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (CUSTOM_SIMPLE_EDGE), Map 4 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  filterExpr: (p_name = null) (type: boolean)
+                  Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column
stats: COMPLETE
+                  Filter Operator
+                    predicate: (p_name = null) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats:
COMPLETE
+                    Select Operator
+                      expressions: p_partkey (type: int), p_mfgr (type: string), p_brand
(type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice
(type: double), p_comment (type: string)
+                      outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7,
_col8
+                      Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column
stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column
stats: COMPLETE
+                        value expressions: _col0 (type: int), _col2 (type: string), _col3
(type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type:
double), _col8 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: part_null
+                  filterExpr: p_name is null (type: boolean)
+                  Statistics: Num rows: 32 Data size: 5888 Basic stats: COMPLETE Column stats:
NONE
+                  Filter Operator
+                    predicate: p_name is null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats:
NONE
+                    Select Operator
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column
stats: NONE
+                      Group By Operator
+                        aggregations: count()
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column
stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column
stats: NONE
+                          value expressions: _col0 (type: bigint)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: part_null
+                  filterExpr: p_name is null (type: boolean)
+                  Statistics: Num rows: 32 Data size: 5888 Basic stats: COMPLETE Column stats:
NONE
+                  Filter Operator
+                    predicate: p_name is null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats:
NONE
+                    Select Operator
+                      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column
stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column
stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats:
NONE
+                Filter Operator
+                  predicate: (sq_count_check(_col0) <= 1) (type: boolean)
+                  Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats:
NONE
+                    Map Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                           Inner Join 0 to 2
+                      keys:
+                        0 
+                        1 
+                        2 
+                      outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7,
_col8
+                      input vertices:
+                        0 Map 1
+                        2 Map 4
+                      Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column
stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: int), null (type: string), _col2 (type:
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string),
_col7 (type: double), _col8 (type: string)
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8
+                        Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column
stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 1 Data size: 959 Basic stats: COMPLETE Column
stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+


Mime
View raw message