hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From vg...@apache.org
Subject hive git commit: HIVE-20514: Query with outer join filter is failing with dynamic partition join (Vineet Garg, reviewed by Jason Dere)
Date Mon, 10 Sep 2018 18:19:32 GMT
Repository: hive
Updated Branches:
  refs/heads/master c30dcbb4b -> 116d2393f


HIVE-20514: Query with outer join filter is failing with dynamic partition join (Vineet Garg,
reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/116d2393
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/116d2393
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/116d2393

Branch: refs/heads/master
Commit: 116d2393f4dd06eaac7ef905bfdae10ae4e7b2ea
Parents: c30dcbb
Author: Vineet Garg <vgarg@apache.org>
Authored: Mon Sep 10 11:18:45 2018 -0700
Committer: Vineet Garg <vgarg@apache.org>
Committed: Mon Sep 10 11:18:45 2018 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/MapJoinProcessor.java     |  19 +--
 .../clientpositive/tez_dynpart_hashjoin_1.q     |  16 +++
 .../llap/tez_dynpart_hashjoin_1.q.out           | 140 +++++++++++++++++++
 .../llap/tez_dynpart_hashjoin_3.q.out           |   4 +-
 4 files changed, 169 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
index bae80f3..019372b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java
@@ -1158,15 +1158,18 @@ public class MapJoinProcessor extends Transform {
     }
 
     Map<Byte, List<ExprNodeDesc>> filters = desc.getFilters();
-    Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
-    for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
-      byte srcTag = entry.getKey();
-      List<ExprNodeDesc> filter = entry.getValue();
-
-      Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
-      newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
+    if(adjustParentsChildren) {
+      // backtrack and update filter expressions only if RS is to be removed
+      Map<Byte, List<ExprNodeDesc>> newFilters = new HashMap<Byte, List<ExprNodeDesc>>();
+      for (Map.Entry<Byte, List<ExprNodeDesc>> entry : filters.entrySet()) {
+        byte srcTag = entry.getKey();
+        List<ExprNodeDesc> filter = entry.getValue();
+
+        Operator<?> terminal = oldReduceSinkParentOps.get(srcTag);
+        newFilters.put(srcTag, ExprNodeDescUtils.backtrack(filter, op, terminal));
+      }
+      desc.setFilters(filters = newFilters);
     }
-    desc.setFilters(filters = newFilters);
 
     // create dumpfile prefix needed to create descriptor
     String dumpFilePrefix = "";

http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
index ea3dfce..47c0038 100644
--- a/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
+++ b/ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_1.q
@@ -60,6 +60,22 @@ set hive.auto.convert.join.noconditionaltask.size=20000;
 set hive.exec.reducers.bytes.per.reducer=20000;
 set hive.stats.fetch.column.stats=false;
 -- Try with dynamically partitioned hashjoin
+
+-- hashjoin with filter
+explain select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint;
+
+select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint;
+
 explain
 select
   *

http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
index cfa87a7..d204b47 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out
@@ -415,6 +415,146 @@ NULL	6
 -8915	1
 -3799	1
 10782	1
+PREHOOK: query: explain select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                    Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint),
cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1
(type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type:
timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: int)
+                        Statistics: Num rows: ###Masked### Data size: ###Masked### Basic
stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: tinyint), _col1 (type: smallint),
_col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7
(type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean),
_col11 (type: boolean)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: cint BETWEEN 1000000 AND 3000000 (type: boolean)
+                    Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint),
cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1
(type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type:
timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6,
_col7, _col8, _col9, _col10, _col11
+                      Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col2 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col2 (type: int)
+                        Statistics: Num rows: ###Masked### Data size: ###Masked### Basic
stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: tinyint), _col1 (type: smallint),
_col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7
(type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean),
_col11 (type: boolean)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Map Join Operator
+                condition map:
+                     Left Outer Join 0 to 1
+                filter predicates:
+                  0 {(UDFToInteger(VALUE._col1) <> KEY.reducesinkkey0)}
+                  1 
+                keys:
+                  0 KEY.reducesinkkey0 (type: int)
+                  1 KEY.reducesinkkey0 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19,
_col20, _col21, _col22, _col23
+                input vertices:
+                  1 Map 4
+                Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE
Column stats: NONE
+                HybridGraceHashJoin: true
+                Reduce Output Operator
+                  key expressions: _col2 (type: int)
+                  sort order: +
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3
(type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type:
string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11
(type: boolean), _col12 (type: tinyint), _col13 (type: smallint), _col14 (type: int), _col15
(type: bigint), _col16 (type: float), _col17 (type: double), _col18 (type: string), _col19
(type: string), _col20 (type: timestamp), _col21 (type: timestamp), _col22 (type: boolean),
_col23 (type: boolean)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Select Operator
+                expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0
(type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: double),
VALUE._col5 (type: string), VALUE._col6 (type: string), VALUE._col7 (type: timestamp), VALUE._col8
(type: timestamp), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), VALUE._col11
(type: tinyint), VALUE._col12 (type: smallint), VALUE._col13 (type: int), VALUE._col14 (type:
bigint), VALUE._col15 (type: float), VALUE._col16 (type: double), VALUE._col17 (type: string),
VALUE._col18 (type: string), VALUE._col19 (type: timestamp), VALUE._col20 (type: timestamp),
VALUE._col21 (type: boolean), VALUE._col22 (type: boolean)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7,
_col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19,
_col20, _col21, _col22, _col23
+                Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE
Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats:
COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: select
+  *
+from alltypesorc a left outer join alltypesorc b on a.cint = b.cint and a.csmallint != a.cint
+where
+  a.cint between 1000000 and 3000000
+order by a.cint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+11	NULL	1000828	1531084669	11.0	NULL	wM316f6NqGIkoP388j3F6	poWQQo3Upvt3Wh	1969-12-31 16:00:02.351
NULL	false	true	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	-3799	1248059	1864027286	NULL	-3799.0	Uhps6mMh3IfHB3j7yH62K	4KWs6gw7lv2WYd66P	NULL	1969-12-31
15:59:54.622	false	true	NULL	-3799	1248059	1864027286	NULL	-3799.0	Uhps6mMh3IfHB3j7yH62K	4KWs6gw7lv2WYd66P
NULL	1969-12-31 15:59:54.622	false	true
+NULL	10782	1286921	1864027286	NULL	10782.0	ODLrXI8882q8LS8	4KWs6gw7lv2WYd66P	NULL	1969-12-31
15:59:52.138	true	true	NULL	10782	1286921	1864027286	NULL	10782.0	ODLrXI8882q8LS8	4KWs6gw7lv2WYd66P
NULL	1969-12-31 15:59:52.138	true	true
+NULL	-13036	1288927	-1645852809	NULL	-13036.0	yinBY725P7V2	xH7445Rals48VOulSyR5F	NULL	1969-12-31
16:00:00.763	true	false	NULL	-13036	1288927	-1645852809	NULL	-13036.0	yinBY725P7V2	xH7445Rals48VOulSyR5F
NULL	1969-12-31 16:00:00.763	true	false
+11	NULL	1310786	-413875656	11.0	NULL	W0rvA4H1xn0xMG4uk0	8yVVjG	1969-12-31 16:00:02.351	NULL
false	true	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+-51	NULL	2089466	-240556350	-51.0	NULL	cXX24dH7tblSj46j2g	C31eea0wrHHqvj	1969-12-31 16:00:08.451
NULL	true	true	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+NULL	-8915	2101183	1864027286	NULL	-8915.0	x7By66525	4KWs6gw7lv2WYd66P	NULL	1969-12-31 16:00:05.831
false	true	NULL	-8915	2101183	1864027286	NULL	-8915.0	x7By66525	4KWs6gw7lv2WYd66P	NULL	1969-12-31
16:00:05.831	false	true
+8	NULL	2229621	-381406148	8.0	NULL	q7onkS7QRPh5ghOK	oKb0bi	1969-12-31 16:00:15.892	NULL	true
false	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+8	NULL	2433892	-1611863517	8.0	NULL	674ILv3V2TxFqXP6wSbL	VLprkK2XfX	1969-12-31 16:00:15.892
NULL	false	true	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
+-51	NULL	2949963	-1580871111	-51.0	NULL	0K68k3bdl7jO7	TPPAu	1969-12-31 16:00:08.451	NULL
true	false	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL	NULL
 PREHOOK: query: explain
 select
   *

http://git-wip-us.apache.org/repos/asf/hive/blob/116d2393/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
index 2a03d37..990e357 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out
@@ -182,13 +182,13 @@ STAGE PLANS:
                     Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats:
COMPLETE
                     value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3
(type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type:
string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11
(type: boolean)
         Reducer 3 
-            Execution mode: llap
+            Execution mode: vectorized, llap
             Reduce Operator Tree:
               Map Join Operator
                 condition map:
                      Left Outer Join 0 to 1
                 filter predicates:
-                  0 {(_col2 < 100)}
+                  0 {(KEY.reducesinkkey0 < 100)}
                   1 
                 keys:
                   0 KEY.reducesinkkey0 (type: int)


Mime
View raw message