hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jcama...@apache.org
Subject hive git commit: HIVE-15327: Outerjoin might produce wrong result depending on joinEmitInterval value (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
Date Fri, 02 Dec 2016 19:27:48 GMT
Repository: hive
Updated Branches:
  refs/heads/master 4e0754681 -> 7065407e6


HIVE-15327: Outerjoin might produce wrong result depending on joinEmitInterval value (Jesus
Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7065407e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7065407e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7065407e

Branch: refs/heads/master
Commit: 7065407e63b5dfca6e775ce2120e7c66333578c6
Parents: 4e07546
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Thu Dec 1 11:48:56 2016 +0000
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Fri Dec 2 12:46:37 2016 +0000

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   2 +
 .../hadoop/hive/ql/exec/JoinOperator.java       |   3 +-
 .../queries/clientpositive/join_emit_interval.q |  31 +++
 .../clientpositive/mapjoin_emit_interval.q      |  32 +++
 .../clientpositive/join_emit_interval.q.out     | 231 +++++++++++++++++
 .../llap/join_emit_interval.q.out               | 259 +++++++++++++++++++
 .../llap/mapjoin_emit_interval.q.out            | 247 ++++++++++++++++++
 7 files changed, 804 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e4910e4..27ece51 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -180,6 +180,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\
   insert_values_tmp_table.q,\
   join0.q,\
   join1.q,\
+  join_emit_interval.q,\
   join_nullsafe.q,\
   leftsemijoin.q,\
   limit_pushdown.q,\
@@ -509,6 +510,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   load_dyn_part5.q,\
   lvj_mapjoin.q,\
   mapjoin_decimal.q,\
+  mapjoin_emit_interval.q,\
   mergejoin_3way.q,\
   mrr.q,\
   multiMapJoin1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
index 82056c4..0282763 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
@@ -101,7 +101,8 @@ public class JoinOperator extends CommonJoinOperator<JoinDesc> implements
Serial
       // Are we consuming too much memory
       if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag
>= 0) &&
           !hasLeftSemiJoin) {
-        if (sz == joinEmitInterval && !hasFilter(alias)) {
+        if (sz == joinEmitInterval && !hasFilter(condn[alias-1].getLeft()) &&
+                !hasFilter(condn[alias-1].getRight())) {
           // The input is sorted by alias, so if we are already in the last join
           // operand,
           // we can emit some results now.

http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/queries/clientpositive/join_emit_interval.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join_emit_interval.q b/ql/src/test/queries/clientpositive/join_emit_interval.q
new file mode 100644
index 0000000..c59d97d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_emit_interval.q
@@ -0,0 +1,31 @@
+set hive.strict.checks.cartesian.product=false;
+set hive.join.emit.interval=1;
+
+CREATE TABLE test1 (key INT, value INT, col_1 STRING);
+INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car');
+
+CREATE TABLE test2 (key INT, value INT, col_2 STRING);
+INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None');
+
+
+-- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+-- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);

http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q b/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q
new file mode 100644
index 0000000..fe2a32a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/mapjoin_emit_interval.q
@@ -0,0 +1,32 @@
+set hive.auto.convert.join=true;
+set hive.strict.checks.cartesian.product=false;
+set hive.join.emit.interval=1;
+
+CREATE TABLE test1 (key INT, value INT, col_1 STRING);
+INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car');
+
+CREATE TABLE test2 (key INT, value INT, col_2 STRING);
+INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None');
+
+
+-- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102);
+
+-- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);
+
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102);

http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/results/clientpositive/join_emit_interval.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_emit_interval.q.out b/ql/src/test/results/clientpositive/join_emit_interval.q.out
new file mode 100644
index 0000000..263ba04
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_emit_interval.q.out
@@ -0,0 +1,231 @@
+PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test1
+POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test1
+POSTHOOK: Lineage: test1.col_1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: test1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: test1.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test2
+POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.col_2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: test2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: test2.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+PREHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test1
+            Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int), col_1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: int)
+                Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: _col0 (type: int), _col2 (type: string)
+          TableScan
+            alias: test2
+            Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int), col_2 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: _col1 (type: int)
+                sort order: +
+                Map-reduce partition columns: _col1 (type: int)
+                Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: _col0 (type: int), _col2 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          filter predicates:
+            0 {VALUE._col0 BETWEEN 100 AND 102}
+            1 
+          keys:
+            0 _col1 (type: int)
+            1 _col1 (type: int)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+98	NULL	None	NULL	NULL	NULL
+NULL	NULL	None	NULL	NULL	NULL
+99	0	Alice	NULL	NULL	NULL
+100	1	Bob	NULL	NULL	NULL
+101	2	Car	103	2	Ema
+101	2	Car	102	2	Del
+99	2	Mat	NULL	NULL	NULL
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a
cross product
+PREHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: test1
+            Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int), col_1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+          TableScan
+            alias: test2
+            Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: int), value (type: int), col_2 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Outer Join0 to 1
+          filter predicates:
+            0 {VALUE._col0 BETWEEN 100 AND 102}
+            1 
+          keys:
+            0 
+            1 
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a
cross product
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+101	2	Car	105	NULL	None
+101	2	Car	104	3	Fli
+101	2	Car	103	2	Ema
+101	2	Car	102	2	Del
+100	1	Bob	105	NULL	None
+100	1	Bob	104	3	Fli
+100	1	Bob	103	2	Ema
+100	1	Bob	102	2	Del
+99	2	Mat	NULL	NULL	NULL
+99	0	Alice	NULL	NULL	NULL
+98	NULL	None	NULL	NULL	NULL
+NULL	NULL	None	NULL	NULL	NULL

http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out
new file mode 100644
index 0000000..8158efe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out
@@ -0,0 +1,259 @@
+PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test1
+POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test1
+POSTHOOK: Lineage: test1.col_1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: test1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: test1.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test2
+POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.col_2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: test2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: test2.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+PREHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test1
+                  Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col1 (type: int)
+                      Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: int), _col2 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: test2
+                  Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col1 (type: int)
+                      Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: int), _col2 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {VALUE._col0 BETWEEN 100 AND 102}
+                  1 
+                keys:
+                  0 _col1 (type: int)
+                  1 _col1 (type: int)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats:
NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL	NULL	None	NULL	NULL	NULL
+98	NULL	None	NULL	NULL	NULL
+99	0	Alice	NULL	NULL	NULL
+100	1	Bob	NULL	NULL	NULL
+99	2	Mat	NULL	NULL	NULL
+101	2	Car	102	2	Del
+101	2	Car	103	2	Ema
+Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a
cross product
+PREHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test1
+                  Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: int), _col2 (type:
string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: test2
+                  Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: int), _col2 (type:
string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Outer Join0 to 1
+                filter predicates:
+                  0 {VALUE._col0 BETWEEN 100 AND 102}
+                  1 
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats:
NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats:
NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a
cross product
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL	NULL	None	NULL	NULL	NULL
+98	NULL	None	NULL	NULL	NULL
+99	0	Alice	NULL	NULL	NULL
+99	2	Mat	NULL	NULL	NULL
+100	1	Bob	102	2	Del
+100	1	Bob	103	2	Ema
+100	1	Bob	104	3	Fli
+100	1	Bob	105	NULL	None
+101	2	Car	102	2	Del
+101	2	Car	103	2	Ema
+101	2	Car	104	3	Fli
+101	2	Car	105	NULL	None

http://git-wip-us.apache.org/repos/asf/hive/blob/7065407e/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
new file mode 100644
index 0000000..9059dac
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out
@@ -0,0 +1,247 @@
+PREHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (key INT, value INT, col_1 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@test1
+POSTHOOK: query: INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'),
+    (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@test1
+POSTHOOK: Lineage: test1.col_1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: test1.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: test1.value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@test2
+POSTHOOK: query: INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'),
+    (104, 3, 'Fli'), (105, NULL, 'None')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@test2
+POSTHOOK: Lineage: test2.col_2 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: test2.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: test2.value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+PREHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Equi-condition and condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test1
+                  Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {_col0 BETWEEN 100 AND 102}
+                        1 
+                      keys:
+                        0 _col1 (type: int)
+                        1 _col1 (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column
stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column
stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: test2
+                  Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      key expressions: _col1 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col1 (type: int)
+                      Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: int), _col2 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.value=test2.value AND test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL	NULL	None	NULL	NULL	NULL
+98	NULL	None	NULL	NULL	NULL
+99	0	Alice	NULL	NULL	NULL
+99	2	Mat	NULL	NULL	NULL
+100	1	Bob	NULL	NULL	NULL
+101	2	Car	102	2	Del
+101	2	Car	103	2	Ema
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Condition on one input (left outer join)
+EXPLAIN
+SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: test1
+                  Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats:
NONE
+                    Map Join Operator
+                      condition map:
+                           Left Outer Join0 to 1
+                      filter predicates:
+                        0 {_col0 BETWEEN 100 AND 102}
+                        1 
+                      keys:
+                        0 
+                        1 
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      input vertices:
+                        1 Map 2
+                      Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column
stats: NONE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column
stats: NONE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: test2
+                  Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                  Select Operator
+                    expressions: key (type: int), value (type: int), col_2 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats:
NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column
stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: int), _col2 (type:
string)
+            Execution mode: llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product
+PREHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *
+FROM test1 LEFT OUTER JOIN test2
+ON (test1.key between 100 and 102)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+#### A masked pattern was here ####
+NULL	NULL	None	NULL	NULL	NULL
+98	NULL	None	NULL	NULL	NULL
+99	0	Alice	NULL	NULL	NULL
+99	2	Mat	NULL	NULL	NULL
+100	1	Bob	102	2	Del
+100	1	Bob	105	NULL	None
+100	1	Bob	104	3	Fli
+100	1	Bob	103	2	Ema
+101	2	Car	102	2	Del
+101	2	Car	105	NULL	None
+101	2	Car	104	3	Fli
+101	2	Car	103	2	Ema


Mime
View raw message