hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jpull...@apache.org
Subject hive git commit: HIVE-10841: [WHERE col is not null] does not work sometimes for queries with many JOIN statements
Date Mon, 15 Jun 2015 22:41:42 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1.0 496a6f7af -> 9692e89ef


HIVE-10841: [WHERE col is not null] does not work sometimes for queries with many JOIN statements


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9692e89e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9692e89e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9692e89e

Branch: refs/heads/branch-1.0
Commit: 9692e89ef6674228da01c3c11e1fc9f2e5f7942f
Parents: 496a6f7
Author: jpullokk <jpullokk@apache.org>
Authored: Mon Jun 15 15:39:58 2015 -0700
Committer: jpullokk <jpullokk@apache.org>
Committed: Mon Jun 15 15:41:06 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |  25 ++
 ql/src/test/queries/clientpositive/join42.q     |  36 +++
 ql/src/test/results/clientpositive/join42.q.out | 312 +++++++++++++++++++
 3 files changed, 373 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9692e89e/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 00bf6c8..63d5214 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -8314,12 +8314,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     if (tree.getJoinSrc() == null) {
       return;
     }
+
     // make array with QBJoinTree : outer most(0) --> inner most(n)
     List<QBJoinTree> trees = new ArrayList<QBJoinTree>();
     for (;tree != null; tree = tree.getJoinSrc()) {
       trees.add(tree);
     }
+
     // merging from 'target'(inner) to 'node'(outer)
+    boolean mergedQBJTree = false;
     for (int i = trees.size() - 1; i >= 0; i--) {
       QBJoinTree target = trees.get(i);
       if (target == null) {
@@ -8349,6 +8352,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           }
           mergeJoins(qb, node, target, pos, mergeDetails.getSecond());
           trees.set(j, null);
+          mergedQBJTree = true;
           continue; // continue merging with next alias
         }
         /*
@@ -8361,6 +8365,27 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         }
       }
     }
+
+    // Now that we reordered QBJoinTrees, update leftaliases of all
+    // QBJoinTree from innermost to outer
+    if ((trees.size() > 1) && mergedQBJTree) {
+      QBJoinTree curQBJTree = null;
+      QBJoinTree prevQBJTree = null;
+      for (int i = trees.size() - 1; i >= 0; i--) {
+        curQBJTree = trees.get(i);
+        if (curQBJTree != null) {
+          if (prevQBJTree != null) {
+            ArrayList<String> newCurLeftAliases = new ArrayList<String>();
+            newCurLeftAliases.addAll(Arrays.asList(prevQBJTree.getLeftAliases()));
+            newCurLeftAliases.addAll(Arrays.asList(prevQBJTree.getRightAliases()));
+            curQBJTree
+                .setLeftAliases(newCurLeftAliases.toArray(new String[newCurLeftAliases.size()]));
+          }
+          prevQBJTree = curQBJTree;
+        }
+      }
+    }
+
     // reconstruct join tree
     QBJoinTree current = null;
     for (int i = 0; i < trees.size(); i++) {

http://git-wip-us.apache.org/repos/asf/hive/blob/9692e89e/ql/src/test/queries/clientpositive/join42.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join42.q b/ql/src/test/queries/clientpositive/join42.q
new file mode 100644
index 0000000..f379058
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join42.q
@@ -0,0 +1,36 @@
+create table L as select 4436 id;
+create table LA as select 4436 loan_id, 4748 aid, 4415 pi_id;
+create table FR as select 4436 loan_id;
+create table A as select 4748 id;
+create table PI as select 4415 id;
+
+create table acct as select 4748 aid, 10 acc_n, 122 brn;
+insert into table acct values(4748, null, null);
+insert into table acct values(4748, null, null);
+
+--[HIVE-10841] (WHERE col is not null) does not work sometimes for queries with many JOIN
statements
+explain select
+  acct.ACC_N,
+  acct.brn
+FROM L
+JOIN LA ON L.id = LA.loan_id
+JOIN FR ON L.id = FR.loan_id
+JOIN A ON LA.aid = A.id
+JOIN PI ON PI.id = LA.pi_id
+JOIN acct ON A.id = acct.aid
+WHERE
+  L.id = 4436
+  and acct.brn is not null;
+
+select
+  acct.ACC_N,
+  acct.brn
+FROM L
+JOIN LA ON L.id = LA.loan_id
+JOIN FR ON L.id = FR.loan_id
+JOIN A ON LA.aid = A.id
+JOIN PI ON PI.id = LA.pi_id
+JOIN acct ON A.id = acct.aid
+WHERE
+  L.id = 4436
+  and acct.brn is not null;

http://git-wip-us.apache.org/repos/asf/hive/blob/9692e89e/ql/src/test/results/clientpositive/join42.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join42.q.out b/ql/src/test/results/clientpositive/join42.q.out
new file mode 100644
index 0000000..6845399
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join42.q.out
@@ -0,0 +1,312 @@
+PREHOOK: query: create table L as select 4436 id
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@L
+POSTHOOK: query: create table L as select 4436 id
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@L
+PREHOOK: query: create table LA as select 4436 loan_id, 4748 aid, 4415 pi_id
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@LA
+POSTHOOK: query: create table LA as select 4436 loan_id, 4748 aid, 4415 pi_id
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@LA
+PREHOOK: query: create table FR as select 4436 loan_id
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@FR
+POSTHOOK: query: create table FR as select 4436 loan_id
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@FR
+PREHOOK: query: create table A as select 4748 id
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@A
+POSTHOOK: query: create table A as select 4748 id
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@A
+PREHOOK: query: create table PI as select 4415 id
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@PI
+POSTHOOK: query: create table PI as select 4415 id
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@PI
+PREHOOK: query: create table acct as select 4748 aid, 10 acc_n, 122 brn
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acct
+POSTHOOK: query: create table acct as select 4748 aid, 10 acc_n, 122 brn
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acct
+PREHOOK: query: insert into table acct values(4748, null, null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@acct
+POSTHOOK: query: insert into table acct values(4748, null, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@acct
+POSTHOOK: Lineage: acct.acc_n EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: acct.aid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: acct.brn EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+PREHOOK: query: insert into table acct values(4748, null, null)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@acct
+POSTHOOK: query: insert into table acct values(4748, null, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@acct
+POSTHOOK: Lineage: acct.acc_n EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: acct.aid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: acct.brn EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+PREHOOK: query: --[HIVE-10841] (WHERE col is not null) does not work sometimes for queries
with many JOIN statements
+explain select
+  acct.ACC_N,
+  acct.brn
+FROM L
+JOIN LA ON L.id = LA.loan_id
+JOIN FR ON L.id = FR.loan_id
+JOIN A ON LA.aid = A.id
+JOIN PI ON PI.id = LA.pi_id
+JOIN acct ON A.id = acct.aid
+WHERE
+  L.id = 4436
+  and acct.brn is not null
+PREHOOK: type: QUERY
+POSTHOOK: query: --[HIVE-10841] (WHERE col is not null) does not work sometimes for queries
with many JOIN statements
+explain select
+  acct.ACC_N,
+  acct.brn
+FROM L
+JOIN LA ON L.id = LA.loan_id
+JOIN FR ON L.id = FR.loan_id
+JOIN A ON LA.aid = A.id
+JOIN PI ON PI.id = LA.pi_id
+JOIN acct ON A.id = acct.aid
+WHERE
+  L.id = 4436
+  and acct.brn is not null
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-4
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: la
+            Statistics: Num rows: 1 Data size: 14 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((loan_id is not null and aid is not null) and pi_id is not null)
and (loan_id = 4436)) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 4436 (type: int)
+                sort order: +
+                Map-reduce partition columns: 4436 (type: int)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                value expressions: aid (type: int), pi_id (type: int)
+          TableScan
+            alias: fr
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (loan_id = 4436) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 4436 (type: int)
+                sort order: +
+                Map-reduce partition columns: 4436 (type: int)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+          TableScan
+            alias: l
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (id = 4436) (type: boolean)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              Reduce Output Operator
+                key expressions: 4436 (type: int)
+                sort order: +
+                Map-reduce partition columns: 4436 (type: int)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 0 to 2
+          condition expressions:
+            0 
+            1 {VALUE._col0} {VALUE._col1}
+            2 
+          outputColumnNames: _col5, _col6
+          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: acct
+            Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (aid is not null and brn is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: aid (type: int)
+                sort order: +
+                Map-reduce partition columns: aid (type: int)
+                Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats:
NONE
+                value expressions: acc_n (type: int), brn (type: int)
+          TableScan
+            alias: a
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: id is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: id (type: int)
+                sort order: +
+                Map-reduce partition columns: id (type: int)
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+          TableScan
+            Reduce Output Operator
+              key expressions: _col5 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col5 (type: int)
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              value expressions: _col6 (type: int)
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 1 to 2
+          condition expressions:
+            0 {VALUE._col5}
+            1 
+            2 {VALUE._col0} {VALUE._col1}
+          outputColumnNames: _col6, _col19, _col20
+          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col6 (type: int)
+              sort order: +
+              Map-reduce partition columns: _col6 (type: int)
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col19 (type: int), _col20 (type: int)
+          TableScan
+            alias: pi
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: id is not null (type: boolean)
+              Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                key expressions: id (type: int)
+                sort order: +
+                Map-reduce partition columns: id (type: int)
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats:
NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col18} {VALUE._col19}
+            1 
+          outputColumnNames: _col19, _col20
+          Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col19 (type: int), _col20 (type: int)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select
+  acct.ACC_N,
+  acct.brn
+FROM L
+JOIN LA ON L.id = LA.loan_id
+JOIN FR ON L.id = FR.loan_id
+JOIN A ON LA.aid = A.id
+JOIN PI ON PI.id = LA.pi_id
+JOIN acct ON A.id = acct.aid
+WHERE
+  L.id = 4436
+  and acct.brn is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@a
+PREHOOK: Input: default@acct
+PREHOOK: Input: default@fr
+PREHOOK: Input: default@l
+PREHOOK: Input: default@la
+PREHOOK: Input: default@pi
+#### A masked pattern was here ####
+POSTHOOK: query: select
+  acct.ACC_N,
+  acct.brn
+FROM L
+JOIN LA ON L.id = LA.loan_id
+JOIN FR ON L.id = FR.loan_id
+JOIN A ON LA.aid = A.id
+JOIN PI ON PI.id = LA.pi_id
+JOIN acct ON A.id = acct.aid
+WHERE
+  L.id = 4436
+  and acct.brn is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@a
+POSTHOOK: Input: default@acct
+POSTHOOK: Input: default@fr
+POSTHOOK: Input: default@l
+POSTHOOK: Input: default@la
+POSTHOOK: Input: default@pi
+#### A masked pattern was here ####
+10	122


Mime
View raw message