hive-commits mailing list archives

From jcama...@apache.org
Subject hive git commit: HIVE-12477: Left Semijoins are incompatible with a cross-product (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)
Date Mon, 07 Dec 2015 09:54:53 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2.0 6775ccdfb -> 3111e889a


HIVE-12477: Left Semijoins are incompatible with a cross-product (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)
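
A minimal sketch of the query shape being fixed, a LEFT SEMI JOIN whose ON clause carries no column equality (a cross-product), mirroring the semijoin3.q regression test added in this commit; t1 and t2 are copies of the src test table:

    set hive.cbo.enable=false;

    select count(1)
    from
      (select key from t1 where key = 0) t1
    left semi join
      (select key from t2 where key = 0) t2
    on 1 = 1;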


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3111e889
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3111e889
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3111e889

Branch: refs/heads/branch-2.0
Commit: 3111e889a3691669cd50ad17567424fab77b53ab
Parents: 6775ccd
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Mon Dec 7 10:54:21 2015 +0100
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Mon Dec 7 10:54:21 2015 +0100

----------------------------------------------------------------------
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   7 +-
 ql/src/test/queries/clientpositive/semijoin3.q  |  28 ++++
 .../test/results/clientpositive/semijoin3.q.out | 156 +++++++++++++++++++
 3 files changed, 190 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3111e889/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index e1a0c4a..dca3081 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -55,7 +55,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.ObjectPair;
-import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.StatsSetupConst.StatDB;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -2367,6 +2366,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
           LOG.warn(ErrorMsg.OUTERJOIN_USES_FILTERS.getErrorCodedMsg());
           joinTree.getFiltersForPushing().get(1).add(joinCond);
         }
+      } else if (type.equals(JoinType.LEFTSEMI)) {
+        joinTree.getExpressions().get(0).add(leftCondn);
+        joinTree.getExpressions().get(1).add(rightCondn);
+        boolean nullsafe = joinCond.getToken().getType() == HiveParser.EQUAL_NS;
+        joinTree.getNullSafes().add(nullsafe);
+        joinTree.getFiltersForPushing().get(1).add(joinCond);
       } else {
         joinTree.getFiltersForPushing().get(1).add(joinCond);
       }
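
In short: before this patch a LEFT SEMI join condition on this code path fell through to the final else branch above, which only pushes the condition as a filter; the new branch also records the condition's left and right sides as semijoin key expressions, tracks whether the comparison is null-safe (EQUAL_NS), and still pushes the whole condition as a filter on the right-hand input. For the constant condition 1 = 1 in the test below, both sides therefore key on the literal 1, which is what the plan's Reduce Output Operators show:

    key expressions: 1 (type: int)
    Map-reduce partition columns: 1 (type: int)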

http://git-wip-us.apache.org/repos/asf/hive/blob/3111e889/ql/src/test/queries/clientpositive/semijoin3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin3.q b/ql/src/test/queries/clientpositive/semijoin3.q
new file mode 100644
index 0000000..a502b9b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/semijoin3.q
@@ -0,0 +1,28 @@
+create table t1 as select cast(key as int) key, value from src;
+
+create table t2 as select cast(key as int) key, value from src;
+
+set hive.cbo.enable=false;
+
+explain
+select count(1)
+from
+  (select key
+  from t1
+  where key = 0) t1
+left semi join
+  (select key
+  from t2
+  where key = 0) t2
+on 1 = 1;
+
+select count(1)
+from
+  (select key
+  from t1
+  where key = 0) t1
+left semi join
+  (select key
+  from t2
+  where key = 0) t2
+on 1 = 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/3111e889/ql/src/test/results/clientpositive/semijoin3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/semijoin3.q.out b/ql/src/test/results/clientpositive/semijoin3.q.out
new file mode 100644
index 0000000..d7aad9e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/semijoin3.q.out
@@ -0,0 +1,156 @@
+PREHOOK: query: create table t1 as select cast(key as int) key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 as select cast(key as int) key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 as select cast(key as int) key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 as select cast(key as int) key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: explain
+select count(1)
+from
+  (select key
+  from t1
+  where key = 0) t1
+left semi join
+  (select key
+  from t2
+  where key = 0) t2
+on 1 = 1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1)
+from
+  (select key
+  from t1
+  where key = 0) t1
+left semi join
+  (select key
+  from t2
+  where key = 0) t2
+on 1 = 1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 0) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: 1 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: 1 (type: int)
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            alias: t2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key = 0) (type: boolean)
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: 1 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: 1 (type: int)
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          keys:
+            0 1 (type: int)
+            1 1 (type: int)
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            aggregations: count(1)
+            mode: hash
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1)
+from
+  (select key
+  from t1
+  where key = 0) t1
+left semi join
+  (select key
+  from t2
+  where key = 0) t2
+on 1 = 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1)
+from
+  (select key
+  from t1
+  where key = 0) t1
+left semi join
+  (select key
+  from t2
+  where key = 0) t2
+on 1 = 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+3
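
For reference on the final result: assuming the standard 500-row src fixture, where the key 0 occurs three times, the filtered t1 side contributes three rows, and the always-true semijoin condition keeps all of them because the filtered t2 side is non-empty, hence the count of 3. A quick sanity check (hypothetical, not part of this commit):

    select count(*) from src where key = 0;   -- 3 with the standard src data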

