hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject [22/23] hive git commit: HIVE-17726: Using exists may lead to incorrect results (Vineet Garg, reviewed by Ashutosh Chauhan)
Date Fri, 13 Oct 2017 00:07:13 GMT
HIVE-17726: Using exists may lead to incorrect results (Vineet Garg, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9375cf3c
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9375cf3c
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9375cf3c

Branch: refs/heads/hive-14535
Commit: 9375cf3c09a14f7629b38dcbe2fe9fde2aae647e
Parents: 697922b
Author: Vineet Garg <vgarg@apache.com>
Authored: Thu Oct 12 14:08:38 2017 -0700
Committer: Vineet Garg <vgarg@apache.com>
Committed: Thu Oct 12 14:09:32 2017 -0700

----------------------------------------------------------------------
 .../calcite/rules/HiveRelDecorrelator.java      | 300 ++-----
 .../queries/clientpositive/subquery_exists.q    |  24 +
 .../test/queries/clientpositive/subquery_in.q   |  50 +-
 .../clientpositive/constprog_partitioner.q.out  |  10 +-
 .../clientpositive/llap/explainuser_1.q.out     |  98 +--
 .../clientpositive/llap/subquery_exists.q.out   | 385 ++++++++-
 .../clientpositive/llap/subquery_in.q.out       | 657 ++-------------
 .../clientpositive/llap/subquery_multi.q.out    |  48 +-
 .../clientpositive/llap/subquery_notin.q.out    | 170 ++--
 .../clientpositive/llap/subquery_scalar.q.out   |  30 +-
 .../clientpositive/llap/subquery_select.q.out   |  42 +-
 .../clientpositive/llap/subquery_views.q.out    |  82 +-
 .../llap/vector_mapjoin_reduce.q.out            |  12 +-
 .../clientpositive/perf/spark/query16.q.out     | 209 ++++-
 .../clientpositive/perf/spark/query94.q.out     | 209 ++++-
 .../clientpositive/perf/tez/query14.q.out       |   4 +-
 .../clientpositive/perf/tez/query16.q.out       | 263 +++---
 .../clientpositive/perf/tez/query23.q.out       |   2 +-
 .../clientpositive/perf/tez/query94.q.out       | 263 +++---
 .../spark/constprog_partitioner.q.out           |  10 +-
 .../spark/spark_explainuser_1.q.out             |  98 +--
 .../clientpositive/spark/subquery_exists.q.out  | 390 ++++++++-
 .../clientpositive/spark/subquery_in.q.out      | 611 ++------------
 .../spark/vector_mapjoin_reduce.q.out           |  10 +-
 .../clientpositive/subquery_exists.q.out        | 420 +++++++++-
 .../clientpositive/subquery_exists_having.q.out |  28 +-
 .../clientpositive/subquery_in_having.q.out     | 803 ++++++++++---------
 .../clientpositive/subquery_notexists.q.out     |  32 +-
 .../subquery_notexists_having.q.out             |  32 +-
 .../clientpositive/subquery_notin_having.q.out  |  26 +-
 .../subquery_unqualcolumnrefs.q.out             |  82 +-
 .../clientpositive/vector_mapjoin_reduce.q.out  |   8 +-
 32 files changed, 2876 insertions(+), 2532 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9375cf3c/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
index 0fd3217..b922faa 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java
@@ -44,12 +44,11 @@ import org.apache.calcite.rel.core.Values;
 import org.apache.calcite.rel.logical.LogicalAggregate;
 import org.apache.calcite.rel.logical.LogicalCorrelate;
 import org.apache.calcite.rel.logical.LogicalFilter;
-import org.apache.calcite.rel.logical.LogicalIntersect;
 import org.apache.calcite.rel.logical.LogicalJoin;
 import org.apache.calcite.rel.logical.LogicalProject;
-import org.apache.calcite.rel.logical.LogicalUnion;
 import org.apache.calcite.rel.metadata.RelMdUtil;
 import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rel.rules.FilterCorrelateRule;
 import org.apache.calcite.rel.rules.FilterJoinRule;
 import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
 import org.apache.calcite.rel.type.RelDataType;
@@ -62,7 +61,6 @@ import org.apache.calcite.rex.RexFieldAccess;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
-import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexShuttle;
 import org.apache.calcite.rex.RexSubQuery;
 import org.apache.calcite.rex.RexUtil;
@@ -87,11 +85,10 @@ import org.apache.calcite.util.Util;
 import org.apache.calcite.util.mapping.Mappings;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -122,7 +119,6 @@ import java.util.NavigableMap;
 import java.util.Objects;
 import java.util.Set;
 import java.util.SortedMap;
-import java.util.Stack;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import javax.annotation.Nonnull;
@@ -181,8 +177,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
 
   private final HashSet<LogicalCorrelate> generatedCorRels = Sets.newHashSet();
 
-  private Stack valueGen = new Stack();
-
   //~ Constructors -----------------------------------------------------------
 
   private HiveRelDecorrelator (
@@ -269,8 +263,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       return planner2.findBestExp();
     }
 
-    assert(valueGen.isEmpty());
-
     return root;
   }
 
@@ -328,14 +320,8 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     return planner.findBestExp();
   }
 
-  protected RexNode decorrelateExpr(RexNode exp, boolean valueGenerator) {
-    DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
-    shuttle.setValueGenerator(valueGenerator);
-    return exp.accept(shuttle);
-  }
   protected RexNode decorrelateExpr(RexNode exp) {
     DecorrelateRexShuttle shuttle = new DecorrelateRexShuttle();
-    shuttle.setValueGenerator(false);
     return exp.accept(shuttle);
   }
 
@@ -1121,11 +1107,7 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
         try {
           findCorrelationEquivalent(correlation, ((Filter) rel).getCondition());
         } catch (Util.FoundOne e) {
-          // we need to keep predicate kind e.g. EQUAL or NOT EQUAL
-          // so that later while decorrelating LogicalCorrelate appropriate join predicate
-          // is generated
-          def.setPredicateKind((SqlKind)((Pair)e.getNode()).getValue());
-          map.put(def, (Integer)((Pair) e.getNode()).getKey());
+          map.put(def, (Integer) e.getNode());
         }
       }
       // If all correlation variables are now satisfied, skip creating a value
@@ -1164,22 +1146,16 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
   private void findCorrelationEquivalent(CorRef correlation, RexNode e)
           throws Util.FoundOne {
     switch (e.getKind()) {
-    // for now only EQUAL and NOT EQUAL corr predicates are optimized
-    case NOT_EQUALS:
-      if((boolean)valueGen.peek()) {
-        // we will need value generator
-        break;
-      }
-    case EQUALS:
+      case EQUALS:
         final RexCall call = (RexCall) e;
         final List<RexNode> operands = call.getOperands();
         if (references(operands.get(0), correlation)
                 && operands.get(1) instanceof RexInputRef) {
-          throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(1)).getIndex(), e.getKind()));
+          throw new Util.FoundOne(((RexInputRef) operands.get(1)).getIndex());
         }
         if (references(operands.get(1), correlation)
                 && operands.get(0) instanceof RexInputRef) {
-          throw new Util.FoundOne(Pair.of(((RexInputRef) operands.get(0)).getIndex(), e.getKind()));
+          throw new Util.FoundOne(((RexInputRef) operands.get(0)).getIndex());
         }
         break;
       case AND:
@@ -1248,22 +1224,17 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
         return null;
       }
 
-      Frame oldInputFrame = frame;
       // If this LogicalFilter has correlated reference, create value generator
       // and produce the correlated variables in the new output.
       if (cm.mapRefRelToCorRef.containsKey(rel)) {
         frame = decorrelateInputWithValueGenerator(rel);
       }
 
-      boolean valueGenerator = true;
-      if(frame.r == oldInputFrame.r) {
-      // this means correlated value generator wasn't generated
-        valueGenerator = false;
-      }
-        // Replace the filter expression to reference output of the join
-        // Map filter to the new filter over join
-        relBuilder.push(frame.r).filter(
-            (decorrelateExpr(rel.getCondition(), valueGenerator)));
+      // Replace the filter expression to reference output of the join
+      // Map filter to the new filter over join
+      relBuilder.push(frame.r).filter(
+          simplifyComparison(decorrelateExpr(rel.getCondition())));
+
       // Filter does not change the input ordering.
       // Filter rel does not permute the input.
       // All corvars produced by filter will have the same output positions in the
@@ -1273,6 +1244,39 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     }
   }
 
+  private RexNode simplifyComparison(RexNode op) {
+    switch(op.getKind()) {
+    case EQUALS:
+    case GREATER_THAN:
+    case GREATER_THAN_OR_EQUAL:
+    case LESS_THAN:
+    case LESS_THAN_OR_EQUAL:
+    case NOT_EQUALS:
+      RexCall e = (RexCall) op;
+      final List<RexNode> operands = new ArrayList<>(e.operands);
+
+      // Simplify "x <op> x"
+      final RexNode o0 = operands.get(0);
+      final RexNode o1 = operands.get(1);
+      // this should only be called when we are creating filter (decorrelate filter)
+      // since in that case null/unknown is treated as false we don't care about
+      // nullability of operands and will always rewrite op=op to op is not null
+      if (RexUtil.eq(o0, o1) )
+        switch (e.getKind()) {
+        case EQUALS:
+        case GREATER_THAN_OR_EQUAL:
+        case LESS_THAN_OR_EQUAL:
+          // "x = x" simplifies to "x is not null" (similarly <= and >=)
+          return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0);
+        default:
+          // "x != x" simplifies to "false" (similarly < and >)
+          return rexBuilder.makeLiteral(false);
+        }
+    }
+    return op;
+  }
+
+
     /**
      * Rewrite LogicalFilter.
      *
@@ -1309,15 +1313,9 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
 
     }
 
-    boolean valueGenerator = true;
-    if(frame.r == oldInput) {
-      // this means correlated value generator wasn't generated
-      valueGenerator = false;
-    }
-
     // Replace the filter expression to reference output of the join
     // Map filter to the new filter over join
-    relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition(), valueGenerator));
+    relBuilder.push(frame.r).filter(decorrelateExpr(rel.getCondition()));
 
 
     // Filter does not change the input ordering.
@@ -1347,9 +1345,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     final RelNode oldLeft = rel.getInput(0);
     final RelNode oldRight = rel.getInput(1);
 
-    boolean mightRequireValueGen = new findIfValueGenRequired().traverse(oldRight);
-    valueGen.push(mightRequireValueGen);
-
     final Frame leftFrame = getInvoke(oldLeft, rel);
     final Frame rightFrame = getInvoke(oldRight, rel);
 
@@ -1386,24 +1381,11 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       }
       final int newLeftPos = leftFrame.oldToNewOutputs.get(corDef.field);
       final int newRightPos = rightOutput.getValue();
-      if(corDef.getPredicateKind() == SqlKind.NOT_EQUALS) {
-        conditions.add(
-            rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS,
-                RexInputRef.of(newLeftPos, newLeftOutput),
-                new RexInputRef(newLeftFieldCount + newRightPos,
-                    newRightOutput.get(newRightPos).getType())));
-
-      }
-      else {
-        assert(corDef.getPredicateKind() == null
-          || corDef.getPredicateKind() == SqlKind.EQUALS);
-        conditions.add(
-            rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
-                RexInputRef.of(newLeftPos, newLeftOutput),
-                new RexInputRef(newLeftFieldCount + newRightPos,
-                    newRightOutput.get(newRightPos).getType())));
-
-      }
+      conditions.add(
+              rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+                      RexInputRef.of(newLeftPos, newLeftOutput),
+                      new RexInputRef(newLeftFieldCount + newRightPos,
+                              newRightOutput.get(newRightPos).getType())));
 
       // remove this cor var from output position mapping
       corDefOutputs.remove(corDef);
@@ -1446,8 +1428,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
             LogicalJoin.create(leftFrame.r, rightFrame.r, condition,
                     ImmutableSet.<CorrelationId>of(), rel.getJoinType().toJoinType());
 
-    valueGen.pop();
-
     return register(rel, newJoin, mapOldToNewOutputs, corDefOutputs);
   }
 
@@ -1840,66 +1820,7 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
 
   /** Shuttle that decorrelates. */
   private class DecorrelateRexShuttle extends RexShuttle {
-    private boolean valueGenerator;
-    public void setValueGenerator(boolean valueGenerator) {
-      this.valueGenerator = valueGenerator;
-    }
-
-    // DecorrelateRexShuttle ends up decorrelating expressions cor.col1 <> $4
-    // to $4=$4 if value generator is not generated, $4<>$4 is further simplified
-    // to false. This is wrong and messes up the whole tree. To prevent this visitCall
-    // is overridden to rewrite/simplify such predicates to is not null.
-    // we also need to take care that we do this only for correlated predicates and
-    // not user specified explicit predicates
-    // TODO:  This code should be removed once CALCITE-1851 is fixed and
-    // there is support of not equal
-    @Override  public RexNode visitCall(final RexCall call) {
-      if(!valueGenerator) {
-        switch (call.getKind()) {
-        case EQUALS:
-        case NOT_EQUALS:
-          final List<RexNode> operands = new ArrayList<>(call.operands);
-          RexNode o0 = operands.get(0);
-          RexNode o1 = operands.get(1);
-          boolean isCorrelated = false;
-          if (o0 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o0) != null)) {
-            o0 = decorrFieldAccess((RexFieldAccess) o0);
-            isCorrelated = true;
-
-          }
-          if (o1 instanceof RexFieldAccess && (cm.mapFieldAccessToCorRef.get(o1) != null)) {
-            o1 = decorrFieldAccess((RexFieldAccess) o1);
-            isCorrelated = true;
-          }
-          if (isCorrelated && RexUtil.eq(o0, o1)) {
-            return rexBuilder.makeCall(SqlStdOperatorTable.IS_NOT_NULL, o0);
-          }
-
-          final List<RexNode> newOperands = new ArrayList<>();
-          newOperands.add(o0);
-          newOperands.add(o1);
-          boolean[] update = { false };
-          List<RexNode> clonedOperands = visitList(newOperands, update);
-
-          return relBuilder.call(call.getOperator(), clonedOperands);
-        }
-      }
-      return super.visitCall(call);
-    }
-
     @Override public RexNode visitFieldAccess(RexFieldAccess fieldAccess) {
-      return decorrFieldAccess(fieldAccess);
-    }
-
-    @Override public RexNode visitInputRef(RexInputRef inputRef) {
-      final RexInputRef ref = getNewForOldInputRef(inputRef);
-      if (ref.getIndex() == inputRef.getIndex()
-              && ref.getType() == inputRef.getType()) {
-        return inputRef; // re-use old object, to prevent needless expr cloning
-      }
-      return ref;
-    }
-    private RexNode decorrFieldAccess(RexFieldAccess fieldAccess) {
       int newInputOutputOffset = 0;
       for (RelNode input : currentRel.getInputs()) {
         final Frame frame = map.get(input);
@@ -1914,7 +1835,7 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
               // This input rel does produce the cor var referenced.
               // Assume fieldAccess has the correct type info.
               return new RexInputRef(newInputPos + newInputOutputOffset,
-                  frame.r.getRowType().getFieldList().get(newInputPos)
+                      frame.r.getRowType().getFieldList().get(newInputPos)
                       .getType());
             }
           }
@@ -1928,6 +1849,15 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       }
       return fieldAccess;
     }
+
+    @Override public RexNode visitInputRef(RexInputRef inputRef) {
+      final RexInputRef ref = getNewForOldInputRef(inputRef);
+      if (ref.getIndex() == inputRef.getIndex()
+              && ref.getType() == inputRef.getType()) {
+        return inputRef; // re-use old object, to prevent needless expr cloning
+      }
+      return ref;
+    }
   }
 
   /** Shuttle that removes correlations. */
@@ -2952,12 +2882,10 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
   static class CorDef implements Comparable<CorDef> {
     public final CorrelationId corr;
     public final int field;
-    private SqlKind predicateKind;
 
     CorDef(CorrelationId corr, int field) {
       this.corr = corr;
       this.field = field;
-      this.predicateKind = null;
     }
 
     @Override public String toString() {
@@ -2982,13 +2910,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
       }
       return Integer.compare(field, o.field);
     }
-    public SqlKind getPredicateKind() {
-      return predicateKind;
-    }
-    public void setPredicateKind(SqlKind predKind) {
-      this.predicateKind = predKind;
-
-    }
   }
 
   /** A map of the locations of
@@ -3066,107 +2987,6 @@ public class HiveRelDecorrelator implements ReflectiveVisitor {
     }
   }
 
-  private static class findIfValueGenRequired extends HiveRelShuttleImpl {
-    private boolean mightRequireValueGen ;
-    findIfValueGenRequired() { this.mightRequireValueGen = true; }
-
-    private boolean hasRexOver(List<RexNode> projects) {
-      for(RexNode expr : projects) {
-        if(expr instanceof  RexOver) {
-          return true;
-        }
-      }
-      return false;
-    }
-    @Override public RelNode visit(HiveJoin rel) {
-      mightRequireValueGen = true;
-      return rel;
-    }
-    public RelNode visit(HiveSortLimit rel) {
-      mightRequireValueGen = true;
-      return rel;
-    }
-    public RelNode visit(HiveUnion rel) {
-      mightRequireValueGen = true;
-      return rel;
-    }
-    public RelNode visit(LogicalUnion rel) {
-      mightRequireValueGen = true;
-      return rel;
-    }
-    public RelNode visit(LogicalIntersect rel) {
-      mightRequireValueGen = true;
-      return rel;
-    }
-
-    public RelNode visit(HiveIntersect rel) {
-      mightRequireValueGen = true;
-      return rel;
-    }
-
-    @Override public RelNode visit(LogicalJoin rel) {
-      mightRequireValueGen = true;
-      return rel;
-    }
-    @Override public RelNode visit(HiveProject rel) {
-      if(!(hasRexOver(((HiveProject)rel).getProjects()))) {
-        mightRequireValueGen = false;
-        return super.visit(rel);
-      }
-      else {
-        mightRequireValueGen = true;
-        return rel;
-      }
-    }
-    @Override public RelNode visit(LogicalProject rel) {
-      if(!(hasRexOver(((LogicalProject)rel).getProjects()))) {
-        mightRequireValueGen = false;
-        return super.visit(rel);
-      }
-      else {
-        mightRequireValueGen = true;
-        return rel;
-      }
-    }
-    @Override public RelNode visit(HiveAggregate rel) {
-      // if there are aggregate functions or grouping sets we will need
-      // value generator
-      if((((HiveAggregate)rel).getAggCallList().isEmpty() == true
-          && ((HiveAggregate)rel).indicator == false)) {
-        this.mightRequireValueGen = false;
-        return super.visit(rel);
-      }
-      else {
-        // need to reset to true in case previous aggregate/project
-        // has set it to false
-        this.mightRequireValueGen = true;
-        return rel;
-      }
-    }
-    @Override public RelNode visit(LogicalAggregate rel) {
-      if((((LogicalAggregate)rel).getAggCallList().isEmpty() == true
-          && ((LogicalAggregate)rel).indicator == false)) {
-        this.mightRequireValueGen = false;
-        return super.visit(rel);
-      }
-      else {
-        // need to reset to true in case previous aggregate/project
-        // has set it to false
-        this.mightRequireValueGen = true;
-        return rel;
-      }
-    }
-    @Override public RelNode visit(LogicalCorrelate rel) {
-      // this means we are hitting nested subquery so don't
-      // need to go further
-      return rel;
-    }
-
-    public boolean traverse(RelNode root) {
-      root.accept(this);
-      return mightRequireValueGen;
-    }
-  }
   /** Builds a {@link org.apache.calcite.sql2rel.RelDecorrelator.CorelMap}. */
   private static class CorelMapBuilder extends HiveRelShuttleImpl {
     final SortedMap<CorrelationId, RelNode> mapCorToCorRel =

http://git-wip-us.apache.org/repos/asf/hive/blob/9375cf3c/ql/src/test/queries/clientpositive/subquery_exists.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_exists.q b/ql/src/test/queries/clientpositive/subquery_exists.q
index 0dfab7a..19c42f0 100644
--- a/ql/src/test/queries/clientpositive/subquery_exists.q
+++ b/ql/src/test/queries/clientpositive/subquery_exists.q
@@ -85,3 +85,27 @@ explain select * from t where exists (select count(*) from src where 1=2);
 select * from t where exists (select count(*) from src where 1=2);
 
 drop table t;
+
+drop table if exists tx1;
+create table tx1 (a integer,b integer);
+insert into tx1	values  (1, 1),
+                        (1, 2),
+                        (1, 3);
+
+select count(*) as result,3 as expected from tx1 u
+    where exists (select * from tx1 v where u.a=v.a and u.b <> v.b);
+explain select count(*) as result,3 as expected from tx1 u
+    where exists (select * from tx1 v where u.a=v.a and u.b <> v.b);
+
+drop table tx1;
+
+create table t1(i int, j int);
+insert into t1 values(4,1);
+
+create table t2(i int, j int);
+insert into t2 values(4,2),(4,3),(4,5);
+
+explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j);
+select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j);
+drop table t1;
+drop table t2;

http://git-wip-us.apache.org/repos/asf/hive/blob/9375cf3c/ql/src/test/queries/clientpositive/subquery_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index 33cc2fe..4ba170a 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -74,47 +74,25 @@ from part b where b.p_size in
 ;
 
 -- distinct, corr
-explain
-select *
-from src b
+explain 
+select * 
+from src b 
 where b.key in
-        (select distinct a.key
-         from src a
+        (select distinct a.key 
+         from src a 
          where b.value = a.value and a.key > '9'
         )
 ;
 
-select *
-from src b
+select * 
+from src b 
 where b.key in
-        (select distinct a.key
-         from src a
+        (select distinct a.key 
+         from src a 
          where b.value = a.value and a.key > '9'
         )
 ;
 
--- corr, non equi predicate, should not have a join with outer to generate
--- corr values
-explain
-select *
-from src b
-where b.key in
-        (select distinct a.key
-         from src a
-         where b.value <> a.key and a.key > '9'
-        )
-;
-
-select *
-from src b
-where b.key in
-        (select distinct a.key
-         from src a
-         where b.value <> a.key and a.key > '9'
-        )
-;
-
-
 -- non agg, non corr, windowing
 select p_mfgr, p_name, p_size 
 from part 
@@ -289,13 +267,3 @@ select * from t where i IN (select sum(i) from tt where tt.j = t.j);
 
 drop table t;
 drop table tt;
-
--- since inner query has aggregate it will be joined with outer to get all possible correlated values
-explain select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
-select * from part where p_size IN (select max(p_size) from part p where p.p_type <> part.p_name);
-
--- inner query has join so should have a join with outer query to fetch all corr values
-explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
-select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name);
-
-

http://git-wip-us.apache.org/repos/asf/hive/blob/9375cf3c/ql/src/test/results/clientpositive/constprog_partitioner.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/constprog_partitioner.q.out b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
index 98b896c..87618df 100644
--- a/ql/src/test/results/clientpositive/constprog_partitioner.q.out
+++ b/ql/src/test/results/clientpositive/constprog_partitioner.q.out
@@ -107,22 +107,22 @@ STAGE PLANS:
             alias: lineitem
             Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: ((l_shipmode = 'AIR') and l_linenumber is not null) (type: boolean)
-              Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+              predicate: ((l_linenumber = l_linenumber) and (l_shipmode = 'AIR')) (type: boolean)
+              Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: l_orderkey (type: int), l_linenumber (type: int)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   keys: _col0 (type: int), _col1 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: int)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                    Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
       Reduce Operator Tree:
         Join Operator
           condition map:

http://git-wip-us.apache.org/repos/asf/hive/blob/9375cf3c/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index b9c6cd9..4452dea 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -1681,11 +1681,11 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_19]
-        Select Operator [SEL_18] (rows=366 width=178)
+        Select Operator [SEL_18] (rows=434 width=178)
           Output:["_col0","_col1"]
-          Filter Operator [FIL_17] (rows=366 width=179)
+          Filter Operator [FIL_17] (rows=434 width=178)
             predicate:_col3 is null
-            Merge Join Operator [MERGEJOIN_22] (rows=500 width=179)
+            Merge Join Operator [MERGEJOIN_22] (rows=500 width=178)
               Conds:RS_14._col1=RS_15._col0(Left Outer),Output:["_col0","_col1","_col3"]
             <-Map 1 [SIMPLE_EDGE] llap
               SHUFFLE [RS_14]
@@ -1697,21 +1697,21 @@ Stage-0
             <-Reducer 3 [SIMPLE_EDGE] llap
               SHUFFLE [RS_15]
                 PartitionCols:_col0
-                Select Operator [SEL_13] (rows=83 width=95)
+                Select Operator [SEL_13] (rows=41 width=95)
                   Output:["_col0","_col1"]
-                  Group By Operator [GBY_12] (rows=83 width=91)
+                  Group By Operator [GBY_12] (rows=41 width=91)
                     Output:["_col0"],keys:_col1
-                    Select Operator [SEL_8] (rows=83 width=178)
+                    Select Operator [SEL_8] (rows=41 width=178)
                       Output:["_col1"]
-                      Group By Operator [GBY_7] (rows=83 width=178)
+                      Group By Operator [GBY_7] (rows=41 width=178)
                         Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
                       <-Map 1 [SIMPLE_EDGE] llap
                         SHUFFLE [RS_6]
                           PartitionCols:_col0
-                          Group By Operator [GBY_5] (rows=83 width=178)
+                          Group By Operator [GBY_5] (rows=41 width=178)
                             Output:["_col0","_col1"],keys:value, key
-                            Filter Operator [FIL_21] (rows=166 width=178)
-                              predicate:(value > 'val_2')
+                            Filter Operator [FIL_21] (rows=83 width=178)
+                              predicate:((value = value) and (value > 'val_2'))
                                Please refer to the previous TableScan [TS_0]
 
 PREHOOK: query: explain select * 
@@ -1745,11 +1745,11 @@ Stage-0
     Stage-1
       Reducer 3 llap
       File Output Operator [FS_18]
-        Select Operator [SEL_17] (rows=183 width=178)
+        Select Operator [SEL_17] (rows=234 width=178)
           Output:["_col0","_col1"]
-          Filter Operator [FIL_16] (rows=183 width=179)
+          Filter Operator [FIL_16] (rows=234 width=178)
             predicate:_col4 is null
-            Merge Join Operator [MERGEJOIN_21] (rows=250 width=179)
+            Merge Join Operator [MERGEJOIN_21] (rows=250 width=178)
               Conds:RS_13._col0, _col1=RS_14._col0, _col1(Left Outer),Output:["_col0","_col1","_col4"]
             <-Reducer 2 [ONE_TO_ONE_EDGE] llap
               FORWARD [RS_13]
@@ -1768,17 +1768,17 @@ Stage-0
             <-Reducer 4 [ONE_TO_ONE_EDGE] llap
               FORWARD [RS_14]
                 PartitionCols:_col0, _col1
-                Select Operator [SEL_12] (rows=83 width=182)
+                Select Operator [SEL_12] (rows=20 width=182)
                   Output:["_col0","_col1","_col2"]
-                  Group By Operator [GBY_11] (rows=83 width=178)
+                  Group By Operator [GBY_11] (rows=20 width=178)
                     Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
                   <-Map 1 [SIMPLE_EDGE] llap
                     SHUFFLE [RS_10]
                       PartitionCols:_col0, _col1
-                      Group By Operator [GBY_9] (rows=83 width=178)
+                      Group By Operator [GBY_9] (rows=20 width=178)
                         Output:["_col0","_col1"],keys:key, value
-                        Filter Operator [FIL_20] (rows=166 width=178)
-                          predicate:((value > 'val_12') and key is not null)
+                        Filter Operator [FIL_20] (rows=41 width=178)
+                          predicate:((key = key) and (value = value) and (value > 'val_12'))
                            Please refer to the previous TableScan [TS_0]
 
 PREHOOK: query: create view cv1 as 
@@ -1820,7 +1820,7 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_11]
-        Merge Join Operator [MERGEJOIN_16] (rows=134 width=178)
+        Merge Join Operator [MERGEJOIN_16] (rows=32 width=178)
           Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] llap
           SHUFFLE [RS_7]
@@ -1832,12 +1832,12 @@ Stage-0
         <-Map 3 [SIMPLE_EDGE] llap
           SHUFFLE [RS_8]
             PartitionCols:_col0, _col1
-            Group By Operator [GBY_6] (rows=83 width=178)
+            Group By Operator [GBY_6] (rows=20 width=178)
               Output:["_col0","_col1"],keys:_col0, _col1
-              Select Operator [SEL_4] (rows=166 width=178)
+              Select Operator [SEL_4] (rows=41 width=178)
                 Output:["_col0","_col1"]
-                Filter Operator [FIL_15] (rows=166 width=178)
-                  predicate:((value > 'val_9') and key is not null)
+                Filter Operator [FIL_15] (rows=41 width=178)
+                  predicate:((key = key) and (value = value) and (value > 'val_9'))
                   TableScan [TS_2] (rows=500 width=178)
                     default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
@@ -1870,7 +1870,7 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_11]
-        Merge Join Operator [MERGEJOIN_16] (rows=134 width=178)
+        Merge Join Operator [MERGEJOIN_16] (rows=32 width=178)
           Conds:RS_7._col0, _col1=RS_8._col0, _col1(Left Semi),Output:["_col0","_col1"]
         <-Map 1 [SIMPLE_EDGE] llap
           SHUFFLE [RS_7]
@@ -1882,12 +1882,12 @@ Stage-0
         <-Map 3 [SIMPLE_EDGE] llap
           SHUFFLE [RS_8]
             PartitionCols:_col0, _col1
-            Group By Operator [GBY_6] (rows=83 width=178)
+            Group By Operator [GBY_6] (rows=20 width=178)
               Output:["_col0","_col1"],keys:_col0, _col1
-              Select Operator [SEL_4] (rows=166 width=178)
+              Select Operator [SEL_4] (rows=41 width=178)
                 Output:["_col0","_col1"]
-                Filter Operator [FIL_15] (rows=166 width=178)
-                  predicate:((value > 'val_9') and key is not null)
+                Filter Operator [FIL_15] (rows=41 width=178)
+                  predicate:((key = key) and (value = value) and (value > 'val_9'))
                   TableScan [TS_2] (rows=500 width=178)
                     default@src_cbo,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
@@ -1963,12 +1963,12 @@ Stage-0
           <-Map 6 [SIMPLE_EDGE] llap
             SHUFFLE [RS_19]
               PartitionCols:_col0, _col1
-              Group By Operator [GBY_17] (rows=4 width=8)
+              Group By Operator [GBY_17] (rows=2 width=8)
                 Output:["_col0","_col1"],keys:_col0, _col1
-                Select Operator [SEL_12] (rows=14 width=8)
+                Select Operator [SEL_12] (rows=7 width=8)
                   Output:["_col0","_col1"]
-                  Filter Operator [FIL_30] (rows=14 width=96)
-                    predicate:((l_shipmode = 'AIR') and l_linenumber is not null)
+                  Filter Operator [FIL_30] (rows=7 width=96)
+                    predicate:((l_linenumber = l_linenumber) and (l_shipmode = 'AIR'))
                     TableScan [TS_10] (rows=100 width=96)
                       default@lineitem,lineitem,Tbl:COMPLETE,Col:COMPLETE,Output:["l_orderkey","l_linenumber","l_shipmode"]
           <-Reducer 3 [SIMPLE_EDGE] llap
@@ -2256,14 +2256,14 @@ Stage-0
       File Output Operator [FS_26]
         Select Operator [SEL_25] (rows=13 width=223)
           Output:["_col0","_col1","_col2"]
-          Filter Operator [FIL_24] (rows=13 width=231)
+          Filter Operator [FIL_24] (rows=13 width=227)
             predicate:(not CASE WHEN ((_col4 = 0)) THEN (false) WHEN (_col4 is null) THEN (false) WHEN (_col8 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col5 < _col4)) THEN (true) ELSE (false) END)
-            Merge Join Operator [MERGEJOIN_32] (rows=26 width=230)
+            Merge Join Operator [MERGEJOIN_32] (rows=26 width=227)
               Conds:RS_21._col0, _col1=RS_22._col0, _col1(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5","_col8"]
             <-Reducer 2 [SIMPLE_EDGE] llap
               SHUFFLE [RS_21]
                 PartitionCols:_col0, _col1
-                Merge Join Operator [MERGEJOIN_31] (rows=26 width=229)
+                Merge Join Operator [MERGEJOIN_31] (rows=26 width=226)
                   Conds:RS_18._col1=RS_19._col0(Left Outer),Output:["_col0","_col1","_col2","_col4","_col5"]
                 <-Map 1 [SIMPLE_EDGE] llap
                   SHUFFLE [RS_18]
@@ -2275,36 +2275,36 @@ Stage-0
                 <-Reducer 4 [ONE_TO_ONE_EDGE] llap
                   FORWARD [RS_19]
                     PartitionCols:_col0
-                    Group By Operator [GBY_7] (rows=2 width=114)
+                    Group By Operator [GBY_7] (rows=1 width=114)
                       Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0
                     <-Map 1 [SIMPLE_EDGE] llap
                       SHUFFLE [RS_6]
                         PartitionCols:_col0
-                        Group By Operator [GBY_5] (rows=2 width=114)
+                        Group By Operator [GBY_5] (rows=1 width=114)
                           Output:["_col0","_col1","_col2"],aggregations:["count()","count(p_name)"],keys:p_mfgr
-                          Select Operator [SEL_4] (rows=8 width=223)
+                          Select Operator [SEL_4] (rows=4 width=223)
                             Output:["p_name","p_mfgr"]
-                            Filter Operator [FIL_29] (rows=8 width=223)
-                              predicate:((p_size < 10) and p_mfgr is not null)
+                            Filter Operator [FIL_29] (rows=4 width=223)
+                              predicate:((p_mfgr = p_mfgr) and (p_size < 10))
                                Please refer to the previous TableScan [TS_0]
             <-Reducer 5 [ONE_TO_ONE_EDGE] llap
               FORWARD [RS_22]
                 PartitionCols:_col0, _col1
-                Select Operator [SEL_17] (rows=4 width=223)
+                Select Operator [SEL_17] (rows=2 width=223)
                   Output:["_col0","_col1","_col2"]
-                  Filter Operator [FIL_16] (rows=4 width=219)
+                  Filter Operator [FIL_16] (rows=2 width=219)
                     predicate:_col0 is not null
-                    Group By Operator [GBY_14] (rows=4 width=219)
+                    Group By Operator [GBY_14] (rows=2 width=219)
                       Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
                     <-Map 1 [SIMPLE_EDGE] llap
                       SHUFFLE [RS_13]
                         PartitionCols:_col0, _col1
-                        Group By Operator [GBY_12] (rows=4 width=219)
+                        Group By Operator [GBY_12] (rows=2 width=219)
                           Output:["_col0","_col1"],keys:p_name, p_mfgr
-                          Select Operator [SEL_11] (rows=8 width=223)
+                          Select Operator [SEL_11] (rows=4 width=223)
                             Output:["p_name","p_mfgr"]
-                            Filter Operator [FIL_30] (rows=8 width=223)
-                              predicate:((p_size < 10) and p_mfgr is not null)
+                            Filter Operator [FIL_30] (rows=4 width=223)
+                              predicate:((p_mfgr = p_mfgr) and (p_size < 10))
                                Please refer to the previous TableScan [TS_0]
 
 PREHOOK: query: explain select p_name, p_size 
@@ -2463,7 +2463,7 @@ Stage-0
                               Select Operator [SEL_12] (rows=1 width=114)
                                 Output:["_col0","_col1"]
                                 Filter Operator [FIL_40] (rows=1 width=114)
-                                  predicate:(((_col2 - _col1) > 600.0) and _col1 is not null)
+                                  predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1))
                                   Group By Operator [GBY_10] (rows=5 width=114)
                                     Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
                                   <-Map 1 [SIMPLE_EDGE] llap
@@ -2482,7 +2482,7 @@ Stage-0
                         Select Operator [SEL_24] (rows=1 width=110)
                           Output:["_col0","_col1"]
                           Filter Operator [FIL_41] (rows=1 width=114)
-                            predicate:(((_col2 - _col1) > 600.0) and _col1 is not null)
+                            predicate:(((_col2 - _col1) > 600.0) and (_col1 = _col1))
                             Group By Operator [GBY_22] (rows=5 width=114)
                               Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"],keys:KEY._col0
                             <-Map 1 [SIMPLE_EDGE] llap

http://git-wip-us.apache.org/repos/asf/hive/blob/9375cf3c/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
index 840ecd4..53bbad2 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out
@@ -50,22 +50,22 @@ STAGE PLANS:
                   alias: a
                   Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
-                    predicate: ((value > 'val_9') and key is not null) (type: boolean)
-                    Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+                    predicate: ((key = key) and (value = value) and (value > 'val_9')) (type: boolean)
+                    Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 41 Data size: 7298 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         keys: _col0 (type: string), _col1 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1
-                        Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: string), _col1 (type: string)
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                          Statistics: Num rows: 83 Data size: 14774 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 20 Data size: 3560 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -78,10 +78,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 32 Data size: 5696 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 134 Data size: 23852 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 32 Data size: 5696 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1061,3 +1061,374 @@ POSTHOOK: query: drop table t
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@t
 POSTHOOK: Output: default@t
+PREHOOK: query: drop table if exists tx1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tx1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table tx1 (a integer,b integer)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tx1
+POSTHOOK: query: create table tx1 (a integer,b integer)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tx1
+PREHOOK: query: insert into tx1	values  (1, 1),
+                        (1, 2),
+                        (1, 3)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tx1
+POSTHOOK: query: insert into tx1	values  (1, 1),
+                        (1, 2),
+                        (1, 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tx1
+POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tx1.b EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: select count(*) as result,3 as expected from tx1 u
+    where exists (select * from tx1 v where u.a=v.a and u.b <> v.b)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) as result,3 as expected from tx1 u
+    where exists (select * from tx1 v where u.a=v.a and u.b <> v.b)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+#### A masked pattern was here ####
+3	3
+PREHOOK: query: explain select count(*) as result,3 as expected from tx1 u
+    where exists (select * from tx1 v where u.a=v.a and u.b <> v.b)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(*) as result,3 as expected from tx1 u
+    where exists (select * from tx1 v where u.a=v.a and u.b <> v.b)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+        Reducer 5 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: int)
+                  Group By Operator
+                    keys: a (type: int), b (type: int)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int), _col1 (type: int)
+                  1 _col0 (type: int), _col1 (type: int)
+                Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: bigint), 3 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col1, _col2, _col3
+                residual filter predicates: {(_col3 <> _col1)}
+                Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: int), _col3 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: int), _col1 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                      Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table tx1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tx1
+PREHOOK: Output: default@tx1
+POSTHOOK: query: drop table tx1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tx1
+POSTHOOK: Output: default@tx1
+PREHOOK: query: create table t1(i int, j int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1(i int, j int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 values(4,1)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 values(4,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.i EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t1.j EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: create table t2(i int, j int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2(i int, j int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: insert into t2 values(4,2),(4,3),(4,5)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@t2
+POSTHOOK: query: insert into t2 values(4,2),(4,3),(4,5)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@t2
+POSTHOOK: Lineage: t2.i EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: t2.j EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: i (type: int), j (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: t2
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: i (type: int), j (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: t1
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: j (type: int)
+                    mode: hash
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: int)
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int), _col1 (type: int)
+                  1 _col0 (type: int), _col1 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 3 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1, _col2
+                residual filter predicates: {(_col1 <> _col2)}
+                Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col2 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: _col0 (type: int), _col1 (type: int)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: int), _col1 (type: int)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                      Statistics: Num rows: 3 Data size: 39 Basic stats: COMPLETE Column stats: NONE
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join MERGEJOIN[27][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
+PREHOOK: query: select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from t1 where t1.i in (select t2.i from t2 where t2.j <> t1.j)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+4	1
+PREHOOK: query: drop table t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: drop table t2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t2
+PREHOOK: Output: default@t2
+POSTHOOK: query: drop table t2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: default@t2


Mime
View raw message