hive-commits mailing list archives

From jcama...@apache.org
Subject [2/2] hive git commit: HIVE-17766: Support non-equi LEFT SEMI JOIN (Jesus Camacho Rodriguez, reviewed by Vineet Garg, Ashutosh Chauhan)
Date Tue, 31 Oct 2017 15:27:05 GMT
HIVE-17766: Support non-equi LEFT SEMI JOIN (Jesus Camacho Rodriguez, reviewed by Vineet Garg, Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5b8ffe2d
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5b8ffe2d
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5b8ffe2d

Branch: refs/heads/master
Commit: 5b8ffe2d9e7ad04a28b7255c38f0fa345da322b0
Parents: b57837a
Author: Jesus Camacho Rodriguez <jcamacho@apache.org>
Authored: Thu Oct 26 14:07:43 2017 -0700
Committer: Jesus Camacho Rodriguez <jcamacho@apache.org>
Committed: Tue Oct 31 08:25:32 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   2 +
 .../hadoop/hive/ql/exec/CommonJoinOperator.java |   7 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  20 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 174 +++--
 .../hive/ql/parse/TypeCheckProcFactory.java     |   7 +-
 .../clientpositive/nested_column_pruning.q      |  22 +
 ql/src/test/queries/clientpositive/semijoin6.q  |  51 ++
 ql/src/test/queries/clientpositive/semijoin7.q  |  52 ++
 .../results/clientpositive/llap/lineage3.q.out  |   2 +-
 .../results/clientpositive/llap/semijoin6.q.out | 750 +++++++++++++++++++
 .../results/clientpositive/llap/semijoin7.q.out | 726 ++++++++++++++++++
 .../clientpositive/nested_column_pruning.q.out  | 264 +++++++
 .../clientpositive/perf/spark/query83.q.out     |  48 +-
 .../clientpositive/spark/subquery_multi.q.out   |  50 +-
 14 files changed, 2111 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
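For context: prior to this change, a LEFT SEMI JOIN whose ON clause contained
anything other than equality conjuncts was rejected with "Non equality
condition not supported in Semi-Join". With this patch, the non-equi part of
the condition is kept as a residual predicate that is evaluated after the
join. A minimal example of the newly supported syntax, taken from the new
semijoin6.q test below:

  select * from tx1 u left semi join tx2 v on u.a = v.a and u.b <> v.b;

Here u.a = v.a is still used as the equi-join key, while u.b <> v.b is
applied as a residual filter on the joined rows.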


http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 4b521f0..462f332 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -610,6 +610,8 @@ minillaplocal.query.files=\
   schema_evol_text_vecrow_table.q,\
   selectDistinctStar.q,\
   semijoin.q,\
+  semijoin6.q,\
+  semijoin7.q,\
   semijoin_hint.q,\
   smb_cache.q,\
   special_character_in_tabnames_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
index 3573d07..05ebe06 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
@@ -605,9 +605,9 @@ public abstract class CommonJoinOperator<T extends JoinDesc> extends
         innerJoin(skip, left, right);
       } else if (type == JoinDesc.LEFT_SEMI_JOIN) {
         if (innerJoin(skip, left, right)) {
-          // if left-semi-join found a match, skipping the rest of the rows in the
-          // rhs table of the semijoin
-          done = true;
+          // if left-semi-join found a match and we do not have any additional predicates,
+          // skip the rest of the rows in the rhs table of the semijoin
+          done = !needsPostEvaluation;
         }
       } else if (type == JoinDesc.LEFT_OUTER_JOIN ||
           (type == JoinDesc.FULL_OUTER_JOIN && rightNull)) {
@@ -641,6 +641,7 @@ public abstract class CommonJoinOperator<T extends JoinDesc> extends
             // This is only executed for outer joins with residual filters
             boolean forward = createForwardJoinObject(skipVectors[numAliases - 1]);
             producedRow |= forward;
+            done = (type == JoinDesc.LEFT_SEMI_JOIN) && forward;
             if (!rightNull &&
                     (type == JoinDesc.RIGHT_OUTER_JOIN || type == JoinDesc.FULL_OUTER_JOIN)) {
               if (forward) {

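The change to done is what keeps semijoin results correct with residual
predicates: a first equi-match on the rhs may still be rejected by the
residual filter, so the remaining rhs rows can only be skipped once a row has
actually been forwarded (the done = ... && forward assignment in the second
hunk). A query from the new semijoin6.q test that exercises this
post-evaluation path:

  select * from tx1 u left semi join tx2 v
  on (u.a + v.b > 400)
    and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
        or (coalesce(u.a) + coalesce(v.b) > 1900))
    and u.a = v.a;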
http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 46493ac..80351be 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -113,6 +113,7 @@ import org.apache.calcite.sql.SqlLiteral;
 import org.apache.calcite.sql.SqlNode;
 import org.apache.calcite.sql.SqlOperator;
 import org.apache.calcite.sql.SqlWindow;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.tools.Frameworks;
@@ -2121,17 +2122,20 @@ public class CalcitePlanner extends SemanticAnalyzer {
         RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel,
             calciteJoinCond, leftJoinKeys, rightJoinKeys, null, null);
 
-        if (!nonEquiConds.isAlwaysTrue()) {
-          throw new SemanticException("Non equality condition not supported in Semi-Join"
-              + nonEquiConds);
-        }
-
         RelNode[] inputRels = new RelNode[] { leftRel, rightRel };
         final List<Integer> leftKeys = new ArrayList<Integer>();
         final List<Integer> rightKeys = new ArrayList<Integer>();
-        calciteJoinCond = HiveCalciteUtil.projectNonColumnEquiConditions(
-            HiveRelFactories.HIVE_PROJECT_FACTORY, inputRels, leftJoinKeys, rightJoinKeys, 0,
-            leftKeys, rightKeys);
+        RexNode remainingEquiCond = HiveCalciteUtil.projectNonColumnEquiConditions(HiveRelFactories.HIVE_PROJECT_FACTORY,
+            inputRels, leftJoinKeys, rightJoinKeys, 0, leftKeys, rightKeys);
+        // Adjust right input fields in nonEquiConds if previous call modified the input
+        if (inputRels[0] != leftRel) {
+          nonEquiConds = RexUtil.shift(nonEquiConds, leftRel.getRowType().getFieldCount(),
+              inputRels[0].getRowType().getFieldCount() - leftRel.getRowType().getFieldCount());
+        }
+        calciteJoinCond = remainingEquiCond != null ?
+            RexUtil.composeConjunction(cluster.getRexBuilder(),
+                ImmutableList.of(remainingEquiCond, nonEquiConds), false) :
+            nonEquiConds;
         topRel = HiveSemiJoin.getSemiJoin(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
             inputRels[0], inputRels[1], calciteJoinCond, ImmutableIntList.copyOf(leftKeys),
             ImmutableIntList.copyOf(rightKeys));

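In other words, instead of throwing a SemanticException for non-equi
conditions, the planner now splits the join condition: the equality conjuncts
become the semijoin keys (leftKeys/rightKeys), and the remainder
(nonEquiConds, conjoined with any leftover non-column equi condition) is
carried along in the HiveSemiJoin condition, with the rhs field references in
nonEquiConds shifted when a Project was added on top of the left input. For

  select * from tx1 u left semi join tx2 v on u.a = v.a and u.b <> v.b;

u.a = v.a supplies the key pair and u.b <> v.b survives as the residual
condition (visible as "residual filter predicates" in the semijoin6.q.out
plans below).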
http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index e40ec5d..549b38d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2827,15 +2827,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
   }
 
   /**
-   * Parse the join condition. If the condition is a join condition, throw an
-   * error if it is not an equality. Otherwise, break it into left and right
-   * expressions and store in the join tree. If the condition is a join filter,
-   * add it to the filter list of join tree. The join condition can contains
-   * conditions on both the left and tree trees and filters on either.
-   * Currently, we only support equi-joins, so we throw an error if the
-   * condition involves both subtrees and is not a equality. Also, we only
-   * support AND i.e ORs are not supported currently as their semantics are not
-   * very clear, may lead to data explosion and there is no usecase.
+   * Parse the join condition. For equality conjuncts, break them into left and
+   * right expressions and store them in the join tree. For other conditions:
+   * add them to the post-conditions if they apply to more than one input;
+   * add them to the filter conditions of a given input if they apply only to
+   * that input and should not be pushed, e.g., a left outer join with a
+   * condition that applies only to the left input; or push them below the
+   * join if they apply only to one input and can be pushed, e.g., a left
+   * outer join with a condition that applies only to the right input.
    *
    * @param joinTree
    *          jointree to be populated
@@ -2855,6 +2854,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     switch (joinCond.getToken().getType()) {
     case HiveParser.KW_OR:
+      parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(0),
+          new ArrayList<String>(), new ArrayList<String>(),
+          null, aliasToOpInfo);
+      parseJoinCondPopulateAlias(joinTree, (ASTNode) joinCond.getChild(1),
+          new ArrayList<String>(), new ArrayList<String>(),
+          null, aliasToOpInfo);
       joinTree.addPostJoinFilter(joinCond);
       break;
 
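With this hunk, an OR'ed join condition on a semijoin is no longer an error:
both branches are parsed first so that the aliases of the inputs they
reference are populated, and the whole disjunction is then attached as a
post-join filter. Since no equi-key can be extracted, the plan becomes a
cross product with a residual filter, as the cross-product warning in
semijoin6.q.out shows for:

  select * from tx1 u left semi join tx2 v on u.a = v.a or u.b <> v.b;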
@@ -8148,6 +8153,9 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     HashMap<Byte, List<ExprNodeDesc>> filterMap =
         new HashMap<Byte, List<ExprNodeDesc>>();
 
+    // Only used for semijoin with residual predicates
+    List<ColumnInfo> topSelectInputColumns = new ArrayList<>();
+
     for (int pos = 0; pos < right.length; ++pos) {
       Operator<?> input = right[pos] == null ? left : right[pos];
       if (input == null) {
@@ -8167,7 +8175,10 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       Byte tag = (byte) rsDesc.getTag();
 
       // check whether this input operator produces output
-      if (omitOpts != null && omitOpts.contains(pos)) {
+      // If there are residual predicates, we do not skip this output;
+      // instead, we will add a Select on top of the join
+      if (omitOpts != null && omitOpts.contains(pos)
+          && join.getPostJoinFilters().size() == 0) {
         exprMap.put(tag, valueDesc);
         filterMap.put(tag, filterDesc);
         rightOps[pos] = input;
@@ -8211,6 +8222,11 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         valueDesc.add(desc);
         outputColumnNames.add(internalName);
         reversedExprs.put(internalName, tag);
+
+        // Populate semijoin select if needed
+        if (omitOpts == null || !omitOpts.contains(pos)) {
+          topSelectInputColumns.add(info);
+        }
       }
       for (ASTNode cond : join.getFilters().get(tag)) {
         filterDesc.add(genExprNodeDesc(cond, inputRR));
@@ -8232,8 +8248,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     desc.setFilterMap(join.getFilterMap());
     // Add filters that apply to more than one input
     if (join.getPostJoinFilters().size() != 0 &&
-            (!join.getNoOuterJoin() ||
-                    HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PUSH_RESIDUAL_INNER))) {
+            (!join.getNoOuterJoin() || !join.getNoSemiJoin()
+                    || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PUSH_RESIDUAL_INNER))) {
       LOG.debug("Generate JOIN with post-filtering conditions");
       List<ExprNodeDesc> residualFilterExprs = new ArrayList<ExprNodeDesc>();
       for (ASTNode cond : join.getPostJoinFilters()) {
@@ -8256,7 +8272,34 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       }
       desc.setNullSafes(nullsafes);
     }
-    return putOpInsertMap(joinOp, outputRR);
+
+    Operator<?> topOp = putOpInsertMap(joinOp, outputRR);
+    if (omitOpts != null && !omitOpts.isEmpty()
+        && desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) {
+      // Add a Select operator on top of the semijoin to ensure that only the correct columns are projected
+      final List<ExprNodeDesc> topSelectExprs = new ArrayList<>();
+      final List<String> topSelectOutputColNames = new ArrayList<>();
+      final RowResolver topSelectRR = new RowResolver();
+      final Map<String, ExprNodeDesc> topSelectColExprMap = new HashMap<String, ExprNodeDesc>();
+      for (ColumnInfo colInfo : topSelectInputColumns) {
+        ExprNodeColumnDesc columnExpr = new ExprNodeColumnDesc(colInfo);
+        topSelectExprs.add(columnExpr);
+        topSelectOutputColNames.add(colInfo.getInternalName());
+        topSelectColExprMap.put(colInfo.getInternalName(), columnExpr);
+        String[] nm = outputRR.reverseLookup(columnExpr.getColumn());
+        String[] nm2 = outputRR.getAlternateMappings(columnExpr.getColumn());
+        topSelectRR.put(nm[0], nm[1], colInfo);
+        if (nm2 != null) {
+          topSelectRR.addMappingOnly(nm2[0], nm2[1], colInfo);
+        }
+      }
+      final SelectDesc topSelect = new SelectDesc(topSelectExprs, topSelectOutputColNames);
+      topOp = putOpInsertMap(OperatorFactory.getAndMakeChild(topSelect,
+          new RowSchema(topSelectRR.getColumnInfos()), topOp), topSelectRR);
+      topOp.setColumnExprMap(topSelectColExprMap);
+    }
+
+    return topOp;
   }
 
   private ExprNodeDesc[][] genJoinKeys(QBJoinTree joinTree, Operator[] inputs)
@@ -8442,8 +8485,16 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       srcOps[i] = genJoinReduceSinkChild(qb, joinKeys[i], srcOps[i], srcs, joinTree.getNextTag());
     }
 
-    JoinOperator joinOp = (JoinOperator) genJoinOperatorChildren(joinTree,
-      joinSrcOp, srcOps, omitOpts, joinKeys);
+    Operator<?> topOp = genJoinOperatorChildren(joinTree,
+        joinSrcOp, srcOps, omitOpts, joinKeys);
+    JoinOperator joinOp;
+    if (topOp instanceof JoinOperator) {
+      joinOp = (JoinOperator) topOp;
+    } else {
+      // We might generate a Select operator on top of the join operator for
+      // a semijoin
+      joinOp = (JoinOperator) topOp.getParentOperators().get(0);
+    }
     joinOp.getConf().setQBJoinTreeProps(joinTree);
     joinContext.put(joinOp, joinTree);
 
@@ -8453,14 +8504,12 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         // Safety check for postconditions
         throw new SemanticException("Post-filtering conditions should have been added to the JOIN operator");
       }
-      Operator op = joinOp;
       for(ASTNode condn : joinTree.getPostJoinFilters()) {
-        op = genFilterPlan(qb, condn, op, false);
+        topOp = genFilterPlan(qb, condn, topOp, false);
       }
-      return op;
     }
 
-    return joinOp;
+    return topOp;
   }
 
   /**
@@ -8475,7 +8524,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
    * @throws SemanticException
    */
   private Operator insertSelectForSemijoin(ArrayList<ASTNode> fields,
-      Operator input) throws SemanticException {
+      Operator<?> input) throws SemanticException {
 
     RowResolver inputRR = opParseCtx.get(input).getRowResolver();
     ArrayList<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
@@ -8487,13 +8536,38 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     // construct the list of columns that need to be projected
     for (int i = 0; i < fields.size(); ++i) {
       ASTNode field = fields.get(i);
-      ExprNodeDesc exprNode = genExprNodeDesc(field, inputRR);
+      String[] nm;
+      String[] nm2;
+      ExprNodeDesc expr = genExprNodeDesc(field, inputRR);
+      if (expr instanceof ExprNodeColumnDesc) {
+        // In most of the cases, this is a column reference
+        ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) expr;
+        nm = inputRR.reverseLookup(columnExpr.getColumn());
+        nm2 = inputRR.getAlternateMappings(columnExpr.getColumn());
+      } else if (expr instanceof ExprNodeConstantDesc) {
+        // However, it can be a constant too. In that case, we need to track
+        // the column that it originated from in the input operator so we can
+        // propagate the aliases.
+        ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) expr;
+        String inputCol = constantExpr.getFoldedFromCol();
+        nm = inputRR.reverseLookup(inputCol);
+        nm2 = inputRR.getAlternateMappings(inputCol);
+      } else {
+        // We might generate other types that are not recognized, e.g., a field reference
+        // if it is a nested field, but since this is just an additional optimization,
+        // we bail out without introducing the Select + GroupBy below the right input
+        // of the left semijoin
+        return input;
+      }
       String colName = getColumnInternalName(i);
       outputColumnNames.add(colName);
-      ColumnInfo colInfo = new ColumnInfo(colName, exprNode.getTypeInfo(), "", false);
-      outputRR.putExpression(field, colInfo);
-      colList.add(exprNode);
-      colExprMap.put(colName, exprNode);
+      ColumnInfo colInfo = new ColumnInfo(colName, expr.getTypeInfo(), "", false);
+      outputRR.put(nm[0], nm[1], colInfo);
+      if (nm2 != null) {
+        outputRR.addMappingOnly(nm2[0], nm2[1], colInfo);
+      }
+      colList.add(expr);
+      colExprMap.put(colName, expr);
     }
 
     // create selection operator
@@ -8505,43 +8579,55 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return output;
   }
 
-  private Operator genMapGroupByForSemijoin(QB qb, ArrayList<ASTNode> fields, // the
-      // ASTNode
-      // of
-      // the
-      // join
-      // key
-      // "tab.col"
-      Operator inputOperatorInfo, GroupByDesc.Mode mode)
+  private Operator genMapGroupByForSemijoin(QB qb, ArrayList<ASTNode> fields,
+      Operator<?> input, GroupByDesc.Mode mode)
       throws SemanticException {
 
-    RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo)
+    RowResolver groupByInputRowResolver = opParseCtx.get(input)
         .getRowResolver();
     RowResolver groupByOutputRowResolver = new RowResolver();
     ArrayList<ExprNodeDesc> groupByKeys = new ArrayList<ExprNodeDesc>();
     ArrayList<String> outputColumnNames = new ArrayList<String>();
     ArrayList<AggregationDesc> aggregations = new ArrayList<AggregationDesc>();
     Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
-    qb.getParseInfo();
-
-    groupByOutputRowResolver.setIsExprResolver(true); // join keys should only
-    // be columns but not be
-    // expressions
 
     for (int i = 0; i < fields.size(); ++i) {
       // get the group by keys to ColumnInfo
       ASTNode colName = fields.get(i);
-      ExprNodeDesc grpByExprNode = genExprNodeDesc(colName,
-          groupByInputRowResolver);
+      String[] nm;
+      String[] nm2;
+      ExprNodeDesc grpByExprNode = genExprNodeDesc(colName, groupByInputRowResolver);
+      if (grpByExprNode instanceof ExprNodeColumnDesc) {
+        // In most of the cases, this is a column reference
+        ExprNodeColumnDesc columnExpr = (ExprNodeColumnDesc) grpByExprNode;
+        nm = groupByInputRowResolver.reverseLookup(columnExpr.getColumn());
+        nm2 = groupByInputRowResolver.getAlternateMappings(columnExpr.getColumn());
+      } else if (grpByExprNode instanceof ExprNodeConstantDesc) {
+        // However, it can be a constant too. In that case, we need to track
+        // the column that it originated from in the input operator so we can
+        // propagate the aliases.
+        ExprNodeConstantDesc constantExpr = (ExprNodeConstantDesc) grpByExprNode;
+        String inputCol = constantExpr.getFoldedFromCol();
+        nm = groupByInputRowResolver.reverseLookup(inputCol);
+        nm2 = groupByInputRowResolver.getAlternateMappings(inputCol);
+      } else {
+        // We might generate other types that are not recognized, e.g., a field reference
+        // if it is a nested field, but since this is just an additional optimization,
+        // we bail out without introducing the Select + GroupBy below the right input
+        // of the left semijoin
+        return input;
+      }
       groupByKeys.add(grpByExprNode);
-
       // generate output column names
       String field = getColumnInternalName(i);
       outputColumnNames.add(field);
       ColumnInfo colInfo2 = new ColumnInfo(field, grpByExprNode.getTypeInfo(),
           "", false);
+      groupByOutputRowResolver.put(nm[0], nm[1], colInfo2);
+      if (nm2 != null) {
+        groupByOutputRowResolver.addMappingOnly(nm2[0], nm2[1], colInfo2);
+      }
       groupByOutputRowResolver.putExpression(colName, colInfo2);
-
       // establish mapping from the output column to the input column
       colExprMap.put(field, grpByExprNode);
     }
@@ -8554,7 +8640,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
             false, groupByMemoryUsage, memoryThreshold, null, false, -1, false),
         new RowSchema(groupByOutputRowResolver.getColumnInfos()),
-        inputOperatorInfo), groupByOutputRowResolver);
+        input), groupByOutputRowResolver);
 
     op.setColumnExprMap(colExprMap);
     return op;

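Both insertSelectForSemijoin and genMapGroupByForSemijoin now resolve the
projected keys back to their source columns so the aliases survive the
Select + GroupBy that Hive places below the right input of a semijoin, and
both bail out (returning the input unchanged) when the key expression is
neither a column nor a folded constant. The nested-field case from the new
nested_column_pruning.q tests triggers that bail-out, since a nested field
reference such as t2.s1.f6 is generated as a field expression rather than a
plain column:

  SELECT t1.s1.f3.f5
  FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
  ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE;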
http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index 4ddf1d5..67ea32c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.parse;
 import java.math.BigDecimal;
 import java.sql.Date;
 import java.sql.Timestamp;
-import java.time.ZoneId;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -49,6 +48,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -57,7 +57,6 @@ import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
 import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
@@ -708,7 +707,9 @@ public class TypeCheckProcFactory {
         inspector instanceof PrimitiveObjectInspector) {
       PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
       Object constant = ((ConstantObjectInspector) inspector).getWritableConstantValue();
-      return new ExprNodeConstantDesc(colInfo.getType(), poi.getPrimitiveJavaObject(constant));
+      ExprNodeConstantDesc constantExpr = new ExprNodeConstantDesc(colInfo.getType(), poi.getPrimitiveJavaObject(constant));
+      constantExpr.setFoldedFromCol(colInfo.getInternalName());
+      return constantExpr;
     }
     // non-constant or non-primitive constants
     ExprNodeColumnDesc column = new ExprNodeColumnDesc(colInfo);

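Recording the originating column on folded constants is what lets the
semijoin code above recover aliases when constant propagation has replaced a
key column with a literal: getFoldedFromCol feeds the reverseLookup and
getAlternateMappings calls in insertSelectForSemijoin and
genMapGroupByForSemijoin. As a hypothetical illustration (this exact query is
not part of the new tests), a predicate like v.a = 1 can fold the right key
to a constant while the alias mapping for v.a must still be preserved:

  select * from tx1 u left semi join tx2 v on u.a = v.a and v.a = 1;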
http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/test/queries/clientpositive/nested_column_pruning.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/nested_column_pruning.q b/ql/src/test/queries/clientpositive/nested_column_pruning.q
index 80a791a..aaadebd 100644
--- a/ql/src/test/queries/clientpositive/nested_column_pruning.q
+++ b/ql/src/test/queries/clientpositive/nested_column_pruning.q
@@ -1,5 +1,6 @@
 set hive.fetch.task.conversion = none;
 set hive.exec.dynamic.partition.mode = nonstrict;
+set hive.strict.checks.cartesian.product=false;
 
 -- First, create source tables
 DROP TABLE IF EXISTS dummy;
@@ -123,6 +124,27 @@ FROM nested_tbl_1 t1 JOIN nested_tbl_1 t2
 ON t1.s1.f3.f4 = t2.s1.f6
 WHERE t2.s2.f8.f9 == TRUE;
 
+EXPLAIN SELECT t1.s1.f3.f5
+FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
+ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE;
+SELECT t1.s1.f3.f5
+FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
+ON t1.s1.f3.f4 = t2.s1.f6 AND t2.s2.f8.f9 == TRUE;
+
+EXPLAIN SELECT t1.s1.f3.f5
+FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
+ON t1.s1.f1 <> t2.s2.f8.f9;
+SELECT t1.s1.f3.f5
+FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
+ON t1.s1.f1 <> t2.s2.f8.f9;
+
+EXPLAIN SELECT t1.s1.f3.f5
+FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
+ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9;
+SELECT t1.s1.f3.f5
+FROM nested_tbl_1 t1 LEFT SEMI JOIN nested_tbl_1 t2
+ON t1.s1.f3.f4 = t2.s1.f6 AND t1.s1.f1 <> t2.s2.f8.f9;
+
 -- Testing insert with aliases
 
 DROP TABLE IF EXISTS nested_tbl_3;

http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/test/queries/clientpositive/semijoin6.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin6.q b/ql/src/test/queries/clientpositive/semijoin6.q
new file mode 100644
index 0000000..f90d757
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/semijoin6.q
@@ -0,0 +1,51 @@
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+create table tx1 (a integer,b integer);
+insert into tx1	values (1, 105), (2, 203), (3, 300), (4, 400), (null, 400), (null, null);
+
+create table tx2 (a int, b int);
+insert into tx2 values (1, 105), (1, 1900), (2, 1995), (2, 1996), (4, 400), (4, null);
+
+explain
+select * from tx1 u left semi join tx2 v on u.a=v.a;
+
+select * from tx1 u left semi join tx2 v on u.a=v.a;
+
+explain
+select * from tx1 u left semi join tx2 v on u.b <=> v.b;
+
+select * from tx1 u left semi join tx2 v on u.b <=> v.b;
+
+explain
+select * from tx1 u left semi join tx2 v on u.b <> v.b;
+
+select * from tx1 u left semi join tx2 v on u.b <> v.b;
+
+explain
+select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b;
+
+select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b;
+
+explain
+select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b;
+
+select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b;
+
+explain
+select * from tx1 u left semi join tx1 v on u.a=v.a;
+
+select * from tx1 u left semi join tx1 v on u.a=v.a;
+
+explain
+select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a;
+
+select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a;

http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/test/queries/clientpositive/semijoin7.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/semijoin7.q b/ql/src/test/queries/clientpositive/semijoin7.q
new file mode 100644
index 0000000..a0e24fb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/semijoin7.q
@@ -0,0 +1,52 @@
+set hive.cbo.enable=false;
+set hive.mapred.mode=nonstrict;
+-- SORT_QUERY_RESULTS
+
+create table tx1 (a integer,b integer);
+insert into tx1	values (1, 105), (2, 203), (3, 300), (4, 400), (null, 400), (null, null);
+
+create table tx2 (a int, b int);
+insert into tx2 values (1, 105), (1, 1900), (2, 1995), (2, 1996), (4, 400), (4, null);
+
+explain
+select * from tx1 u left semi join tx2 v on u.a=v.a;
+
+select * from tx1 u left semi join tx2 v on u.a=v.a;
+
+explain
+select * from tx1 u left semi join tx2 v on u.b <=> v.b;
+
+select * from tx1 u left semi join tx2 v on u.b <=> v.b;
+
+explain
+select * from tx1 u left semi join tx2 v on u.b <> v.b;
+
+select * from tx1 u left semi join tx2 v on u.b <> v.b;
+
+explain
+select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b;
+
+select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b;
+
+explain
+select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b;
+
+select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b;
+
+explain
+select * from tx1 u left semi join tx1 v on u.a=v.a;
+
+select * from tx1 u left semi join tx1 v on u.a=v.a;
+
+explain
+select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a;
+
+select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a;

http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index 2c53bec..66cc6ad 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -178,7 +178,7 @@ PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = (. (tok_table_or_col $hdt$_1) key))","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltyp
 esorc.ctinyint"}]}
+{"version":"1.0","engine":"tez","database":"default","hash":"8bf193b0658183be94e2428a79d91d10","queryText":"select * from src1 a\nwhere exists\n  (select cint from alltypesorc b\n   where a.key = b.ctinyint + 300)\nand key > 300","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(UDFToDouble(a.key) > 300.0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0,1],"expression":"(a.key = a.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(UDFToDouble((UDFToInteger(b.ctinyint) + 300)) = UDFToDouble(a.key))","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"a.key"},{"id":1,"vertexType":"COLUMN","vertexId":"a.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"}]}
 311	val_311
 Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: select key, value from src1

http://git-wip-us.apache.org/repos/asf/hive/blob/5b8ffe2d/ql/src/test/results/clientpositive/llap/semijoin6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/semijoin6.q.out b/ql/src/test/results/clientpositive/llap/semijoin6.q.out
new file mode 100644
index 0000000..31d2ebb
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/semijoin6.q.out
@@ -0,0 +1,750 @@
+PREHOOK: query: create table tx1 (a integer,b integer)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tx1
+POSTHOOK: query: create table tx1 (a integer,b integer)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tx1
+PREHOOK: query: insert into tx1	values (1, 105), (2, 203), (3, 300), (4, 400), (null, 400), (null, null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tx1
+POSTHOOK: query: insert into tx1	values (1, 105), (2, 203), (3, 300), (4, 400), (null, 400), (null, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tx1
+POSTHOOK: Lineage: tx1.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tx1.b EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: create table tx2 (a int, b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tx2
+POSTHOOK: query: create table tx2 (a int, b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tx2
+PREHOOK: query: insert into tx2 values (1, 105), (1, 1900), (2, 1995), (2, 1996), (4, 400), (4, null)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tx2
+POSTHOOK: query: insert into tx2 values (1, 105), (1, 1900), (2, 1995), (2, 1996), (4, 400), (4, null)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tx2
+POSTHOOK: Lineage: tx2.a EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: tx2.b EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.a=v.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.a=v.a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: v
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from tx1 u left semi join tx2 v on u.a=v.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tx1 u left semi join tx2 v on u.a=v.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+1	105
+2	203
+4	400
+PREHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.b <=> v.b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.b <=> v.b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: b (type: int)
+                    sort order: +
+                    Map-reduce partition columns: b (type: int)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: a (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: v
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: b (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 b (type: int)
+                  1 _col0 (type: int)
+                nullSafes: [true]
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from tx1 u left semi join tx2 v on u.b <=> v.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tx1 u left semi join tx2 v on u.b <=> v.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+1	105
+4	400
+NULL	400
+NULL	NULL
+Warning: Shuffle Join MERGEJOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.b <> v.b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.b <> v.b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: v
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: b (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1, _col2
+                residual filter predicates: {(_col1 <> _col2)}
+                Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 36 Data size: 468 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join MERGEJOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select * from tx1 u left semi join tx2 v on u.b <> v.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tx1 u left semi join tx2 v on u.b <> v.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+1	105
+2	203
+3	300
+4	400
+NULL	400
+PREHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: v
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int), _col1 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col3
+                residual filter predicates: {(_col1 <> _col3)}
+                Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tx1 u left semi join tx2 v on u.a=v.a and u.b <> v.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+1	105
+2	203
+Warning: Shuffle Join MERGEJOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (XPROD_EDGE), Map 3 (XPROD_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: int), _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: v
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: a (type: int), b (type: int)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      keys: _col0 (type: int), _col1 (type: int)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: int), _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 
+                  1 
+                outputColumnNames: _col0, _col1, _col2, _col3
+                residual filter predicates: {((_col0 = _col2) or (_col1 <> _col3))}
+                Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 36 Data size: 612 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+Warning: Shuffle Join MERGEJOIN[12][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+PREHOOK: query: select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+1	105
+2	203
+3	300
+4	400
+NULL	400
+PREHOOK: query: explain
+select * from tx1 u left semi join tx1 v on u.a=v.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tx1 u left semi join tx1 v on u.a=v.a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: v
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from tx1 u left semi join tx1 v on u.a=v.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tx1 u left semi join tx1 v on u.a=v.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+#### A masked pattern was here ####
+1	105
+2	203
+3	300
+4	400
+PREHOOK: query: explain
+select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: u
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 3 
+            Map Operator Tree:
+                TableScan
+                  alias: v
+                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: a is not null (type: boolean)
+                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: a (type: int), b (type: int)
+                      outputColumnNames: _col0, _col3
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        keys: _col0 (type: int), _col3 (type: int), _col3 (type: int), _col3 (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Left Semi Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0, _col1, _col3
+                residual filter predicates: {((_col0 + _col3) > 400)} {(CASE WHEN ((_col0 > 3)) THEN (true) WHEN ((_col3 > 1900)) THEN (true) ELSE (false) END or ((COALESCE(_col0) + COALESCE(_col3)) > 1900))}
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tx1
+PREHOOK: Input: default@tx2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tx1 u left semi join tx2 v
+on (u.a + v.b > 400)
+  and ((case when u.a > 3 then true when v.b > 1900 then true else false end)
+      or (coalesce(u.a) + coalesce(v.b) > 1900))
+  and u.a = v.a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tx1
+POSTHOOK: Input: default@tx2
+#### A masked pattern was here ####
+1	105
+2	203
+4	400


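A reading of the plans above: when the semi-join condition is a top-level OR (u.a=v.a or u.b <> v.b), no equi key can be extracted, so the Merge Join Operator is emitted with empty key lists and the whole condition as a residual filter predicate, and the planner warns that the shuffle join is a cross product. When equi conjuncts are present, as in the last query, u.a = v.a becomes the join key and only the non-equi conjuncts stay residual. Since a LEFT SEMI JOIN keeps each left row at most once and projects only left-side columns, the first query is semantically an EXISTS test on the right side; a minimal sketch of that rewrite, using the tx1/tx2 test tables exercised by this patch:

  -- Sketch only: EXISTS form equivalent to
  --   select * from tx1 u left semi join tx2 v on u.a=v.a or u.b <> v.b
  -- Each tx1 row appears at most once, with only left-side columns projected.
  select u.a, u.b
  from tx1 u
  where exists (
    select 1
    from tx2 v
    where u.a = v.a or u.b <> v.b
  );

On this test data it should return the same five rows shown in the first result set above.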