hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject [18/21] hive git commit: HIVE-15192 : Use Calcite to de-correlate and plan subqueries (Vineet Garg via Ashutosh Chauhan)
Date Fri, 16 Dec 2016 18:28:31 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
new file mode 100644
index 0000000..5f58aae
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -0,0 +1,329 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptRuleOperand;
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.RelFactories;
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.rex.LogicVisitor;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexShuttle;
+import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveSubQRemoveRelBuilder;
+
+/**
+ * NOTE: this rule is replicated from Calcite's SubqueryRemoveRule
+ * Transform that converts IN, EXISTS and scalar sub-queries into joins.
+ * TODO:
+ *  Reason this is replicated instead of using Calcite's is
+ *    Calcite creates null literal with null type but hive needs it to be properly typed
+ *    Need fix for Calcite-1493
+ *
+ * <p>Sub-queries are represented by {@link RexSubQuery} expressions.
+ *
+ * <p>A sub-query may or may not be correlated. If a sub-query is correlated,
+ * the wrapped {@link RelNode} will contain a {@link RexCorrelVariable} before
+ * the rewrite, and the product of the rewrite will be a {@link Correlate}.
+ * The Correlate can be removed using {@link RelDecorrelator}.
+ */
+public abstract class HiveSubQueryRemoveRule extends RelOptRule{
+
+    public static final HiveSubQueryRemoveRule FILTER =
+            new HiveSubQueryRemoveRule(
+                    operand(Filter.class, null, RexUtil.SubQueryFinder.FILTER_PREDICATE,
+                            any()),
+                    HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter") {
+                public void onMatch(RelOptRuleCall call) {
+                    final Filter filter = call.rel(0);
+                    //final RelBuilder builder = call.builder();
+                    final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null);
+                    final RexSubQuery e =
+                            RexUtil.SubQueryFinder.find(filter.getCondition());
+                    assert e != null;
+                    final RelOptUtil.Logic logic =
+                            LogicVisitor.find(RelOptUtil.Logic.TRUE,
+                                    ImmutableList.of(filter.getCondition()), e);
+                    builder.push(filter.getInput());
+                    final int fieldCount = builder.peek().getRowType().getFieldCount();
+                    final RexNode target = apply(e, filter.getVariablesSet(), logic,
+                            builder, 1, fieldCount);
+                    final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+                    builder.filter(shuttle.apply(filter.getCondition()));
+                    builder.project(fields(builder, filter.getRowType().getFieldCount()));
+                    call.transformTo(builder.build());
+                }
+            };
+
+    private HiveSubQueryRemoveRule(RelOptRuleOperand operand,
+                               RelBuilderFactory relBuilderFactory,
+                               String description) {
+        super(operand, relBuilderFactory, description);
+    }
+
+    protected RexNode apply(RexSubQuery e, Set<CorrelationId> variablesSet,
+                            RelOptUtil.Logic logic,
+                            HiveSubQRemoveRelBuilder builder, int inputCount, int offset) {
+        switch (e.getKind()) {
+            case SCALAR_QUERY:
+                builder.push(e.rel);
+                final RelMetadataQuery mq = RelMetadataQuery.instance();
+                final Boolean unique = mq.areColumnsUnique(builder.peek(),
+                        ImmutableBitSet.of());
+                if (unique == null || !unique) {
+                    builder.aggregate(builder.groupKey(),
+                            builder.aggregateCall(SqlStdOperatorTable.SINGLE_VALUE, false, null,
+                                    null, builder.field(0)));
+                }
+                builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+                return field(builder, inputCount, offset);
+
+            case IN:
+            case EXISTS:
+                // Most general case, where the left and right keys might have nulls, and
+                // caller requires 3-valued logic return.
+                //
+                // select e.deptno, e.deptno in (select deptno from emp)
+                //
+                // becomes
+                //
+                // select e.deptno,
+                //   case
+                //   when ct.c = 0 then false
+                //   when dt.i is not null then true
+                //   when e.deptno is null then null
+                //   when ct.ck < ct.c then null
+                //   else false
+                //   end
+                // from e
+                // left join (
+                //   (select count(*) as c, count(deptno) as ck from emp) as ct
+                //   cross join (select distinct deptno, true as i from emp)) as dt
+                //   on e.deptno = dt.deptno
+                //
+                // If keys are not null we can remove "ct" and simplify to
+                //
+                // select e.deptno,
+                //   case
+                //   when dt.i is not null then true
+                //   else false
+                //   end
+                // from e
+                // left join (select distinct deptno, true as i from emp) as dt
+                //   on e.deptno = dt.deptno
+                //
+                // We could further simplify to
+                //
+                // select e.deptno,
+                //   dt.i is not null
+                // from e
+                // left join (select distinct deptno, true as i from emp) as dt
+                //   on e.deptno = dt.deptno
+                //
+                // but have not yet.
+                //
+                // If the logic is TRUE we can just kill the record if the condition
+                // evaluates to FALSE or UNKNOWN. Thus the query simplifies to an inner
+                // join:
+                //
+                // select e.deptno,
+                //   true
+                // from e
+                // inner join (select distinct deptno from emp) as dt
+                //   on e.deptno = dt.deptno
+                //
+
+                builder.push(e.rel);
+                final List<RexNode> fields = new ArrayList<>();
+                switch (e.getKind()) {
+                    case IN:
+                        fields.addAll(builder.fields());
+                }
+
+                // First, the cross join
+                switch (logic) {
+                    case TRUE_FALSE_UNKNOWN:
+                    case UNKNOWN_AS_TRUE:
+                        // Since EXISTS/NOT EXISTS are not affected by presence of
+                        // null keys we do not need to generate count(*), count(c)
+                        if (e.getKind() == SqlKind.EXISTS) {
+                            logic = RelOptUtil.Logic.TRUE_FALSE;
+                            break;
+                        }
+                        builder.aggregate(builder.groupKey(),
+                                builder.count(false, "c"),
+                                builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck",
+                                        builder.fields()));
+                        builder.as("ct");
+                        if( !variablesSet.isEmpty())
+                        {
+                            //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
+                            builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+                        }
+                        else
+                            builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
+
+                        offset += 2;
+                        builder.push(e.rel);
+                        break;
+                }
+
+                // Now the left join
+                switch (logic) {
+                    case TRUE:
+                        if (fields.isEmpty()) {
+                            builder.project(builder.alias(builder.literal(true), "i"));
+                            builder.aggregate(builder.groupKey(0));
+                        } else {
+                            builder.aggregate(builder.groupKey(fields));
+                        }
+                        break;
+                    default:
+                        fields.add(builder.alias(builder.literal(true), "i"));
+                        builder.project(fields);
+                        builder.distinct();
+                }
+                builder.as("dt");
+                final List<RexNode> conditions = new ArrayList<>();
+                for (Pair<RexNode, RexNode> pair
+                        : Pair.zip(e.getOperands(), builder.fields())) {
+                    conditions.add(
+                            builder.equals(pair.left, RexUtil.shift(pair.right, offset)));
+                }
+                switch (logic) {
+                    case TRUE:
+                        builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet);
+                        return builder.literal(true);
+                }
+                builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
+
+                final List<RexNode> keyIsNulls = new ArrayList<>();
+                for (RexNode operand : e.getOperands()) {
+                    if (operand.getType().isNullable()) {
+                        keyIsNulls.add(builder.isNull(operand));
+                    }
+                }
+                final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
+                switch (logic) {
+                    case TRUE_FALSE_UNKNOWN:
+                    case UNKNOWN_AS_TRUE:
+                        operands.add(
+                                builder.equals(builder.field("ct", "c"), builder.literal(0)),
+                                builder.literal(false));
+                        //now that we are using LEFT OUTER JOIN to join inner count, count(*)
+                        // with outer table, we wouldn't be able to tell if count is zero
+                        // for inner table since inner join with correlated values will get rid
+                        // of all values where join cond is not true (i.e where actual inner table
+                        // will produce zero result). To  handle this case we need to check both
+                        // count is zero or count is null
+                        operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false));
+                        break;
+                }
+                operands.add(builder.isNotNull(builder.field("dt", "i")),
+                        builder.literal(true));
+                if (!keyIsNulls.isEmpty()) {
+                    //Calcite creates null literal with Null type here but because HIVE doesn't support null type
+                    // it is appropriately typed boolean
+                    operands.add(builder.or(keyIsNulls), e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN));
+                    // we are creating filter here so should not be returning NULL. Not sure why Calcite return NULL
+                    //operands.add(builder.or(keyIsNulls), builder.literal(false));
+                }
+                Boolean b = true;
+                switch (logic) {
+                    case TRUE_FALSE_UNKNOWN:
+                        b = null;
+                        // fall through
+                    case UNKNOWN_AS_TRUE:
+                        operands.add(
+                                builder.call(SqlStdOperatorTable.LESS_THAN,
+                                        builder.field("ct", "ck"), builder.field("ct", "c")),
+                                builder.literal(b));
+                        break;
+                }
+                operands.add(builder.literal(false));
+                return builder.call(SqlStdOperatorTable.CASE, operands.build());
+
+            default:
+                throw new AssertionError(e.getKind());
+        }
+    }
+
+    /** Returns a reference to a particular field, by offset, across several
+     * inputs on a {@link RelBuilder}'s stack. */
+    private RexInputRef field(HiveSubQRemoveRelBuilder builder, int inputCount, int offset) {
+        for (int inputOrdinal = 0;;) {
+            final RelNode r = builder.peek(inputCount, inputOrdinal);
+            if (offset < r.getRowType().getFieldCount()) {
+                return builder.field(inputCount, inputOrdinal, offset);
+            }
+            ++inputOrdinal;
+            offset -= r.getRowType().getFieldCount();
+        }
+    }
+
+    /** Returns a list of expressions that project the first {@code fieldCount}
+     * fields of the top input on a {@link RelBuilder}'s stack. */
+    private static List<RexNode> fields(HiveSubQRemoveRelBuilder builder, int fieldCount) {
+        final List<RexNode> projects = new ArrayList<>();
+        for (int i = 0; i < fieldCount; i++) {
+            projects.add(builder.field(i));
+        }
+        return projects;
+    }
+
+    /** Shuttle that replaces occurrences of a given
+     * {@link org.apache.calcite.rex.RexSubQuery} with a replacement
+     * expression. */
+    private static class ReplaceSubQueryShuttle extends RexShuttle {
+        private final RexSubQuery subQuery;
+        private final RexNode replacement;
+
+        public ReplaceSubQueryShuttle(RexSubQuery subQuery, RexNode replacement) {
+            this.subQuery = subQuery;
+            this.replacement = replacement;
+        }
+
+        @Override public RexNode visitSubQuery(RexSubQuery subQuery) {
+            return RexUtil.eq(subQuery, this.subQuery) ? replacement : subQuery;
+        }
+    }
+}
+
+// End SubQueryRemoveRule.java

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
index d494c9f..73a9b0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/HiveOpConverter.java
@@ -266,6 +266,11 @@ public class HiveOpConverter {
     TableScanOperator ts = (TableScanOperator) OperatorFactory.get(
         semanticAnalyzer.getOpContext(), tsd, new RowSchema(colInfos));
 
+    //now that we let Calcite process subqueries we might have more than one
+    // tablescan with same alias.
+    if(topOps.get(tableAlias) != null) {
+      tableAlias = tableAlias +  this.uniqueCounter ;
+    }
     topOps.put(tableAlias, ts);
 
     if (LOG.isDebugEnabled()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 679cec1..d36eb0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -32,12 +32,14 @@ import org.apache.calcite.avatica.util.TimeUnit;
 import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.CorrelationId;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.rex.RexSubQuery;
 import org.apache.calcite.sql.SqlCollation;
 import org.apache.calcite.sql.SqlIntervalQualifier;
 import org.apache.calcite.sql.SqlKind;
@@ -54,6 +56,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
 import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
@@ -68,6 +71,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseBinary;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
@@ -117,22 +121,43 @@ public class RexNodeConverter {
   private final ImmutableList<InputCtx> inputCtxs;
   private final boolean                 flattenExpr;
 
+  //outerRR belongs to outer query and is required to resolve correlated references
+  private final RowResolver             outerRR;
+  private final ImmutableMap<String, Integer> outerNameToPosMap;
+  private int correlatedId;
+
   //Constructor used by HiveRexExecutorImpl
   public RexNodeConverter(RelOptCluster cluster) {
     this(cluster, new ArrayList<InputCtx>(), false);
   }
 
+  //subqueries will need outer query's row resolver
+  public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
+                          ImmutableMap<String, Integer> outerNameToPosMap,
+      ImmutableMap<String, Integer> nameToPosMap, RowResolver hiveRR, RowResolver outerRR, int offset, boolean flattenExpr, int correlatedId) {
+    this.cluster = cluster;
+    this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, hiveRR , offset));
+    this.flattenExpr = flattenExpr;
+    this.outerRR = outerRR;
+    this.outerNameToPosMap = outerNameToPosMap;
+    this.correlatedId = correlatedId;
+  }
+
   public RexNodeConverter(RelOptCluster cluster, RelDataType inpDataType,
       ImmutableMap<String, Integer> nameToPosMap, int offset, boolean flattenExpr) {
     this.cluster = cluster;
     this.inputCtxs = ImmutableList.of(new InputCtx(inpDataType, nameToPosMap, null, offset));
     this.flattenExpr = flattenExpr;
+    this.outerRR = null;
+    this.outerNameToPosMap = null;
   }
 
   public RexNodeConverter(RelOptCluster cluster, List<InputCtx> inpCtxLst, boolean flattenExpr) {
     this.cluster = cluster;
     this.inputCtxs = ImmutableList.<InputCtx> builder().addAll(inpCtxLst).build();
     this.flattenExpr = flattenExpr;
+    this.outerRR  = null;
+    this.outerNameToPosMap = null;
   }
 
   public RexNode convert(ExprNodeDesc expr) throws SemanticException {
@@ -144,12 +169,42 @@ public class RexNodeConverter {
       return convert((ExprNodeColumnDesc) expr);
     } else if (expr instanceof ExprNodeFieldDesc) {
       return convert((ExprNodeFieldDesc) expr);
+    } else if(expr instanceof  ExprNodeSubQueryDesc) {
+      return convert((ExprNodeSubQueryDesc) expr);
     } else {
       throw new RuntimeException("Unsupported Expression");
     }
     // TODO: handle ExprNodeColumnListDesc
   }
 
+  private RexNode convert(final ExprNodeSubQueryDesc subQueryDesc) throws  SemanticException {
+    if(subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.IN) {
+     /*
+      * Check.5.h :: For In and Not In the SubQuery must implicitly or
+      * explicitly only contain one select item.
+      */
+      if(subQueryDesc.getRexSubQuery().getRowType().getFieldCount() > 1) {
+        throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+                "SubQuery can contain only 1 item in Select List."));
+      }
+      //create RexNode for LHS
+      RexNode rexNodeLhs = convert(subQueryDesc.getSubQueryLhs());
+
+      //create RexSubQuery node
+      RexNode rexSubQuery = RexSubQuery.in(subQueryDesc.getRexSubQuery(),
+                                              ImmutableList.<RexNode>of(rexNodeLhs) );
+      return  rexSubQuery;
+    }
+    else if( subQueryDesc.getType() == ExprNodeSubQueryDesc.SubqueryType.EXISTS) {
+      RexNode subQueryNode = RexSubQuery.exists(subQueryDesc.getRexSubQuery());
+      return subQueryNode;
+    }
+    else {
+      throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
+              "Currently only IN and EXISTS type of subqueries are supported"));
+    }
+  }
+
   private RexNode convert(final ExprNodeFieldDesc fieldDesc) throws SemanticException {
     RexNode rexNode = convert(fieldDesc.getDesc());
     if (rexNode.getType().isStruct()) {
@@ -420,7 +475,7 @@ public class RexNodeConverter {
   private InputCtx getInputCtx(ExprNodeColumnDesc col) throws SemanticException {
     InputCtx ctxLookingFor = null;
 
-    if (inputCtxs.size() == 1) {
+    if (inputCtxs.size() == 1 && inputCtxs.get(0).hiveRR == null) {
       ctxLookingFor = inputCtxs.get(0);
     } else {
       String tableAlias = col.getTabAlias();
@@ -443,7 +498,21 @@ public class RexNodeConverter {
   }
 
   protected RexNode convert(ExprNodeColumnDesc col) throws SemanticException {
+    //if this is co-rrelated we need to make RexCorrelVariable(with id and type)
+    // id and type should be retrieved from outerRR
     InputCtx ic = getInputCtx(col);
+    if(ic == null) {
+      // we have correlated column, build data type from outer rr
+      RelDataType rowType = TypeConverter.getType(cluster, this.outerRR, null);
+      if (this.outerNameToPosMap.get(col.getColumn()) == null) {
+        throw new SemanticException(ErrorMsg.INVALID_COLUMN_NAME.getMsg(col.getColumn()));
+      }
+
+      int pos = this.outerNameToPosMap.get(col.getColumn());
+      CorrelationId colCorr = new CorrelationId(this.correlatedId);
+      RexNode corExpr = cluster.getRexBuilder().makeCorrel(rowType, colCorr);
+      return cluster.getRexBuilder().makeFieldAccess(corExpr, pos);
+    }
     int pos = ic.hiveNameToPosMap.get(col.getColumn());
     return cluster.getRexBuilder().makeInputRef(
         ic.calciteInpDataType.getFieldList().get(pos).getType(), pos + ic.offsetInCalciteSchema);

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 07155fd..278f5b6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -24,9 +24,11 @@ import java.lang.reflect.UndeclaredThrowableException;
 import java.math.BigDecimal;
 import java.util.AbstractMap.SimpleEntry;
 import java.util.ArrayList;
+import java.util.ArrayDeque;
 import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Collections;
+import java.util.Deque;
 import java.util.EnumSet;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -109,6 +111,7 @@ import org.apache.calcite.util.CompositeList;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.ImmutableIntList;
 import org.apache.calcite.util.Pair;
+import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -136,6 +139,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HivePlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelDecorrelator;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRexExecutorImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -192,6 +196,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSubQueryRemoveRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.views.HiveMaterializedViewFilterScanRule;
@@ -208,7 +213,6 @@ import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
 import org.apache.hadoop.hive.ql.parse.QBExpr.Opcode;
-import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowExpressionSpec;
@@ -242,6 +246,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
+import com.google.common.math.IntMath;
 
 public class CalcitePlanner extends SemanticAnalyzer {
 
@@ -332,6 +337,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
         boolean reAnalyzeAST = false;
         final boolean materializedView = getQB().isMaterializedView();
 
+        // currently semi-join optimization doesn't work with subqueries
+        // so this will be turned off for if we find subqueries and will later be
+        // restored to its original state
+        boolean originalSemiOptVal = this.conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION);
         try {
           if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
             sinkOp = getOptimizedHiveOPDag();
@@ -431,6 +440,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
             super.genResolvedParseTree(ast, new PlannerContext());
             skipCalcitePlan = true;
           }
+          // restore semi-join opt flag
+          this.conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, originalSemiOptVal);
         }
       } else {
         this.ctx.setCboInfo("Plan not optimized by CBO.");
@@ -1000,6 +1011,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
     private final ColumnAccessInfo columnAccessInfo;
     private Map<HiveProject, Table> viewProjectToTableSchema;
 
+    //correlated vars across subqueries within same query needs to have different ID
+    // this will be used in RexNodeConverter to create cor var
+    private int subqueryId;
+
     // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or
     // just last one.
     LinkedHashMap<RelNode, RowResolver>                   relToHiveRR                   = new LinkedHashMap<RelNode, RowResolver>();
@@ -1015,6 +1030,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       RelNode calciteGenPlan = null;
       RelNode calcitePreCboPlan = null;
       RelNode calciteOptimizedPlan = null;
+      subqueryId = -1;
 
       /*
        * recreate cluster, so that it picks up the additional traitDef
@@ -1038,7 +1054,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       // 1. Gen Calcite Plan
       perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER);
       try {
-        calciteGenPlan = genLogicalPlan(getQB(), true);
+        calciteGenPlan = genLogicalPlan(getQB(), true, null, null);
         resultSchema = SemanticAnalyzer.convertRowSchemaToResultSetSchema(
             relToHiveRR.get(calciteGenPlan),
             HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES));
@@ -1062,6 +1078,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
       // Create executor
       Executor executorProvider = new HiveRexExecutorImpl(optCluster);
 
+      //Remove subquery
+      LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+      calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
+              HiveSubQueryRemoveRule.FILTER);
+      LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
+
+      calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan);
+      LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan));
+
       // 2. Apply pre-join order optimizations
       calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan,
               mdProvider.getMetadataProvider(), executorProvider);
@@ -2018,8 +2043,10 @@ public class CalcitePlanner extends SemanticAnalyzer {
     }
 
     private RelNode genFilterRelNode(ASTNode filterExpr, RelNode srcRel,
+            ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
             boolean useCaching) throws SemanticException {
-      ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel), useCaching);
+      ExprNodeDesc filterCondn = genExprNodeDesc(filterExpr, relToHiveRR.get(srcRel),
+              outerRR, null, useCaching);
       if (filterCondn instanceof ExprNodeConstantDesc
           && !filterCondn.getTypeString().equals(serdeConstants.BOOLEAN_TYPE_NAME)) {
         // queries like select * from t1 where 'foo';
@@ -2035,7 +2062,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
       ImmutableMap<String, Integer> hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap
           .get(srcRel);
       RexNode convertedFilterExpr = new RexNodeConverter(cluster, srcRel.getRowType(),
-          hiveColNameCalcitePosMap, 0, true).convert(filterCondn);
+          outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel), outerRR,
+              0, true, subqueryId).convert(filterCondn);
       RexNode factoredFilterExpr = RexUtil
           .pullFactors(cluster.getRexBuilder(), convertedFilterExpr);
       RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
@@ -2047,66 +2075,46 @@ public class CalcitePlanner extends SemanticAnalyzer {
       return filterRel;
     }
 
-    private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
-        Map<String, RelNode> aliasToRel, boolean forHavingClause) throws SemanticException {
-      /*
-       * Handle Subquery predicates.
-       *
-       * Notes (8/22/14 hb): Why is this a copy of the code from {@link
-       * #genFilterPlan} - for now we will support the same behavior as non CBO
-       * route. - but plan to allow nested SubQueries(Restriction.9.m) and
-       * multiple SubQuery expressions(Restriction.8.m). This requires use to
-       * utilize Calcite's Decorrelation mechanics, and for Calcite to fix/flush
-       * out Null semantics(CALCITE-373) - besides only the driving code has
-       * been copied. Most of the code which is SubQueryUtils and QBSubQuery is
-       * reused.
-       */
-      int numSrcColumns = srcRel.getRowType().getFieldCount();
-      List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
-      if (subQueriesInOriginalTree.size() > 0) {
+    private void subqueryRestritionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
+                                         boolean forHavingClause, Map<String, RelNode> aliasToRel ) throws SemanticException {
+        List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
+        if (subQueriesInOriginalTree.size() > 0) {
 
         /*
          * Restriction.9.m :: disallow nested SubQuery expressions.
          */
-        if (qb.getSubQueryPredicateDef() != null) {
-          throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
-              subQueriesInOriginalTree.get(0), "Nested SubQuery expressions are not supported."));
-        }
+          if (qb.getSubQueryPredicateDef() != null) {
+            throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+                    subQueriesInOriginalTree.get(0),
+                    "Nested SubQuery expressions are not supported."));
+          }
 
         /*
          * Restriction.8.m :: We allow only 1 SubQuery expression per Query.
          */
-        if (subQueriesInOriginalTree.size() > 1) {
+          if (subQueriesInOriginalTree.size() > 1) {
 
-          throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
-              subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
-        }
-
-        /*
-         * Clone the Search AST; apply all rewrites on the clone.
-         */
-        ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
-        List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
-
-        RowResolver inputRR = relToHiveRR.get(srcRel);
-        RowResolver outerQBRR = inputRR;
-        ImmutableMap<String, Integer> outerQBPosMap = relToHiveColNameCalcitePosMap.get(srcRel);
-
-        for (int i = 0; i < subQueries.size(); i++) {
-          ASTNode subQueryAST = subQueries.get(i);
-          ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i);
+            throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+                    subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported."));
+          }
 
+          //we do not care about the transformation or rewriting of AST
+          // which following statement does
+          // we only care about the restriction checks they perform.
+          // We plan to get rid of these restrictions later
           int sqIdx = qb.incrNumSubQueryPredicates();
+          ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(0);
+
+          ASTNode clonedSearchCond = (ASTNode) SubQueryUtils.adaptor.dupTree(searchCond);
+          List<ASTNode> subQueries = SubQueryUtils.findSubQueries(clonedSearchCond);
+          ASTNode subQueryAST = subQueries.get(0);
           clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST);
 
           QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST,
-              originalSubQueryAST, ctx);
+                  originalSubQueryAST, ctx);
+
+          RowResolver inputRR = relToHiveRR.get(srcRel);
 
-          if (!forHavingClause) {
-            qb.setWhereClauseSubQueryPredicate(subQuery);
-          } else {
-            qb.setHavingClauseSubQueryPredicate(subQuery);
-          }
           String havingInputAlias = null;
 
           if (forHavingClause) {
@@ -2115,78 +2123,78 @@ public class CalcitePlanner extends SemanticAnalyzer {
           }
 
           subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias,
-              aliasToRel.keySet());
-
-          QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
-          qbSQ.setSubQueryDef(subQuery.getSubQuery());
-          Phase1Ctx ctx_1 = initPhase1Ctx();
-          doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1, null);
-          getMetaData(qbSQ);
-          RelNode subQueryRelNode = genLogicalPlan(qbSQ, false);
-          aliasToRel.put(subQuery.getAlias(), subQueryRelNode);
-          RowResolver sqRR = relToHiveRR.get(subQueryRelNode);
-
-          /*
-           * Check.5.h :: For In and Not In the SubQuery must implicitly or
-           * explicitly only contain one select item.
-           */
-          if (subQuery.getOperator().getType() != SubQueryType.EXISTS
-              && subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
-              && sqRR.getColumnInfos().size() - subQuery.getNumOfCorrelationExprsAddedToSQSelect() > 1) {
-            throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(subQueryAST,
-                "SubQuery can contain only 1 item in Select List."));
-          }
-
-          /*
-           * If this is a Not In SubQuery Predicate then Join in the Null Check
-           * SubQuery. See QBSubQuery.NotInCheck for details on why and how this
-           * is constructed.
-           */
-          if (subQuery.getNotInCheck() != null) {
-            QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
-            notInCheck.setSQRR(sqRR);
-            QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
-            qbSQ_nic.setSubQueryDef(notInCheck.getSubQuery());
-            ctx_1 = initPhase1Ctx();
-            doPhase1(notInCheck.getSubQueryAST(), qbSQ_nic, ctx_1, null);
-            getMetaData(qbSQ_nic);
-            RelNode subQueryNICRelNode = genLogicalPlan(qbSQ_nic, false);
-            aliasToRel.put(notInCheck.getAlias(), subQueryNICRelNode);
-            srcRel = genJoinRelNode(srcRel, subQueryNICRelNode,
-            // set explicitly to inner until we figure out SemiJoin use
-            // notInCheck.getJoinType(),
-                JoinType.INNER, notInCheck.getJoinConditionAST());
-            inputRR = relToHiveRR.get(srcRel);
-            if (forHavingClause) {
-              aliasToRel.put(havingInputAlias, srcRel);
-            }
-          }
-
-          /*
-           * Gen Join between outer Operator and SQ op
-           */
-          subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias);
-          srcRel = genJoinRelNode(srcRel, subQueryRelNode, subQuery.getJoinType(),
-              subQuery.getJoinConditionAST());
-          searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond);
-
-          srcRel = genFilterRelNode(searchCond, srcRel, forHavingClause);
-
-          /*
-           * For Not Exists and Not In, add a projection on top of the Left
-           * Outer Join.
-           */
-          if (subQuery.getOperator().getType() != SubQueryType.NOT_EXISTS
-              || subQuery.getOperator().getType() != SubQueryType.NOT_IN) {
-            srcRel = projectLeftOuterSide(srcRel, numSrcColumns);
+                  aliasToRel.keySet());
+
+          // Missing Check: Check.5.h :: For In and Not In the SubQuery must implicitly or
+          // explicitly only contain one select item.
+        }
+    }
+    private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause,
+                                       Map<ASTNode, RelNode> subQueryToRelNode,
+                                       Map<String, RelNode> aliasToRel) throws SemanticException {
+
+        //disallow subqueries which HIVE doesn't currently support
+        subqueryRestritionCheck(qb, node, srcRel, forHavingClause, aliasToRel);
+        Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
+        stack.push(node);
+
+        boolean isSubQuery = false;
+
+        while (!stack.isEmpty()) {
+          ASTNode next = stack.pop();
+
+          switch(next.getType()) {
+            case HiveParser.TOK_SUBQUERY_EXPR:
+              String sbQueryAlias = "sq_" + qb.incrNumSubQueryPredicates();
+              QB qbSQ = new QB(qb.getId(), sbQueryAlias, true);
+              Phase1Ctx ctx1 = initPhase1Ctx();
+              doPhase1((ASTNode)next.getChild(1), qbSQ, ctx1, null);
+              getMetaData(qbSQ);
+              subqueryId++;
+              RelNode subQueryRelNode = genLogicalPlan(qbSQ, false,  relToHiveColNameCalcitePosMap.get(srcRel),
+                      relToHiveRR.get(srcRel));
+              subQueryToRelNode.put(next, subQueryRelNode);
+              isSubQuery = true;
+              break;
+            default:
+              int childCount = next.getChildCount();
+              for(int i = childCount - 1; i >= 0; i--) {
+                stack.push((ASTNode) next.getChild(i));
+              }
           }
-        }
-        relToHiveRR.put(srcRel, outerQBRR);
-        relToHiveColNameCalcitePosMap.put(srcRel, outerQBPosMap);
-        return srcRel;
       }
-
-      return genFilterRelNode(searchCond, srcRel, forHavingClause);
+      return isSubQuery;
+    }
+    private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel,
+        Map<String, RelNode> aliasToRel, ImmutableMap<String, Integer> outerNameToPosMap,
+        RowResolver outerRR, boolean forHavingClause) throws SemanticException {
+
+      Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>();
+      boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause,
+                                                subQueryToRelNode, aliasToRel);
+      if(isSubQuery) {
+        ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel),
+                outerRR, subQueryToRelNode, forHavingClause);
+
+        ImmutableMap<String, Integer> hiveColNameCalcitePosMap = this.relToHiveColNameCalcitePosMap
+                .get(srcRel);
+        RexNode convertedFilterLHS = new RexNodeConverter(cluster, srcRel.getRowType(),
+                outerNameToPosMap, hiveColNameCalcitePosMap, relToHiveRR.get(srcRel),
+                outerRR, 0, true, subqueryId).convert(subQueryExpr);
+
+        RelNode filterRel = new HiveFilter(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
+                srcRel, convertedFilterLHS);
+
+        this.relToHiveColNameCalcitePosMap.put(filterRel, this.relToHiveColNameCalcitePosMap
+                .get(srcRel));
+        relToHiveRR.put(filterRel, relToHiveRR.get(srcRel));
+
+        // semi-join opt doesn't work with subqueries
+        conf.setBoolVar(ConfVars.SEMIJOIN_CONVERSION, false);
+        return filterRel;
+      } else {
+        return genFilterRelNode(searchCond, srcRel, outerNameToPosMap, outerRR, forHavingClause);
+      }
     }
 
     private RelNode projectLeftOuterSide(RelNode srcRel, int numColumns) throws SemanticException {
@@ -2213,14 +2221,15 @@ public class CalcitePlanner extends SemanticAnalyzer {
     }
 
     private RelNode genFilterLogicalPlan(QB qb, RelNode srcRel, Map<String, RelNode> aliasToRel,
-        boolean forHavingClause) throws SemanticException {
+              ImmutableMap<String, Integer> outerNameToPosMap, RowResolver outerRR,
+                                         boolean forHavingClause) throws SemanticException {
       RelNode filterRel = null;
 
       Iterator<ASTNode> whereClauseIterator = getQBParseInfo(qb).getDestToWhereExpr().values()
           .iterator();
       if (whereClauseIterator.hasNext()) {
         filterRel = genFilterRelNode(qb, (ASTNode) whereClauseIterator.next().getChild(0), srcRel,
-            aliasToRel, forHavingClause);
+            aliasToRel, outerNameToPosMap, outerRR, forHavingClause);
       }
 
       return filterRel;
@@ -3521,7 +3530,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
     private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException {
       switch (qbexpr.getOpcode()) {
       case NULLOP:
-        return genLogicalPlan(qbexpr.getQB(), false);
+        return genLogicalPlan(qbexpr.getQB(), false, null, null);
       case UNION:
       case INTERSECT:
       case INTERSECTALL:
@@ -3536,7 +3545,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       }
     }
 
-    private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException {
+    private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
+                                   ImmutableMap<String, Integer> outerNameToPosMap,
+                                   RowResolver outerRR) throws SemanticException {
       RelNode srcRel = null;
       RelNode filterRel = null;
       RelNode gbRel = null;
@@ -3609,7 +3620,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       }
 
       // 2. Build Rel for where Clause
-      filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false);
+      filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, outerNameToPosMap, outerRR, false);
       srcRel = (filterRel == null) ? srcRel : filterRel;
       RelNode starSrcRel = srcRel;
 
@@ -3717,7 +3728,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
           // Special handling of grouping function
           targetNode = rewriteGroupingFunctionAST(getGroupByForClause(qbp, destClauseName), targetNode);
         }
-        gbFilter = genFilterRelNode(qb, targetNode, srcRel, aliasToRel, true);
+        gbFilter = genFilterRelNode(qb, targetNode, srcRel, aliasToRel, null, null, true);
       }
 
       return gbFilter;

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 3458fb6..21ddae6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -538,16 +538,6 @@ public class QBSubQuery implements ISubQueryJoinInfo {
     }
     if ( sharedAlias != null) {
       ASTNode whereClause = SubQueryUtils.subQueryWhere(insertClause);
-
-      if ( whereClause != null ) {
-        ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(whereClause);
-        if ( u != null ) {
-          subQueryAST.setOrigin(originalSQASTOrigin);
-          throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
-              u, "SubQuery cannot use the table alias: " + sharedAlias + "; " +
-                  "this is also an alias in the Outer Query and SubQuery contains a unqualified column reference"));
-        }
-      }
     }
 
     /*

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1f6dfdd..a861263 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -51,6 +51,7 @@ import org.antlr.runtime.tree.TreeVisitor;
 import org.antlr.runtime.tree.TreeVisitorAction;
 import org.antlr.runtime.tree.TreeWizard;
 import org.antlr.runtime.tree.TreeWizard.ContextVisitor;
+import org.apache.calcite.rel.RelNode;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -11325,6 +11326,17 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return genExprNodeDesc(expr, input, true, false);
   }
 
+  public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input,
+                                      RowResolver outerRR, Map<ASTNode, RelNode> subqueryToRelNode,
+                                      boolean useCaching) throws SemanticException {
+
+    TypeCheckCtx tcCtx = new TypeCheckCtx(input, useCaching, false);
+    tcCtx.setOuterRR(outerRR);
+    tcCtx.setSubqueryToRelNode(subqueryToRelNode);
+    return genExprNodeDesc(expr, input, tcCtx);
+  }
+
+
   public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, boolean useCaching)
       throws SemanticException {
     return genExprNodeDesc(expr, input, useCaching, false);

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
index 02896ff..9753f9e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java
@@ -18,9 +18,11 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import org.apache.calcite.rel.RelNode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import java.util.Map;
 
 /**
  * This class implements the context information that is used for typechecking
@@ -35,6 +37,19 @@ public class TypeCheckCtx implements NodeProcessorCtx {
    */
   private RowResolver inputRR;
 
+  /**
+   * RowResolver of outer query. This is used to resolve co-rrelated columns in Filter
+   * TODO:
+   *  this currently will only be able to resolve reference to parent query's column
+   *  this will not work for references to grand-parent column
+   */
+  private RowResolver outerRR;
+
+  /**
+   * Map from astnode of a subquery to it's logical plan.
+   */
+  private Map<ASTNode, RelNode> subqueryToRelNode;
+
   private final boolean useCaching;
 
   private final boolean foldExpr;
@@ -50,7 +65,7 @@ public class TypeCheckCtx implements NodeProcessorCtx {
   private String error;
 
   /**
-   * The node that generated the potential typecheck error
+   * The node that generated the potential typecheck error.
    */
   private ASTNode errorSrcNode;
 
@@ -104,6 +119,8 @@ public class TypeCheckCtx implements NodeProcessorCtx {
     this.allowWindowing = allowWindowing;
     this.allowIndexExpr = allowIndexExpr;
     this.allowSubQueryExpr = allowSubQueryExpr;
+    this.outerRR = null;
+    this.subqueryToRelNode = null;
   }
 
   /**
@@ -122,6 +139,36 @@ public class TypeCheckCtx implements NodeProcessorCtx {
   }
 
   /**
+   * @param outerRR
+   *          the outerRR to set
+   */
+  public void setOuterRR(RowResolver outerRR) {
+    this.outerRR = outerRR;
+  }
+
+  /**
+   * @return the outerRR
+   */
+  public RowResolver getOuterRR() {
+    return outerRR;
+  }
+
+  /**
+   * @param subqueryToRelNode
+   *          the subqueryToRelNode to set
+   */
+  public void setSubqueryToRelNode(Map<ASTNode, RelNode> subqueryToRelNode) {
+    this.subqueryToRelNode = subqueryToRelNode;
+  }
+
+  /**
+   * @return the outerRR
+   */
+  public Map<ASTNode, RelNode> getSubqueryToRelNode() {
+    return subqueryToRelNode;
+  }
+
+  /**
    * @param unparseTranslator
    *          the unparseTranslator to set
    */

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
index ace3eaf..6c30efd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java
@@ -30,6 +30,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
+import org.apache.calcite.rel.RelNode;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.hadoop.hive.common.type.HiveChar;
@@ -43,7 +44,6 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
 import org.apache.hadoop.hive.ql.lib.GraphWalker;
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.lib.Rule;
 import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.lib.ExpressionWalker;
 import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnListDesc;
@@ -60,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeSubQueryDesc;
 import org.apache.hadoop.hive.ql.udf.SettableUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare;
@@ -134,7 +136,10 @@ public class TypeCheckProcFactory {
     ASTNode expr = (ASTNode) nd;
     TypeCheckCtx ctx = (TypeCheckCtx) procCtx;
 
-    if (!ctx.isUseCaching()) {
+    // bypass only if outerRR is not null. Otherwise we need to look for expressions in outerRR for
+    // subqueries e.g. select min(b.value) from table b group by b.key
+    //                                  having key in (select .. where a = min(b.value)
+    if (!ctx.isUseCaching() && ctx.getOuterRR() == null) {
       return null;
     }
 
@@ -147,6 +152,12 @@ public class TypeCheckProcFactory {
 
     // If the current subExpression is pre-calculated, as in Group-By etc.
     ColumnInfo colInfo = input.getExpression(expr);
+
+    // try outer row resolver
+    RowResolver outerRR = ctx.getOuterRR();
+    if(colInfo == null && outerRR != null) {
+        colInfo = outerRR.getExpression(expr);
+    }
     if (colInfo != null) {
       desc = new ExprNodeColumnDesc(colInfo);
       ASTNode source = input.getExpressionSource(expr);
@@ -201,14 +212,14 @@ public class TypeCheckProcFactory {
         + HiveParser.TOK_INTERVAL_SECOND_LITERAL + "%"), tf.getIntervalExprProcessor());
     opRules.put(new RuleRegExp("R7", HiveParser.TOK_TABLE_OR_COL + "%"),
         tf.getColumnExprProcessor());
-    opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_OP + "%"),
+    opRules.put(new RuleRegExp("R8", HiveParser.TOK_SUBQUERY_EXPR + "%"),
         tf.getSubQueryExprProcessor());
 
     // The dispatcher fires the processor corresponding to the closest matching
     // rule and passes the context along
     Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(),
         opRules, tcCtx);
-    GraphWalker ogw = new DefaultGraphWalker(disp);
+    GraphWalker ogw = new ExpressionWalker(disp);
 
     // Create a list of top nodes
     ArrayList<Node> topNodes = Lists.<Node>newArrayList(expr);
@@ -618,6 +629,13 @@ public class TypeCheckProcFactory {
       boolean isTableAlias = input.hasTableAlias(tableOrCol);
       ColumnInfo colInfo = input.get(null, tableOrCol);
 
+      // try outer row resolver
+      if(ctx.getOuterRR() != null && colInfo == null && !isTableAlias) {
+        RowResolver outerRR = ctx.getOuterRR();
+        isTableAlias = outerRR.hasTableAlias(tableOrCol);
+        colInfo = outerRR.get(null, tableOrCol);
+      }
+
       if (isTableAlias) {
         if (colInfo != null) {
           if (parent != null && parent.getType() == HiveParser.DOT) {
@@ -1158,6 +1176,12 @@ public class TypeCheckProcFactory {
       }
       ColumnInfo colInfo = input.get(tableAlias, colName);
 
+      // Try outer Row resolver
+      if(colInfo == null && ctx.getOuterRR() != null) {
+        RowResolver outerRR = ctx.getOuterRR();
+        colInfo = outerRR.get(tableAlias, colName);
+      }
+
       if (colInfo == null) {
         ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr);
         return null;
@@ -1222,6 +1246,10 @@ public class TypeCheckProcFactory {
         return null;
       }
 
+      if(expr.getType() == HiveParser.TOK_SUBQUERY_OP || expr.getType() == HiveParser.TOK_QUERY) {
+        return null;
+      }
+
       if (expr.getType() == HiveParser.TOK_TABNAME) {
         return null;
       }
@@ -1379,11 +1407,47 @@ public class TypeCheckProcFactory {
         return desc;
       }
 
+      //TOK_SUBQUERY_EXPR should have either 2 or 3 children
+      assert(expr.getChildren().size() == 3 || expr.getChildren().size() == 2);
+      //First child should be operand
+      assert(expr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP);
+
+      ASTNode subqueryOp = (ASTNode) expr.getChild(0);
+
+      boolean isIN = (subqueryOp.getChild(0).getType() == HiveParser.KW_IN
+              || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTIN);
+      boolean isEXISTS = (subqueryOp.getChild(0).getType() == HiveParser.KW_EXISTS
+              || subqueryOp.getChild(0).getType() == HiveParser.TOK_SUBQUERY_OP_NOTEXISTS);
+
+      // subqueryToRelNode might be null if subquery expression anywhere other than
+      //  as expected in filter (where/having). We should throw an appropriate error
+      // message
+
+      Map<ASTNode, RelNode> subqueryToRelNode = ctx.getSubqueryToRelNode();
+      if(subqueryToRelNode == null) {
+        throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+                        " Currently SubQuery expressions are only allowed as " +
+                                "Where and Having Clause predicates"));
+      }
+
+      //For now because subquery is only supported in filter
+      // we will create subquery expression of boolean type
+      if(isEXISTS) {
+        return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr),
+                ExprNodeSubQueryDesc.SubqueryType.EXISTS);
+      }
+      if(isIN) {
+        assert(nodeOutputs[2] != null);
+        ExprNodeDesc lhs = (ExprNodeDesc)nodeOutputs[2];
+        return new ExprNodeSubQueryDesc(TypeInfoFactory.booleanTypeInfo, subqueryToRelNode.get(expr),
+                ExprNodeSubQueryDesc.SubqueryType.IN, lhs);
+      }
+
       /*
        * Restriction.1.h :: SubQueries only supported in the SQL Where Clause.
        */
       ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode,
-          "Currently SubQuery expressions are only allowed as Where Clause predicates"),
+          "Currently only IN & EXISTS SubQuery expressions are allowed"),
           sqNode);
       return null;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java
new file mode 100644
index 0000000..aec331b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeSubQueryDesc.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.calcite.rel.RelNode;
+
+/**
+ * This encapsulate subquery expression which consists of
+ *  Relnode for subquery.
+ *  type (IN, EXISTS )
+ *  LHS operand
+ */
+public class ExprNodeSubQueryDesc extends ExprNodeDesc implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  public static enum SubqueryType{
+    IN,
+    EXISTS,
+  };
+
+  public static final int IN=1;
+  public static final int EXISTS=2;
+
+  /**
+   * RexNode corresponding to subquery.
+   */
+  private RelNode rexSubQuery;
+  private ExprNodeDesc subQueryLhs;
+  private SubqueryType type;
+
+  public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery, SubqueryType type) {
+    super(typeInfo);
+    this.rexSubQuery = subQuery;
+    this.subQueryLhs = null;
+    this.type = type;
+  }
+  public ExprNodeSubQueryDesc(TypeInfo typeInfo, RelNode subQuery,
+                              SubqueryType type, ExprNodeDesc lhs) {
+    super(typeInfo);
+    this.rexSubQuery = subQuery;
+    this.subQueryLhs = lhs;
+    this.type = type;
+
+  }
+
+  public SubqueryType getType() {
+    return type;
+  }
+
+  public ExprNodeDesc getSubQueryLhs() {
+    return subQueryLhs;
+  }
+
+  public RelNode getRexSubQuery() {
+    return rexSubQuery;
+  }
+
+  @Override
+  public ExprNodeDesc clone() {
+    return new ExprNodeSubQueryDesc(typeInfo, rexSubQuery, type, subQueryLhs);
+  }
+
+  @Override
+  public boolean isSame(Object o) {
+    if (!(o instanceof ExprNodeSubQueryDesc)) {
+      return false;
+    }
+    ExprNodeSubQueryDesc dest = (ExprNodeSubQueryDesc) o;
+    if (subQueryLhs != null && dest.getSubQueryLhs() != null) {
+      if (!subQueryLhs.equals(dest.getSubQueryLhs())) {
+        return false;
+      }
+    }
+    if (!typeInfo.equals(dest.getTypeInfo())) {
+      return false;
+    }
+    if (!rexSubQuery.equals(dest.getRexSubQuery())) {
+      return false;
+    }
+    if(type != dest.getType()) {
+      return false;
+    }
+    return true;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_corr_from.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_corr_from.q b/ql/src/test/queries/clientnegative/subquery_corr_from.q
new file mode 100644
index 0000000..7df52c9
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_corr_from.q
@@ -0,0 +1,3 @@
+-- corr var are only supported in where/having clause
+
+select * from part po where p_size IN (select p_size from (select p_size, p_type from part pp where pp.p_brand = po.p_brand) p where p.p_type=po.p_type) ;

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q b/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q
new file mode 100644
index 0000000..3519c5b
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_corr_grandparent.q
@@ -0,0 +1,5 @@
+-- inner query can only refer to it's parent query columns
+select *
+from part x 
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where x.p_name = z.p_name))
+;

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_corr_select.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_corr_select.q b/ql/src/test/queries/clientnegative/subquery_corr_select.q
new file mode 100644
index 0000000..a47cee1
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_corr_select.q
@@ -0,0 +1,2 @@
+-- correlated var is only allowed in where/having
+explain select * from part po where p_size IN (select po.p_size from part p where p.p_type=po.p_type) ;

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientnegative/subquery_restrictions.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_restrictions.q b/ql/src/test/queries/clientnegative/subquery_restrictions.q
new file mode 100644
index 0000000..80870d7
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_restrictions.q
@@ -0,0 +1,92 @@
+--Restriction.1.h SubQueries only supported in the SQL Where Clause.
+select src.key in (select key from src s1 where s1.key > '9') 
+from src;
+
+select count(*) 
+from src 
+group by src.key in (select key from src s1 where s1.key > '9') ;
+
+--Restriction.2.h The subquery can only be the RHS of an expression
+----curently paser doesn't allow such queries
+--select * from part where (select p_size from part) IN (1,2);
+
+--Restriction.3.m The predicate operators supported are In, Not In, exists and Not exists.
+----select * from part where p_brand > (select key from src);  
+
+--Check.4.h For Exists and Not Exists, the Sub Query must have 1 or more correlated predicates.
+select * from src where exists (select * from part);
+
+--Check.5.h multiple columns in subquery select
+select * from src where src.key in (select * from src s1 where s1.key > '9');
+
+--Restriction.6.m The LHS in a SubQuery must have all its Column References be qualified
+--This is not restriction anymore
+
+--Restriction 7.h subquery with or condition
+select count(*) 
+from src 
+where src.key in (select key from src s1 where s1.key > '9') or src.value is not null
+;
+
+--Restriction.8.m We allow only 1 SubQuery expression per Query
+select * from part where p_size IN (select p_size from part) AND p_brand IN (select p_brand from part);
+
+--Restriction 9.m nested subquery
+select *
+from part x 
+where x.p_name in (select y.p_name from part y where exists (select z.p_name from part z where y.p_name = z.p_name))
+;
+
+--Restriction.10.h In a SubQuery references to Parent Query columns is only supported in the where clause.
+select * from part where p_size in (select p.p_size + part.p_size from part p);
+select * from part where part.p_size IN (select min(p_size) from part p group by part.p_type);
+
+
+--Restriction.11.m A SubQuery predicate that refers to a Parent Query column must be a valid Join predicate
+select * from part where p_size in (select p_size from part p where p.p_type > part.p_type);
+select * from part where part.p_size IN (select min(p_size) from part p where NOT(part.p_type = p.p_type));
+
+
+--Check.12.h SubQuery predicates cannot only refer to Parent Query columns
+select * from part where p_name IN (select p_name from part p where part.p_type <> 1);
+
+--Restriction.13.m In the case of an implied Group By on a correlated Sub- Query, the SubQuery always returns 1 row. For e.g. a count on an empty set is 0, while all other UDAFs return null. Converting such a SubQuery into a Join by computing all Groups in one shot, changes the semantics: the Group By SubQuery output will not contain rows for Groups that don’t exist.
+select * 
+from src b 
+where exists 
+  (select count(*) 
+  from src a 
+  where b.value = a.value  and a.key = b.key and a.value > 'val_9'
+  )
+;
+
+--Restriction.14.h Correlated Sub Queries cannot contain Windowing clauses.
+select p_mfgr, p_name, p_size 
+from part a 
+where a.p_size in 
+  (select first_value(p_size) over(partition by p_mfgr order by p_size) 
+   from part b 
+   where a.p_brand = b.p_brand)
+;
+
+--Restriction 15.h all unqualified column references in a SubQuery will resolve to table sources within the SubQuery.
+select *
+from src
+where src.key in (select key from src where key > '9')
+;
+
+----------------------------------------------------------------
+-- Following tests does not fall under any restrictions per-se, they just currently don't work with HIVE
+----------------------------------------------------------------
+
+-- correlated var which refers to outer query join table 
+explain select p.p_partkey, li.l_suppkey from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey where li.l_linenumber = 1 and li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_partkey = p.l_partkey) ;
+
+-- union, not in, corr
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);
+
+-- union, not in, corr, cor var in both queries
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part pp where pp.p_mfgr = part.p_comment);
+
+-- IN, union, corr
+explain select * from part where p_name IN (select p_name from part p where p.p_mfgr = part.p_comment UNION ALL select p_brand from part);

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
index cbfb5d5..8936073 100644
--- a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
+++ b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
@@ -1,5 +1,6 @@
 set hive.cbo.returnpath.hiveop=true;
 set hive.stats.fetch.column.stats=true;
+set hive.enable.semijoin.conversion=true;
 ;
 
 set hive.exec.reducers.max = 1;

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/join31.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/join31.q b/ql/src/test/queries/clientpositive/join31.q
index c79105f..aa17b4d 100644
--- a/ql/src/test/queries/clientpositive/join31.q
+++ b/ql/src/test/queries/clientpositive/join31.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+set hive.enable.semijoin.conversion=true;
 -- SORT_QUERY_RESULTS
 
 CREATE TABLE dest_j1(key STRING, cnt INT);

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/multiMapJoin2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/multiMapJoin2.q b/ql/src/test/queries/clientpositive/multiMapJoin2.q
index cf5dbb0..c66dc66 100644
--- a/ql/src/test/queries/clientpositive/multiMapJoin2.q
+++ b/ql/src/test/queries/clientpositive/multiMapJoin2.q
@@ -3,6 +3,7 @@ set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.PostExecutePrinter,org.
 set hive.auto.convert.join=true;
 set hive.auto.convert.join.noconditionaltask=true;
 set hive.auto.convert.join.noconditionaltask.size=6000;
+set hive.enable.semijoin.conversion=true;
 
 -- we will generate one MR job.
 EXPLAIN

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/subquery_in.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q
index c01ae70..5b22dce 100644
--- a/ql/src/test/queries/clientpositive/subquery_in.q
+++ b/ql/src/test/queries/clientpositive/subquery_in.q
@@ -1,5 +1,6 @@
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
+
 -- SORT_QUERY_RESULTS
 
 -- non agg, non corr
@@ -118,3 +119,104 @@ from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li o
 where li.l_linenumber = 1 and
  li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber)
 ;
+
+
+--where has multiple conjuction
+explain select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+select * from part where p_brand <> 'Brand#14' AND p_size IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+
+--lhs contains non-simple expression
+explain select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type);
+select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type);
+
+explain select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);
+select * from part where (p_partkey*p_size) IN (select min(p_partkey) from part group by p_type);
+
+--lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);
+select count(*) as c from part as e where p_size + 100 IN (select p_partkey from part where p_name = e.p_name);
+
+-- lhs contains udf expression
+explain select * from part  where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);
+select * from part  where floor(p_retailprice) IN (select floor(min(p_retailprice)) from part group by p_type);
+
+explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+
+-- correlated query, multiple correlated variables referring to different outer var
+explain select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+select * from part where p_name IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+
+-- correlated var refers to outer table alias
+explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
+select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type IN (select p_type from part where part.p_brand = fpart.brand);
+ 
+-- correlated var refers to outer table alias which is an expression 
+explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);
+select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type IN (select p_type from part where (part.p_size+1) = fpart.size);
+
+-- where plus having
+explain select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- where with having, correlated
+explain select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- subquery with order by
+explain select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;
+select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand;
+
+--order by with limit
+explain select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+select * from part  where (p_size-1) IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+
+-- union, uncorr
+explain select * from src where key IN (select p_name from part UNION ALL select p_brand from part);
+select * from src where key IN (select p_name from part UNION ALL select p_brand from part);
+
+-- corr, subquery has another subquery in from
+explain select p_mfgr, b.p_name, p_size from part b where b.p_name in 
+  (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;
+select p_mfgr, b.p_name, p_size from part b where b.p_name in 
+  (select p_name from (select p_mfgr, p_name, p_size as r from part) a where r < 10 and b.p_mfgr = a.p_mfgr ) order by p_mfgr,p_size;
+
+-- join in subquery, correlated predicate with only one table
+explain select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
+select p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size);
+
+-- join in subquery, correlated predicate with both inner tables, same outer var
+explain select p_partkey from part where p_name in 
+	(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);
+select p_partkey from part where p_name in 
+	(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_size=part.p_size);
+
+-- join in subquery, correlated predicate with both inner tables, different outer var
+explain select p_partkey from part where p_name in 
+	(select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size and p.p_type=part.p_type);
+
+-- subquery within from 
+explain select p_partkey from 
+	(select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;
+select p_partkey from 
+	(select p_size, p_partkey from part where p_name in (select p.p_name from part p left outer join part pp on p.p_type = pp.p_type where pp.p_size = part.p_size)) subq;
+
+
+create table tempty(i int);
+create table tnull(i int);
+insert into tnull values(NULL) , (NULL);
+
+-- empty inner table, non-null sq key, expected empty result
+select * from part where p_size IN (select i from tempty);
+
+-- empty inner table, null sq key, expected empty result
+select * from tnull where i IN (select i from tempty);
+
+-- null inner table, non-null sq key
+select * from part where p_size IN (select i from tnull);
+
+-- null inner table, null sq key
+select * from tnull where i IN (select i from tnull);
+
+drop table tempty;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/382dc208/ql/src/test/queries/clientpositive/subquery_notin.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_notin.q b/ql/src/test/queries/clientpositive/subquery_notin.q
index 3f4fb7f..c29e63f 100644
--- a/ql/src/test/queries/clientpositive/subquery_notin.q
+++ b/ql/src/test/queries/clientpositive/subquery_notin.q
@@ -1,4 +1,5 @@
 set hive.mapred.mode=nonstrict;
+
 -- non agg, non corr
 explain
 select * 
@@ -76,10 +77,10 @@ order by p_mfgr, p_size
 ;
 
 -- non agg, non corr, Group By in Parent Query
-select li.l_partkey, count(*) 
-from lineitem li 
-where li.l_linenumber = 1 and 
-  li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') 
+select li.l_partkey, count(*)
+from lineitem li
+where li.l_linenumber = 1 and
+  li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR')
 group by li.l_partkey
 ;
 
@@ -103,3 +104,127 @@ from T1_v where T1_v.key not in (select T2_v.key from T2_v);
 
 select * 
 from T1_v where T1_v.key not in (select T2_v.key from T2_v);
+
+--where has multiple conjuction
+explain select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+select * from part where p_brand <> 'Brand#14' AND p_size NOT IN (select min(p_size) from part p where p.p_type = part.p_type group by p_type) AND p_size <> 340;
+
+--lhs contains non-simple expression
+explain select * from part  where (p_size-1) NOT IN (select min(p_size) from part group by p_type);
+select * from part  where (p_size-1) NOT IN (select min(p_size) from part group by p_type);
+
+explain select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type);
+select * from part where (p_partkey*p_size) NOT IN (select min(p_partkey) from part group by p_type);
+
+--lhs contains non-simple expression, corr
+explain select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name);
+select count(*) as c from part as e where p_size + 100 NOT IN (select p_partkey from part where p_name = e.p_name);
+
+-- lhs contains udf expression
+explain select * from part  where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type);
+select * from part  where floor(p_retailprice) NOT IN (select floor(min(p_retailprice)) from part group by p_type);
+
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_size + 121150 = p.p_partkey );
+
+-- correlated query, multiple correlated variables referring to different outer var
+explain select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+select * from part where p_name NOT IN (select p_name from part p where p.p_size = part.p_size AND part.p_partkey= p.p_partkey );
+
+-- correlated var refers to outer table alias
+explain select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand);
+select p_name from (select p_name, p_type, p_brand as brand from part) fpart where fpart.p_type NOT IN (select p_type from part where part.p_brand = fpart.brand);
+ 
+-- correlated var refers to outer table alias which is an expression 
+explain select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size);
+select p_name from (select p_name, p_type, p_size+1 as size from part) fpart where fpart.p_type NOT IN (select p_type from part where (part.p_size+1) = fpart.size);
+
+-- where plus having
+explain select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value NOT IN (select value from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- where with having, correlated
+explain select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+select key, count(*) from src where value NOT IN (select value from src sc where sc.key = src.key ) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key );
+
+-- subquery with order by
+explain select * from part  where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand;
+select * from part  where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand;
+
+--order by with limit
+explain select * from part  where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+select * from part  where (p_size-1) NOT IN (select min(p_size) from part group by p_type) order by p_brand limit 4;
+
+-- union, uncorr
+explain select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part);
+select * from src where key NOT IN (select p_name from part UNION ALL select p_brand from part);
+
+explain select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand);
+select count(*) as c from part as e where p_size + 100 not in ( select p_type from part where p_brand = e.p_brand);
+
+--nullability tests
+CREATE TABLE t1 (c1 INT, c2 CHAR(100));
+INSERT INTO t1 VALUES (null,null), (1,''), (2,'abcde'), (100,'abcdefghij');
+
+CREATE TABLE t2 (c1 INT);
+INSERT INTO t2 VALUES (null), (2), (100);
+
+-- uncorr
+explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2);
+SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2);
+
+-- corr
+explain SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c2=t2.c1);
+SELECT c1 FROM t1 WHERE c1 NOT IN (SELECT c1 FROM t2 where t1.c1=t2.c1);
+
+DROP TABLE t1;
+DROP TABLE t2;
+
+-- corr, nullability, should not produce any result
+create table t1(a int, b int);
+insert into t1 values(1,0), (1,0),(1,0);
+
+create table t2(a int, b int);
+insert into t2 values(2,1), (3,1), (NULL,1);
+
+explain select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a);
+select t1.a from t1 where t1.b NOT IN (select t2.a from t2 where t2.b=t1.a);
+drop table t1;
+drop table t2;
+
+
+-- coor, nullability, should produce result
+create table t7(i int, j int);
+insert into t7 values(null, 5), (4, 15);
+
+create table fixOb(i int, j int);
+insert into fixOb values(-1, 5), (-1, 15);
+
+explain select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j);
+select * from fixOb where j NOT IN (select i from t7 where t7.j=fixOb.j);
+
+drop table t7;
+drop table fixOb;
+
+create table t(i int, j int);
+insert into t values(1,2), (4,5), (7, NULL);
+
+
+-- case with empty inner result (t1.j=t.j=NULL) and null subquery key(t.j = NULL)
+explain select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j);
+select t.i from t where t.j NOT IN (select t1.i from t t1 where t1.j=t.j);
+
+-- case with empty inner result (t1.j=t.j=NULL) and non-null subquery key(t.i is never null)
+explain select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j);
+select t.i from t where t.i NOT IN (select t1.i from t t1 where t1.j=t.j);
+
+-- case with non-empty inner result and null subquery key(t.j is null)
+explain select t.i from t where t.j NOT IN (select t1.i from t t1 );
+select t.i from t where t.j NOT IN (select t1.i from t t1 );
+
+-- case with non-empty inner result and non-null subquery key(t.i is never null)
+explain select t.i from t where t.i NOT IN (select t1.i from t t1 );
+select t.i from t where t.i NOT IN (select t1.i from t t1 );
+
+drop table t1;
+


Mime
View raw message