hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject [2/2] hive git commit: HIVE-16091 : Support subqueries in project/select (Vineet Garg via Ashutosh Chauhan)
Date Wed, 15 Mar 2017 18:54:03 GMT
HIVE-16091 : Support subqueries in project/select (Vineet Garg via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <hashutosh@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/6955cf71
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/6955cf71
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/6955cf71

Branch: refs/heads/master
Commit: 6955cf7130cb54d0a5a3d1a33216f09a8321769d
Parents: a086d6c
Author: Vineet Garg <vgarg@hortonworks.com>
Authored: Wed Mar 15 11:53:21 2017 -0700
Committer: Ashutosh Chauhan <hashutosh@apache.org>
Committed: Wed Mar 15 11:53:21 2017 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |    1 +
 .../calcite/reloperators/HiveFilter.java        |    6 +-
 .../calcite/rules/HiveSubQueryRemoveRule.java   |   36 +-
 .../hadoop/hive/ql/parse/CalcitePlanner.java    |  200 +-
 .../hadoop/hive/ql/parse/SubQueryUtils.java     |   47 +
 .../hadoop/hive/ql/plan/ExprNodeDescUtils.java  |    6 +-
 .../queries/clientnegative/subquery_in_select.q |    6 -
 .../clientnegative/subquery_select_aggregate.q  |    2 +
 .../subquery_select_complex_expr.q              |    3 +
 .../clientnegative/subquery_select_distinct.q   |    2 +
 .../clientnegative/subquery_select_distinct2.q  |    2 +
 .../clientnegative/subquery_select_udf.q        |    2 +
 .../queries/clientpositive/subquery_select.q    |  133 +
 .../clientnegative/subquery_in_select.q.out     |    2 +-
 .../subquery_select_aggregate.q.out             |    1 +
 .../subquery_select_distinct.q.out              |    1 +
 .../subquery_select_distinct2.q.out             |    1 +
 .../clientnegative/subquery_select_udf.q.out    |    1 +
 .../clientpositive/llap/subquery_select.q.out   | 2942 ++++++++++++++++++
 19 files changed, 3289 insertions(+), 105 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e445d3b..0c590c8 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -572,6 +572,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   stats_based_fetch_decision.q,\
   subquery_notin.q,\
   subquery_nested_subquery.q, \
+  subquery_select.q, \
   subquery_shared_alias.q, \
   subquery_null_agg.q,\
   table_access_keys_stats.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
index b7b16b8..f19241e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveFilter.java
@@ -49,7 +49,7 @@ public class HiveFilter extends Filter implements HiveRelNode {
   public void implement(Implementor implementor) {
   }
 
-  private void findCorrelatedVar(RexNode node, Set<CorrelationId> allVars) {
+  private static void findCorrelatedVar(RexNode node, Set<CorrelationId> allVars) {
     if(node instanceof RexCall) {
       RexCall nd = (RexCall)node;
       for (RexNode rn : nd.getOperands()) {
@@ -67,7 +67,7 @@ public class HiveFilter extends Filter implements HiveRelNode {
 
   //traverse the given node to find all correlated variables
   // Note that correlated variables are supported in Filter only i.e. Where & Having
-  private void traverseFilter(RexNode node, Set<CorrelationId> allVars) {
+  private static void traverseFilter(RexNode node, Set<CorrelationId> allVars) {
       if(node instanceof RexSubQuery) {
           //we expect correlated variables in HiveFilter only for now.
           // Also check for case where operator has 0 inputs .e.g TableScan
@@ -103,7 +103,7 @@ public class HiveFilter extends Filter implements HiveRelNode {
       return allCorrVars;
   }
 
-  public Set<CorrelationId> getVariablesSet(RexSubQuery e) {
+  public static Set<CorrelationId> getVariablesSet(RexSubQuery e) {
       Set<CorrelationId> allCorrVars = new HashSet<>();
       traverseFilter(e, allCorrVars);
       return allCorrVars;

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index c1768f4..76e0780 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -26,6 +26,7 @@ import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.CorrelationId;
 import org.apache.calcite.rel.core.Filter;
 import org.apache.calcite.rel.core.JoinRelType;
+import org.apache.calcite.rel.core.Project;
 import org.apache.calcite.rex.LogicVisitor;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
@@ -44,6 +45,7 @@ import org.apache.calcite.tools.RelBuilderFactory;
 import org.apache.calcite.util.Pair;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -70,6 +72,32 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
  */
 public abstract class HiveSubQueryRemoveRule extends RelOptRule{
 
+    public static final HiveSubQueryRemoveRule PROJECT =
+            new HiveSubQueryRemoveRule(
+                    operand(Project.class, null, RexUtil.SubQueryFinder.PROJECT_PREDICATE,
+                            any()),
+                    HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Project") {
+                public void onMatch(RelOptRuleCall call) {
+                    final Project project = call.rel(0);
+                    //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite
1.11.0 is released
+                    final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null,
call.rel(0).getCluster(), null);
+                    final RexSubQuery e =
+                            RexUtil.SubQueryFinder.find(project.getProjects());
+                    assert e != null;
+                    final RelOptUtil.Logic logic =
+                            LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN,
+                                    project.getProjects(), e);
+                    builder.push(project.getInput());
+                    final int fieldCount = builder.peek().getRowType().getFieldCount();
+                    final RexNode target = apply(e, HiveFilter.getVariablesSet(e),
+                            logic, builder, 1, fieldCount, false);
+                    final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+                    builder.project(shuttle.apply(project.getProjects()),
+                            project.getRowType().getFieldNames());
+                    call.transformTo(builder.build());
+                }
+            };
+
     public static final HiveSubQueryRemoveRule FILTER =
             new HiveSubQueryRemoveRule(
                     operand(Filter.class, null, RexUtil.SubQueryFinder.FILTER_PREDICATE,
@@ -94,7 +122,7 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{
                     Set<RelNode> corrScalarQueries = filter.getCluster().getPlanner().getContext().unwrap(Set.class);
                     boolean isCorrScalarQuery = corrScalarQueries.contains(e.rel);
 
-                    final RexNode target = apply(e, ((HiveFilter)filter).getVariablesSet(e),
logic,
+                    final RexNode target = apply(e, HiveFilter.getVariablesSet(e), logic,
                             builder, 1, fieldCount, isCorrScalarQuery);
                     final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
                     builder.filter(shuttle.apply(filter.getCondition()));
@@ -368,16 +396,16 @@ public abstract class HiveSubQueryRemoveRule extends RelOptRule{
                     // we are creating filter here so should not be returning NULL. Not sure
why Calcite return NULL
                     //operands.add(builder.or(keyIsNulls), builder.literal(false));
                 }
-                Boolean b = true;
+                RexNode b = builder.literal(true);
                 switch (logic) {
                     case TRUE_FALSE_UNKNOWN:
-                        b = null;
+                        b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN);
                         // fall through
                     case UNKNOWN_AS_TRUE:
                         operands.add(
                                 builder.call(SqlStdOperatorTable.LESS_THAN,
                                         builder.field("ct", "ck"), builder.field("ct", "c")),
-                                builder.literal(b));
+                                b);
                         break;
                 }
                 operands.add(builder.literal(false));

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index bf0a11b..34a6dd4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1340,7 +1340,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       //Remove subquery
       LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
       calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null,
-              HiveSubQueryRemoveRule.FILTER);
+              HiveSubQueryRemoveRule.FILTER, HiveSubQueryRemoveRule.PROJECT);
       LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan));
 
       calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan);
@@ -2363,7 +2363,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
     }
 
     private void subqueryRestrictionCheck(QB qb, ASTNode searchCond, RelNode srcRel,
-                                         boolean forHavingClause, Map<String, RelNode>
aliasToRel,
+                                         boolean forHavingClause,
                                           Set<ASTNode> corrScalarQueries) throws SemanticException
{
         List<ASTNode> subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond);
 
@@ -2407,11 +2407,6 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
           String havingInputAlias = null;
 
-          if (forHavingClause) {
-            havingInputAlias = "gby_sq" + sqIdx;
-            aliasToRel.put(havingInputAlias, srcRel);
-          }
-
           boolean isCorrScalarWithAgg = subQuery.subqueryRestrictionsCheck(inputRR, forHavingClause,
havingInputAlias);
           if(isCorrScalarWithAgg) {
             corrScalarQueries.add(originalSubQueryAST);
@@ -2419,12 +2414,11 @@ public class CalcitePlanner extends SemanticAnalyzer {
       }
     }
     private boolean genSubQueryRelNode(QB qb, ASTNode node, RelNode srcRel, boolean forHavingClause,
-                                       Map<ASTNode, RelNode> subQueryToRelNode,
-                                       Map<String, RelNode> aliasToRel) throws SemanticException
{
+                                       Map<ASTNode, RelNode> subQueryToRelNode) throws
SemanticException {
 
         Set<ASTNode> corrScalarQueriesWithAgg = new HashSet<ASTNode>();
         //disallow subqueries which HIVE doesn't currently support
-        subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, aliasToRel, corrScalarQueriesWithAgg);
+        subqueryRestrictionCheck(qb, node, srcRel, forHavingClause, corrScalarQueriesWithAgg);
         Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
         stack.push(node);
 
@@ -2474,7 +2468,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>();
       boolean isSubQuery = genSubQueryRelNode(qb, searchCond, srcRel, forHavingClause,
-                                                subQueryToRelNode, aliasToRel);
+                                                subQueryToRelNode);
       if(isSubQuery) {
         ExprNodeDesc subQueryExpr = genExprNodeDesc(searchCond, relToHiveRR.get(srcRel),
                 outerRR, subQueryToRelNode, forHavingClause);
@@ -2821,11 +2815,12 @@ public class CalcitePlanner extends SemanticAnalyzer {
       // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it
will match
       // SEL%SEL% rule.
       ASTNode selExprList = qb.getParseInfo().getSelForClause(detsClauseName);
+      SubQueryUtils.checkForTopLevelSubqueries(selExprList);
       if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
           && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount()
== 1) {
         ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
         if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
-          srcRel = genSelectLogicalPlan(qb, srcRel, srcRel);
+          srcRel = genSelectLogicalPlan(qb, srcRel, srcRel, null,null);
           RowResolver rr = this.relToHiveRR.get(srcRel);
           qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr));
         }
@@ -3452,7 +3447,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
      *
      * @throws SemanticException
      */
-    private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel)
+    private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel,
+                                         ImmutableMap<String, Integer> outerNameToPosMap,
RowResolver outerRR)
         throws SemanticException {
       // 0. Generate a Select Node for Windowing
       // Exclude the newly-generated select columns from */etc. resolution.
@@ -3467,6 +3463,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       String selClauseName = qbp.getClauseNames().iterator().next();
       ASTNode selExprList = qbp.getSelForClause(selClauseName);
 
+      // make sure if there is subquery it is top level expression
+      SubQueryUtils.checkForTopLevelSubqueries(selExprList);
+
       final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty()
               || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty());
 
@@ -3599,98 +3598,119 @@ public class CalcitePlanner extends SemanticAnalyzer {
           // 6.3 Get rid of TOK_SELEXPR
           expr = (ASTNode) child.getChild(0);
           String[] colRef = SemanticAnalyzer.getColAlias(child, getAutogenColAliasPrfxLbl(),
-              inputRR, autogenColAliasPrfxIncludeFuncName(), i);
+                  inputRR, autogenColAliasPrfxIncludeFuncName(), i);
           tabAlias = colRef[0];
           colAlias = colRef[1];
           if (hasAsClause) {
             unparseTranslator.addIdentifierTranslation((ASTNode) child
-                .getChild(1));
+                    .getChild(1));
           }
         }
 
-        // 6.4 Build ExprNode corresponding to colums
-        if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
-          pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer
-              .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), expr, col_list,
-              excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(), true);
-          selectStar = true;
-        } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
-            && !hasAsClause
-            && !inputRR.getIsExprResolver()
-            && SemanticAnalyzer.isRegex(
-                SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf)) {
-          // In case the expression is a regex COL.
-          // This can only happen without AS clause
-          // We don't allow this for ExprResolver - the Group By case
-          pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()),
-              null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch,
-              qb.getAliases(), true);
-        } else if (expr.getType() == HiveParser.DOT
-            && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
-            && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0)
-                .getChild(0).getText().toLowerCase()))
-            && !hasAsClause
-            && !inputRR.getIsExprResolver()
-            && SemanticAnalyzer.isRegex(
-                SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf)) {
-          // In case the expression is TABLE.COL (col can be regex).
-          // This can only happen without AS clause
-          // We don't allow this for ExprResolver - the Group By case
-          pos = genColListRegex(
-              SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()),
-              SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText()
-                  .toLowerCase()), expr, col_list, excludedColumns, inputRR, starRR, pos,
-              out_rwsch, qb.getAliases(), true);
-        } else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI)
-            && !(srcRel instanceof HiveAggregate)) {
-          // Likely a malformed query eg, select hash(distinct c1) from t1;
-          throw new CalciteSemanticException("Distinct without an aggregation.",
-              UnsupportedFeature.Distinct_without_an_aggreggation);
-        } else {
-          // Case when this is an expression
-          TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
-          // We allow stateful functions in the SELECT list (but nowhere else)
-          tcCtx.setAllowStatefulFunctions(true);
-          if (!qbp.getDestToGroupBy().isEmpty()) {
-            // Special handling of grouping function
-            expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName), expr,
-                !cubeRollupGrpSetPresent);
-          }
-          ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
-          String recommended = recommendName(exp, colAlias);
-          if (recommended != null && out_rwsch.get(null, recommended) == null) {
-            colAlias = recommended;
-          }
-          col_list.add(exp);
-
-          ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
-              exp.getWritableObjectInspector(), tabAlias, false);
-          colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc)
exp)
-              .isSkewedCol() : false);
-          if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
-            throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias + "."
-                + colAlias + " => " + colInfo + " due to duplication, see previous warnings",
-                UnsupportedFeature.Duplicates_in_RR);
-          }
+          Map<ASTNode, RelNode> subQueryToRelNode = new HashMap<>();
+          boolean isSubQuery = genSubQueryRelNode(qb, expr, srcRel, false,
+                  subQueryToRelNode);
+          if(isSubQuery) {
+            ExprNodeDesc subQueryExpr = genExprNodeDesc(expr, relToHiveRR.get(srcRel),
+                    outerRR, subQueryToRelNode, false);
+            col_list.add(subQueryExpr);
+
+            ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
+                    subQueryExpr.getWritableObjectInspector(), tabAlias, false);
+            if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
+              throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias +
"."
+                      + colAlias + " => " + colInfo + " due to duplication, see previous
warnings",
+                      UnsupportedFeature.Duplicates_in_RR);
+            }
+          } else {
 
-          if (exp instanceof ExprNodeColumnDesc) {
-            ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
-            String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
-            if (altMapping != null) {
-              // TODO: this can overwrite the mapping. Should this be allowed?
-              out_rwsch.put(altMapping[0], altMapping[1], colInfo);
+            // 6.4 Build ExprNode corresponding to colums
+            if (expr.getType() == HiveParser.TOK_ALLCOLREF) {
+              pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : SemanticAnalyzer
+                              .getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(),
expr, col_list,
+                      excludedColumns, inputRR, starRR, pos, out_rwsch, qb.getAliases(),
true);
+              selectStar = true;
+            } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL
+                    && !hasAsClause
+                    && !inputRR.getIsExprResolver()
+                    && SemanticAnalyzer.isRegex(
+                    SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()), conf))
{
+              // In case the expression is a regex COL.
+              // This can only happen without AS clause
+              // We don't allow this for ExprResolver - the Group By case
+              pos = genColListRegex(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getText()),
+                      null, expr, col_list, excludedColumns, inputRR, starRR, pos, out_rwsch,
+                      qb.getAliases(), true);
+            } else if (expr.getType() == HiveParser.DOT
+                    && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL
+                    && inputRR.hasTableAlias(SemanticAnalyzer.unescapeIdentifier(expr.getChild(0)
+                    .getChild(0).getText().toLowerCase()))
+                    && !hasAsClause
+                    && !inputRR.getIsExprResolver()
+                    && SemanticAnalyzer.isRegex(
+                    SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()), conf))
{
+              // In case the expression is TABLE.COL (col can be regex).
+              // This can only happen without AS clause
+              // We don't allow this for ExprResolver - the Group By case
+              pos = genColListRegex(
+                      SemanticAnalyzer.unescapeIdentifier(expr.getChild(1).getText()),
+                      SemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0).getText()
+                              .toLowerCase()), expr, col_list, excludedColumns, inputRR,
starRR, pos,
+                      out_rwsch, qb.getAliases(), true);
+            } else if (ParseUtils.containsTokenOfType(expr, HiveParser.TOK_FUNCTIONDI)
+                    && !(srcRel instanceof HiveAggregate)) {
+              // Likely a malformed query eg, select hash(distinct c1) from t1;
+              throw new CalciteSemanticException("Distinct without an aggregation.",
+                      UnsupportedFeature.Distinct_without_an_aggreggation);
             }
-          }
+              else {
+              // Case when this is an expression
+              TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
+              // We allow stateful functions in the SELECT list (but nowhere else)
+              tcCtx.setAllowStatefulFunctions(true);
+              if (!qbp.getDestToGroupBy().isEmpty()) {
+                // Special handling of grouping function
+                expr = rewriteGroupingFunctionAST(getGroupByForClause(qbp, selClauseName),
expr,
+                        !cubeRollupGrpSetPresent);
+              }
+              ExprNodeDesc exp = genExprNodeDesc(expr, inputRR, tcCtx);
+              String recommended = recommendName(exp, colAlias);
+              if (recommended != null && out_rwsch.get(null, recommended) == null)
{
+                colAlias = recommended;
+              }
+              col_list.add(exp);
+
+              ColumnInfo colInfo = new ColumnInfo(SemanticAnalyzer.getColumnInternalName(pos),
+                      exp.getWritableObjectInspector(), tabAlias, false);
+              colInfo.setSkewedCol((exp instanceof ExprNodeColumnDesc) ? ((ExprNodeColumnDesc)
exp)
+                      .isSkewedCol() : false);
+              if (!out_rwsch.putWithCheck(tabAlias, colAlias, null, colInfo)) {
+                throw new CalciteSemanticException("Cannot add column to RR: " + tabAlias
+ "."
+                        + colAlias + " => " + colInfo + " due to duplication, see previous
warnings",
+                        UnsupportedFeature.Duplicates_in_RR);
+              }
 
-          pos = Integer.valueOf(pos.intValue() + 1);
-        }
+              if (exp instanceof ExprNodeColumnDesc) {
+                ExprNodeColumnDesc colExp = (ExprNodeColumnDesc) exp;
+                String[] altMapping = inputRR.getAlternateMappings(colExp.getColumn());
+                if (altMapping != null) {
+                  // TODO: this can overwrite the mapping. Should this be allowed?
+                  out_rwsch.put(altMapping[0], altMapping[1], colInfo);
+                }
+              }
+
+              pos = Integer.valueOf(pos.intValue() + 1);
+            }
+          }
       }
       selectStar = selectStar && exprList.getChildCount() == posn + 1;
 
       // 7. Convert Hive projections to Calcite
       List<RexNode> calciteColLst = new ArrayList<RexNode>();
+
       RexNodeConverter rexNodeConv = new RexNodeConverter(cluster, srcRel.getRowType(),
-          buildHiveColNameToInputPosMap(col_list, inputRR), 0, false);
+              outerNameToPosMap, buildHiveColNameToInputPosMap(col_list, inputRR), relToHiveRR.get(srcRel),
+              outerRR, 0, false, subqueryId);
       for (ExprNodeDesc colExpr : col_list) {
         calciteColLst.add(rexNodeConv.convert(colExpr));
       }
@@ -3946,7 +3966,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
       srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel;
 
       // 5. Build Rel for Select Clause
-      selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel);
+      selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel, outerNameToPosMap, outerRR);
       srcRel = (selectRel == null) ? srcRel : selectRel;
 
       // 6. Build Rel for OB Clause

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
index f0165dd..0ec24b5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
 import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryTypeDef;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
+import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException;
 
 public class SubQueryUtils {
 
@@ -651,6 +652,52 @@ public class SubQueryUtils {
     
     return eq;
   }
+
+  static void checkForSubqueries(ASTNode node) throws SemanticException {
+    // allow NOT but throw an error for rest
+    if(node.getType() == HiveParser.TOK_SUBQUERY_EXPR
+            && node.getParent().getType() != HiveParser.KW_NOT) {
+      throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+              "Invalid subquery. Subquery in SELECT could only be top-level expression"));
+    }
+    for(int i=0; i<node.getChildCount(); i++) {
+        checkForSubqueries((ASTNode)node.getChild(i));
+    }
+  }
+  /*
+   * Given a TOK_SELECT this checks IF there is a subquery
+   *  it is top level expression, else it throws an error
+   */
+  public static void checkForTopLevelSubqueries(ASTNode selExprList) throws SemanticException{
+   // should be either SELECT or SELECT DISTINCT
+    assert(selExprList.getType() == HiveParser.TOK_SELECT
+            || selExprList.getType() == HiveParser.TOK_SELECTDI);
+    for(int i=0; i<selExprList.getChildCount(); i++) {
+      ASTNode selExpr = (ASTNode)selExprList.getChild(i);
+      // could get either query hint or select expr
+      assert(selExpr.getType() == HiveParser.TOK_SELEXPR
+        || selExpr.getType() == HiveParser.QUERY_HINT);
+
+      if(selExpr.getType() == HiveParser.QUERY_HINT) {
+        // skip query hints
+        continue;
+      }
+
+      if(selExpr.getChildCount() == 1
+        && selExpr.getChild(0).getType() == HiveParser.TOK_SUBQUERY_EXPR) {
+        if(selExprList.getType() == HiveParser.TOK_SELECTDI) {
+          throw new CalciteSubquerySemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(
+                  "Invalid subquery. Subquery with DISTINCT clause is not supported!"));
+
+        }
+        continue; //we are good since subquery is top level expression
+      }
+      // otherwise we need to make sure that there is no subquery at any level
+      for(int j=0; j<selExpr.getChildCount(); j++) {
+        checkForSubqueries((ASTNode) selExpr.getChild(j));
+      }
+    }
+  }
   
   public static interface ISubQueryJoinInfo {
     public String getAlias();

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 6c10704..fac60c1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -504,7 +504,11 @@ public class ExprNodeDescUtils {
 		} else if (exprDesc instanceof ExprNodeFieldDesc) {
 			getExprNodeColumnDesc(((ExprNodeFieldDesc) exprDesc).getDesc(),
 					hashCodeToColumnDescMap);
-		}
+		} else if( exprDesc instanceof  ExprNodeSubQueryDesc) {
+		    getExprNodeColumnDesc(((ExprNodeSubQueryDesc) exprDesc).getSubQueryLhs(),
+                    hashCodeToColumnDescMap);
+        }
+
 	}
 
   public static boolean isConstant(ExprNodeDesc value) {

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_in_select.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_in_select.q b/ql/src/test/queries/clientnegative/subquery_in_select.q
deleted file mode 100644
index 1365389..0000000
--- a/ql/src/test/queries/clientnegative/subquery_in_select.q
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-select src.key in (select key from src s1 where s1.key > '9') 
-from src
-;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_aggregate.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_select_aggregate.q b/ql/src/test/queries/clientnegative/subquery_select_aggregate.q
new file mode 100644
index 0000000..8da8521
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_select_aggregate.q
@@ -0,0 +1,2 @@
+-- subqueries in UDFs are not allowed
+explain SELECT count((SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type)) from
part;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q b/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q
new file mode 100644
index 0000000..8e6cf66
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_select_complex_expr.q
@@ -0,0 +1,3 @@
+
+-- since subquery is not top level expression this should throw an error
+explain SELECT p_size, 1+(SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type) from
part;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_distinct.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_select_distinct.q b/ql/src/test/queries/clientnegative/subquery_select_distinct.q
new file mode 100644
index 0000000..5682ae7
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_select_distinct.q
@@ -0,0 +1,2 @@
+-- subqueries in UDFs are not allowed
+explain SELECT distinct (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type) from
part;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_distinct2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_select_distinct2.q b/ql/src/test/queries/clientnegative/subquery_select_distinct2.q
new file mode 100644
index 0000000..90c0588
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_select_distinct2.q
@@ -0,0 +1,2 @@
+-- subqueries in UDFs are not allowed
+explain SELECT distinct p_size, (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type)
from part;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientnegative/subquery_select_udf.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/subquery_select_udf.q b/ql/src/test/queries/clientnegative/subquery_select_udf.q
new file mode 100644
index 0000000..a52a5a9
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/subquery_select_udf.q
@@ -0,0 +1,2 @@
+-- subqueries in UDFs are not allowed
+explain SELECT p_size, exp((SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type))
from part;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/queries/clientpositive/subquery_select.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/subquery_select.q b/ql/src/test/queries/clientpositive/subquery_select.q
new file mode 100644
index 0000000..cb7d7aa
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/subquery_select.q
@@ -0,0 +1,133 @@
+-- following tests test queries in SELECT
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+
+-- IN, non corr
+explain SELECT p_size, p_size IN (
+        SELECT MAX(p_size) FROM part)
+FROM part;
+
+SELECT p_size, p_size IN (
+        SELECT MAX(p_size) FROM part)
+FROM part ;
+
+-- IN, corr
+EXPLAIN SELECT p_size, p_size IN (
+        SELECT MAX(p_size) FROM part p where p.p_type = part.p_type)
+FROM part;
+
+SELECT p_size, p_size IN (
+        SELECT MAX(p_size) FROM part p where p.p_type = part.p_type)
+FROM part;
+
+-- NOT IN, non corr
+explain SELECT p_size, p_size NOT IN (
+        SELECT MAX(p_size) FROM part)
+FROM part;
+
+SELECT p_size, p_size NOT IN (
+        SELECT MAX(p_size) FROM part)
+FROM part ;
+
+-- NOT IN, corr
+EXPLAIN SELECT p_size, p_size NOT IN (
+        SELECT MAX(p_size) FROM part p where p.p_type = part.p_type)
+FROM part;
+
+SELECT p_size, p_size NOT IN (
+        SELECT MAX(p_size) FROM part p where p.p_type = part.p_type)
+FROM part;
+
+-- EXISTS, non corr
+explain SELECT p_size, EXISTS(SELECT p_size FROM part)
+FROM part;
+
+SELECT p_size, EXISTS(SELECT p_size FROM part)
+FROM part;
+
+-- EXISTS, corr
+explain SELECT p_size, EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type)
+FROM part;
+
+SELECT p_size, EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type)
+FROM part;
+
+-- NOT EXISTS, non corr
+explain SELECT p_size, NOT EXISTS(SELECT p_size FROM part)
+FROM part;
+
+SELECT p_size, NOT EXISTS(SELECT p_size FROM part)
+FROM part;
+
+-- NOT EXISTS, corr
+explain SELECT p_size, NOT EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type)
+FROM part;
+
+SELECT p_size, NOT EXISTS(SELECT p_size FROM part pp where pp.p_type = part.p_type)
+FROM part;
+
+-- SCALAR, corr
+explain SELECT p_size, (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type)
+FROM part;
+
+SELECT p_size, (SELECT max(p_size) FROM part p WHERE p.p_type = part.p_type)
+FROM part;
+
+-- SCALAR, non corr
+explain SELECT p_size, (SELECT max(p_size) FROM part)
+    FROM part;
+
+SELECT p_size, (SELECT max(p_size) FROM part)
+    FROM part;
+
+-- IN, corr with scalar
+explain
+select *
+from src b
+where b.key in
+        (select (select max(key) from src)
+         from src a
+         where b.value = a.value and a.key > '9'
+        );
+select *
+from src b
+where b.key in
+        (select (select max(key) from src)
+         from src a
+         where b.value = a.value and a.key > '9'
+        );
+
+-- corr within corr..correcionnn..
+explain
+select *
+from src b
+where b.key in
+        (select (select max(key) from src sc where sc.value = a.value)
+         from src a
+         where b.value = a.value and a.key > '9'
+        );
+
+select *
+from src b
+where b.key in
+        (select (select max(key) from src sc where sc.value = a.value)
+         from src a
+         where b.value = a.value and a.key > '9' );
+
+CREATE table tnull(i int);
+insert into tnull values(null);
+
+-- IN query returns unknown/NULL instead of true/false
+explain select p_size, p_size IN (select i from tnull) from part;
+select p_size, p_size IN (select i from tnull) from part;
+
+CREATE TABLE tempty(i int);
+
+explain select p_size, (select count(*) from tempty) from part;
+select p_size, (select count(*) from tempty) from part;
+
+explain select p_size, (select max(i) from tempty) from part;
+select p_size, (select max(i) from tempty) from part;
+
+DROP table tempty;
+DROP table tnull;

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_in_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_in_select.q.out b/ql/src/test/results/clientnegative/subquery_in_select.q.out
index 5c69690..a8e1b99 100644
--- a/ql/src/test/results/clientnegative/subquery_in_select.q.out
+++ b/ql/src/test/results/clientnegative/subquery_in_select.q.out
@@ -1 +1 @@
-FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException:
Unsupported SubQuery Expression  Currently SubQuery expressions are only allowed as Where
and Having Clause predicates
+FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException:
Unsupported SubQuery Expression Invalid subquery. Subquery in SELECT could only be top-level
expression

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out b/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out
new file mode 100644
index 0000000..a8e1b99
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_select_aggregate.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException:
Unsupported SubQuery Expression Invalid subquery. Subquery in SELECT could only be top-level
expression

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_distinct.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_select_distinct.q.out b/ql/src/test/results/clientnegative/subquery_select_distinct.q.out
new file mode 100644
index 0000000..51c1212
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_select_distinct.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException:
Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported!

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out b/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out
new file mode 100644
index 0000000..51c1212
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_select_distinct2.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException:
Unsupported SubQuery Expression Invalid subquery. Subquery with DISTINCT clause is not supported!

http://git-wip-us.apache.org/repos/asf/hive/blob/6955cf71/ql/src/test/results/clientnegative/subquery_select_udf.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/subquery_select_udf.q.out b/ql/src/test/results/clientnegative/subquery_select_udf.q.out
new file mode 100644
index 0000000..a8e1b99
--- /dev/null
+++ b/ql/src/test/results/clientnegative/subquery_select_udf.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException:
Unsupported SubQuery Expression Invalid subquery. Subquery in SELECT could only be top-level
expression


Mime
View raw message