impala-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tarmstr...@apache.org
Subject [2/6] incubator-impala git commit: IMPALA-4336: Cast exprs after unnesting union operands.
Date Thu, 03 Nov 2016 20:36:01 GMT
IMPALA-4336: Cast exprs after unnesting union operands.

The bug was that we cast the result exprs of operands before
unnesting them. If we unnested an operands, casts were missing
on those unnested operands' result exprs.

The fix is to first unnest operands and then cast result exprs.

Also clarifies the use of resultExprs vs. baseTblResultExprs.

Change-Id: I5e3ab7349df7d67d0d9c2baf4a56342d3f04e76d
Reviewed-on: http://gerrit.cloudera.org:8080/4815
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/795c085f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/795c085f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/795c085f

Branch: refs/heads/master
Commit: 795c085fa38980085e830ce46ee8ccf298849dcb
Parents: 2990696
Author: Alex Behm <alex.behm@cloudera.com>
Authored: Fri Oct 21 18:58:36 2016 -0700
Committer: Internal Jenkins <cloudera-hudson@gerrit.cloudera.org>
Committed: Thu Nov 3 08:59:45 2016 +0000

----------------------------------------------------------------------
 .../java/org/apache/impala/analysis/Expr.java   |  21 ++-
 .../org/apache/impala/analysis/UnionStmt.java   | 171 ++++++++++---------
 .../impala/planner/SingleNodePlanner.java       |   4 +-
 .../org/apache/impala/planner/UnionNode.java    |  17 +-
 .../queries/QueryTest/union.test                |  28 +++
 5 files changed, 137 insertions(+), 104 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/795c085f/fe/src/main/java/org/apache/impala/analysis/Expr.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java
index 23ae65f..fd7f86f 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Expr.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java
@@ -1110,15 +1110,20 @@ abstract public class Expr extends TreeNode<Expr> implements
ParseNode, Cloneabl
    * Otherwise returns null.
    */
   public SlotRef unwrapSlotRef(boolean implicitOnly) {
-    if (this instanceof SlotRef) {
-      return (SlotRef) this;
-    } else if (this instanceof CastExpr
-        && (!implicitOnly || ((CastExpr) this).isImplicit())
-        && getChild(0) instanceof SlotRef) {
-      return (SlotRef) getChild(0);
-    } else {
-      return null;
+    Expr unwrappedExpr = unwrapExpr(implicitOnly);
+    if (unwrappedExpr instanceof SlotRef) return (SlotRef) unwrappedExpr;
+    return null;
+  }
+
+  /**
+   * Returns the first child if this Expr is a CastExpr. Otherwise, returns 'this'.
+   */
+  public Expr unwrapExpr(boolean implicitOnly) {
+    if (this instanceof CastExpr
+        && (!implicitOnly || ((CastExpr) this).isImplicit())) {
+      return children_.get(0);
     }
+    return this;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/795c085f/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java b/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
index 4f65e0f..fffb72c 100644
--- a/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/UnionStmt.java
@@ -20,21 +20,27 @@ package org.apache.impala.analysis;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.impala.catalog.ColumnStats;
+import org.apache.impala.common.AnalysisException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.impala.catalog.ColumnStats;
-import org.apache.impala.common.AnalysisException;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
 /**
  * Representation of a union with its list of operands, and optional order by and limit.
- * A union materializes its results, and its resultExprs are slotrefs into the
+ * A union materializes its results, and its resultExprs are SlotRefs into a new
  * materialized tuple.
  * During analysis, the operands are normalized (separated into a single sequence of
  * DISTINCT followed by a single sequence of ALL operands) and unnested to the extent
  * possible. This also creates the AggregationInfo for DISTINCT operands.
+ *
+ * Use of resultExprs vs. baseTblResultExprs:
+ * We consistently use/cast the resultExprs of union operands because the final expr
+ * substitution happens during planning. The only place where baseTblResultExprs are
+ * used is in materializeRequiredSlots() because that is called before plan generation
+ * and we need to mark the slots of resolved exprs as materialized.
  */
 public class UnionStmt extends QueryStmt {
   private final static Logger LOG = LoggerFactory.getLogger(UnionStmt.class);
@@ -198,10 +204,6 @@ public class UnionStmt extends QueryStmt {
     allOperands_.clear();
   }
 
-  /**
-   * Propagates DISTINCT from left to right, and checks that all
-   * union operands are union compatible, adding implicit casts if necessary.
-   */
   @Override
   public void analyze(Analyzer analyzer) throws AnalysisException {
     if (isAnalyzed()) return;
@@ -212,43 +214,21 @@ public class UnionStmt extends QueryStmt {
     }
     Preconditions.checkState(operands_.size() > 0);
 
-    // Propagates DISTINCT from right to left
+    // Propagates DISTINCT from right to left.
     propagateDistinct();
 
-    // Make sure all operands return an equal number of exprs.
-    QueryStmt firstQuery = operands_.get(0).getQueryStmt();
-
-    try {
-      operands_.get(0).analyze(analyzer);
-    } catch (AnalysisException e) {
-      if (analyzer.getMissingTbls().isEmpty()) throw e;
-    }
+    // Analyze all operands and make sure they return an equal number of exprs.
+    analyzeOperands(analyzer);
 
-    List<List<Expr>> resultExprLists = Lists.newArrayList();
-    List<Expr> firstQueryExprs = firstQuery.getBaseTblResultExprs();
-    resultExprLists.add(firstQueryExprs);
-    for (int i = 1; i < operands_.size(); ++i) {
-      QueryStmt query = operands_.get(i).getQueryStmt();
-      try {
-        operands_.get(i).analyze(analyzer);
-        List<Expr> exprs = query.getBaseTblResultExprs();
-        if (firstQueryExprs.size() != exprs.size()) {
-          throw new AnalysisException("Operands have unequal number of columns:\n" +
-              "'" + queryStmtToSql(firstQuery) + "' has " +
-              firstQueryExprs.size() + " column(s)\n" +
-              "'" + queryStmtToSql(query) + "' has " + exprs.size() + " column(s)");
-        }
-        resultExprLists.add(exprs);
-      } catch (AnalysisException e) {
-        if (analyzer.getMissingTbls().isEmpty()) throw e;
-      }
-    }
+    // Remember the SQL string before unnesting operands.
+    toSqlString_ = toSql();
 
-    if (!analyzer.getMissingTbls().isEmpty()) {
-      throw new AnalysisException("Found missing tables. Aborting analysis.");
-    }
+    // Unnest the operands before casting the result exprs. Unnesting may add
+    // additional entries to operands_ and the result exprs of those unnested
+    // operands must also be cast properly.
+    unnestOperands(analyzer);
 
-    // compute hasAnalyticExprs_
+    // Compute hasAnalyticExprs_
     hasAnalyticExprs_ = false;
     for (UnionOperand op: operands_) {
       if (op.hasAnalyticExprs()) {
@@ -257,20 +237,69 @@ public class UnionStmt extends QueryStmt {
       }
     }
 
+    // Collect all result expr lists and cast the exprs as necessary.
+    List<List<Expr>> resultExprLists = Lists.newArrayList();
+    for (UnionOperand op: operands_) {
+      resultExprLists.add(op.getQueryStmt().getResultExprs());
+    }
     analyzer.castToUnionCompatibleTypes(resultExprLists);
 
-    // Create tuple descriptor materialized by this UnionStmt,
-    // its resultExprs, and its sortInfo if necessary.
+    // Create tuple descriptor materialized by this UnionStmt, its resultExprs, and
+    // its sortInfo if necessary.
     createMetadata(analyzer);
     createSortInfo(analyzer);
-    toSqlString_ = toSql();
 
-    unnestOperands(analyzer);
+    // Create unnested operands' smaps.
+    for (UnionOperand operand: operands_) setOperandSmap(operand, analyzer);
+
+    // Create distinctAggInfo, if necessary.
+    if (!distinctOperands_.isEmpty()) {
+      // Aggregate produces exactly the same tuple as the original union stmt.
+      ArrayList<Expr> groupingExprs = Expr.cloneList(resultExprs_);
+      try {
+        distinctAggInfo_ =
+            AggregateInfo.create(groupingExprs, null,
+              analyzer.getDescTbl().getTupleDesc(tupleId_), analyzer);
+      } catch (AnalysisException e) {
+        // Should never happen.
+        throw new IllegalStateException(
+            "Error creating agg info in UnionStmt.analyze()", e);
+      }
+    }
+
     if (evaluateOrderBy_) createSortTupleInfo(analyzer);
     baseTblResultExprs_ = resultExprs_;
   }
 
   /**
+   * Analyzes all operands and checks that they return an equal number of exprs.
+   * Throws an AnalysisException if that is not the case, or if analyzing
+   * an operand fails.
+   */
+  private void analyzeOperands(Analyzer analyzer) throws AnalysisException {
+    for (int i = 0; i < operands_.size(); ++i) {
+      try {
+        operands_.get(i).analyze(analyzer);
+        QueryStmt firstQuery = operands_.get(0).getQueryStmt();
+        List<Expr> firstExprs = operands_.get(0).getQueryStmt().getResultExprs();
+        QueryStmt query = operands_.get(i).getQueryStmt();
+        List<Expr> exprs = query.getResultExprs();
+        if (firstExprs.size() != exprs.size()) {
+          throw new AnalysisException("Operands have unequal number of columns:\n" +
+              "'" + queryStmtToSql(firstQuery) + "' has " +
+              firstExprs.size() + " column(s)\n" +
+              "'" + queryStmtToSql(query) + "' has " + exprs.size() + " column(s)");
+        }
+      } catch (AnalysisException e) {
+        if (analyzer.getMissingTbls().isEmpty()) throw e;
+      }
+    }
+    if (!analyzer.getMissingTbls().isEmpty()) {
+      throw new AnalysisException("Found missing tables. Aborting analysis.");
+    }
+  }
+
+  /**
    * Marks the baseTblResultExprs of its operands as materialized, based on
    * which of the output slots have been marked.
    * Calls materializeRequiredSlots() on the operands themselves.
@@ -278,17 +307,11 @@ public class UnionStmt extends QueryStmt {
   @Override
   public void materializeRequiredSlots(Analyzer analyzer) {
     TupleDescriptor tupleDesc = analyzer.getDescTbl().getTupleDesc(tupleId_);
-    if (!distinctOperands_.isEmpty()) {
-      // to keep things simple we materialize all grouping exprs = output slots,
-      // regardless of what's being referenced externally
-      for (SlotDescriptor slotDesc: tupleDesc.getSlots()) {
-        slotDesc.setIsMaterialized(true);
-      }
-    }
+    // to keep things simple we materialize all grouping exprs = output slots,
+    // regardless of what's being referenced externally
+    if (!distinctOperands_.isEmpty()) tupleDesc.materializeSlots();
 
-    if (evaluateOrderBy_) {
-      sortInfo_.materializeRequiredSlots(analyzer, null);
-    }
+    if (evaluateOrderBy_) sortInfo_.materializeRequiredSlots(analyzer, null);
 
     // collect operands' result exprs
     List<SlotDescriptor> outputSlots = tupleDesc.getSlots();
@@ -320,7 +343,6 @@ public class UnionStmt extends QueryStmt {
     if (operands_.size() == 1) {
       // ValuesStmt for a single row.
       allOperands_.add(operands_.get(0));
-      setOperandSmap(operands_.get(0), analyzer);
       return;
     }
 
@@ -352,50 +374,33 @@ public class UnionStmt extends QueryStmt {
     operands_.clear();
     operands_.addAll(distinctOperands_);
     operands_.addAll(allOperands_);
-
-    // create unnested operands' smaps
-    for (UnionOperand operand: operands_) {
-      setOperandSmap(operand, analyzer);
-    }
-
-    // create distinctAggInfo, if necessary
-    if (!distinctOperands_.isEmpty()) {
-      // Aggregate produces exactly the same tuple as the original union stmt.
-      ArrayList<Expr> groupingExprs = Expr.cloneList(resultExprs_);
-      try {
-        distinctAggInfo_ =
-            AggregateInfo.create(groupingExprs, null,
-              analyzer.getDescTbl().getTupleDesc(tupleId_), analyzer);
-      } catch (AnalysisException e) {
-        // this should never happen
-        throw new AnalysisException("error creating agg info in UnionStmt.analyze()");
-      }
-    }
   }
 
   /**
    * Sets the smap for the given operand. It maps from the output slots this union's
-   * tuple to the corresponding base table exprs of the operand.
+   * tuple to the corresponding result exprs of the operand.
    */
   private void setOperandSmap(UnionOperand operand, Analyzer analyzer) {
     TupleDescriptor tupleDesc = analyzer.getDescTbl().getTupleDesc(tupleId_);
     // operands' smaps were already set in the operands' analyze()
     operand.getSmap().clear();
+    List<Expr> resultExprs = operand.getQueryStmt().getResultExprs();
+    Preconditions.checkState(resultExprs.size() == tupleDesc.getSlots().size());
     for (int i = 0; i < tupleDesc.getSlots().size(); ++i) {
       SlotDescriptor outputSlot = tupleDesc.getSlots().get(i);
-      operand.getSmap().put(
-          new SlotRef(outputSlot),
-          // TODO: baseTblResultExprs?
-          operand.getQueryStmt().getResultExprs().get(i).clone());
+      // Map to the original (uncast) result expr of the operand.
+      Expr origExpr = resultExprs.get(i).unwrapExpr(true).clone();
+      operand.getSmap().put(new SlotRef(outputSlot), origExpr);
     }
   }
 
   /**
-   * Add a single operand to the target list; if the operand itself is a UnionStmt,
-   * apply unnesting to the extent possible (possibly modifying 'operand' in the process).
+   * Add a single operand to the target list; if the operand itself is a UnionStmt, apply
+   * unnesting to the extent possible (possibly modifying 'operand' in the process).
    */
   private void unnestOperand(
       List<UnionOperand> target, Qualifier targetQualifier, UnionOperand operand) {
+    Preconditions.checkState(operand.isAnalyzed());
     QueryStmt queryStmt = operand.getQueryStmt();
     if (queryStmt instanceof SelectStmt) {
       target.add(operand);
@@ -462,12 +467,12 @@ public class UnionStmt extends QueryStmt {
     LOG.trace("UnionStmt.createMetadata: tupleId=" + tupleId_.toString());
 
     // One slot per expr in the select blocks. Use first select block as representative.
-    List<Expr> firstSelectExprs = operands_.get(0).getQueryStmt().getBaseTblResultExprs();
+    List<Expr> firstSelectExprs = operands_.get(0).getQueryStmt().getResultExprs();
 
     // Compute column stats for the materialized slots from the source exprs.
     List<ColumnStats> columnStats = Lists.newArrayList();
     for (int i = 0; i < operands_.size(); ++i) {
-      List<Expr> selectExprs = operands_.get(i).getQueryStmt().getBaseTblResultExprs();
+      List<Expr> selectExprs = operands_.get(i).getQueryStmt().getResultExprs();
       for (int j = 0; j < selectExprs.size(); ++j) {
         ColumnStats statsToAdd = ColumnStats.fromExpr(selectExprs.get(j));
         if (i == 0) {
@@ -503,7 +508,7 @@ public class UnionStmt extends QueryStmt {
       // don't do that if the operand computes analytic exprs
       // (see Planner.createInlineViewPlan() for the reasoning)
       for (UnionOperand op: operands_) {
-        Expr resultExpr = op.getQueryStmt().getBaseTblResultExprs().get(i);
+        Expr resultExpr = op.getQueryStmt().getResultExprs().get(i);
         slotDesc.addSourceExpr(resultExpr);
         if (op.hasAnalyticExprs()) continue;
         SlotRef slotRef = resultExpr.unwrapSlotRef(true);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/795c085f/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index 434e36d..9b0e376 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -1523,7 +1523,7 @@ public class SingleNodePlanner {
       if (queryStmt instanceof SelectStmt) {
         SelectStmt selectStmt = (SelectStmt) queryStmt;
         if (selectStmt.getTableRefs().isEmpty()) {
-          unionNode.addConstExprList(selectStmt.getBaseTblResultExprs());
+          unionNode.addConstExprList(selectStmt.getResultExprs());
           continue;
         }
       }
@@ -1532,7 +1532,7 @@ public class SingleNodePlanner {
       // Place them into a SelectNode on top of the operand's plan.
       opPlan = addUnassignedConjuncts(analyzer, opPlan.getTupleIds(), opPlan);
       if (opPlan instanceof EmptySetNode) continue;
-      unionNode.addChild(opPlan, op.getQueryStmt().getBaseTblResultExprs());
+      unionNode.addChild(opPlan, op.getQueryStmt().getResultExprs());
     }
 
     if (unionDistinctPlan != null) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/795c085f/fe/src/main/java/org/apache/impala/planner/UnionNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/UnionNode.java b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
index a451ba0..a085973 100644
--- a/fe/src/main/java/org/apache/impala/planner/UnionNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/UnionNode.java
@@ -22,9 +22,6 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.SlotDescriptor;
@@ -35,6 +32,9 @@ import org.apache.impala.thrift.TExpr;
 import org.apache.impala.thrift.TPlanNode;
 import org.apache.impala.thrift.TPlanNodeType;
 import org.apache.impala.thrift.TUnionNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
 
@@ -74,16 +74,11 @@ public class UnionNode extends PlanNode {
   public boolean isConstantUnion() { return resultExprLists_.isEmpty(); }
 
   /**
-   * Add a child tree plus its corresponding resolved resultExprs.
+   * Add a child tree plus its corresponding unresolved resultExprs.
    */
-  public void addChild(PlanNode node, List<Expr> baseTblResultExprs) {
+  public void addChild(PlanNode node, List<Expr> resultExprs) {
     super.addChild(node);
-    resultExprLists_.add(baseTblResultExprs);
-    if (baseTblResultExprs != null) {
-      // if we're materializing output, we can only do that into a single
-      // output tuple
-      Preconditions.checkState(tupleIds_.size() == 1, tupleIds_.size());
-    }
+    resultExprLists_.add(resultExprs);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/795c085f/testdata/workloads/functional-query/queries/QueryTest/union.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/union.test b/testdata/workloads/functional-query/queries/QueryTest/union.test
index 9212af3..3ccecd9 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/union.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/union.test
@@ -1009,6 +1009,34 @@ bigint
 1
 ====
 ---- QUERY
+# IMPALA-4336: Test proper result expr casting when unnesting union operands.
+select double_col from alltypestiny union all (select 80 union all select 90)
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+0
+0
+0
+0
+10.1
+10.1
+10.1
+10.1
+80
+90
+---- TYPES
+DOUBLE
+=====
+---- QUERY
+# IMPALA-4336: Test proper result expr casting when unnesting union operands.
+select double_col from alltypestiny union distinct (select 80 union all select 90)
+---- RESULTS: VERIFY_IS_EQUAL_SORTED
+0
+10.1
+80
+90
+---- TYPES
+DOUBLE
+=====
+---- QUERY
 # Regression test for IMPALA-2948. When fast partition key scan is enabled,
 # make sure union nodes formed contain correct constant expressions even if
 # there are unmaterialized slots.


Mime
View raw message