Subject: svn commit: r1674738 [2/11] - in /hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ hbase-handler/ metastore/bin/ ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ ql/src/java/org/apache/hadoop/h...
Date: Mon, 20 Apr 2015 06:14:39 -0000
To: commits@hive.apache.org
From: hashutosh@apache.org

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java?rev=1674738&r1=1674737&r2=1674738&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ExprNodeConverter.java Mon Apr 20 06:14:38 2015
@@ -24,58 +24,94 @@
 import java.util.Calendar;
 import java.util.LinkedList;
 import java.util.List;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexFieldCollation;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.rex.RexVisitorImpl;
+import org.apache.calcite.rex.RexWindow;
+import org.apache.calcite.rex.RexWindowBound;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.type.SqlTypeUtil;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.RexVisitor;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter.Schema;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.Order;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderExpression;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.OrderSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionExpression;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionSpec;
+import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.RangeBoundarySpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.ValueBoundarySpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFrameSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec;
+import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 /*
  * convert a RexNode to an ExprNodeDesc
  */
 public class ExprNodeConverter extends RexVisitorImpl {
 
-  RelDataType rType;
-  String tabAlias;
-  boolean partitioningExpr;
+  String tabAlias;
+  String columnAlias;
+  RelDataType inputRowType;
+  RelDataType outputRowType;
+  boolean partitioningExpr;
+  WindowFunctionSpec wfs;
   private final RelDataTypeFactory dTFactory;
+  protected final Log LOG = LogFactory.getLog(this.getClass().getName());
 
-  public ExprNodeConverter(String tabAlias, RelDataType rType, boolean partitioningExpr, RelDataTypeFactory dTFactory) {
+  public ExprNodeConverter(String tabAlias, RelDataType inputRowType,
+      boolean partitioningExpr, RelDataTypeFactory dTFactory) {
+    this(tabAlias, null, inputRowType, null, partitioningExpr, dTFactory);
+  }
+
+  public ExprNodeConverter(String tabAlias, String columnAlias, RelDataType inputRowType,
+      RelDataType outputRowType, boolean partitioningExpr, RelDataTypeFactory dTFactory) {
     super(true);
-    /*
-     * hb: 6/25/14 for now we only support expressions that only contain
-     * partition cols. there is no use case for supporting generic expressions.
-     * for supporting generic exprs., we need to give the converter information
-     * on whether a column is a partition column or not, whether a column is a
-     * virtual column or not.
- */ - assert partitioningExpr == true; this.tabAlias = tabAlias; - this.rType = rType; + this.columnAlias = columnAlias; + this.inputRowType = inputRowType; + this.outputRowType = outputRowType; this.partitioningExpr = partitioningExpr; - this.dTFactory = dTFactory; + this.dTFactory = dTFactory; + } + + public WindowFunctionSpec getWindowFunctionSpec() { + return this.wfs; } @Override public ExprNodeDesc visitInputRef(RexInputRef inputRef) { - RelDataTypeField f = rType.getFieldList().get(inputRef.getIndex()); + RelDataTypeField f = inputRowType.getFieldList().get(inputRef.getIndex()); return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), f.getName(), tabAlias, partitioningExpr); } + /** + * TODO: Handle 1) cast 2) Field Access 3) Windowing Over() 4, Windowing Agg Call + */ @Override public ExprNodeDesc visitCall(RexCall call) { ExprNodeGenericFuncDesc gfDesc = null; @@ -99,30 +135,46 @@ public class ExprNodeConverter extends R } else if (ASTConverter.isFlat(call)) { // If Expr is flat (and[p,q,r,s] or[p,q,r,s]) then recursively build the // exprnode + GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2); ArrayList tmpExprArgs = new ArrayList(); tmpExprArgs.addAll(args.subList(0, 2)); - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), - SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); + try { + gfDesc = ExprNodeGenericFuncDesc.newInstance(hiveUdf, tmpExprArgs); + } catch (UDFArgumentException e) { + LOG.error(e); + throw new RuntimeException(e); + } for (int i = 2; i < call.operands.size(); i++) { tmpExprArgs = new ArrayList(); tmpExprArgs.add(gfDesc); tmpExprArgs.add(args.get(i)); - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), - SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), 2), tmpExprArgs); + try { + gfDesc = ExprNodeGenericFuncDesc.newInstance(hiveUdf, tmpExprArgs); + } catch (UDFArgumentException e) { + LOG.error(e); + throw new RuntimeException(e); + } } } else { - GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF( - call.getOperator(), call.getType(), args.size()); + GenericUDF hiveUdf = SqlFunctionConverter.getHiveUDF(call.getOperator(), call.getType(), + args.size()); if (hiveUdf == null) { - throw new RuntimeException("Cannot find UDF for " + call.getType() + " " + call.getOperator() - + "[" + call.getOperator().getKind() + "]/" + args.size()); + throw new RuntimeException("Cannot find UDF for " + call.getType() + " " + + call.getOperator() + "[" + call.getOperator().getKind() + "]/" + args.size()); + } + try { + gfDesc = ExprNodeGenericFuncDesc.newInstance(hiveUdf, args); + } catch (UDFArgumentException e) { + LOG.error(e); + throw new RuntimeException(e); } - gfDesc = new ExprNodeGenericFuncDesc(TypeConverter.convert(call.getType()), hiveUdf, args); } - return gfDesc; } + /** + * TODO: 1. 
Handle NULL + */ @Override public ExprNodeDesc visitLiteral(RexLiteral literal) { RelDataType lType = literal.getType(); @@ -176,4 +228,126 @@ public class ExprNodeConverter extends R } } + @Override + public ExprNodeDesc visitOver(RexOver over) { + if (!deep) { + return null; + } + + final RexWindow window = over.getWindow(); + + final WindowSpec windowSpec = new WindowSpec(); + final PartitioningSpec partitioningSpec = getPSpec(window); + windowSpec.setPartitioning(partitioningSpec); + final WindowFrameSpec windowFrameSpec = getWindowRange(window); + windowSpec.setWindowFrame(windowFrameSpec); + + wfs = new WindowFunctionSpec(); + wfs.setWindowSpec(windowSpec); + final Schema schema = new Schema(tabAlias, inputRowType.getFieldList()); + final ASTNode wUDAFAst = new ASTConverter.RexVisitor(schema).visitOver(over); + wfs.setExpression(wUDAFAst); + ASTNode nameNode = (ASTNode) wUDAFAst.getChild(0); + wfs.setName(nameNode.getText()); + for(int i=1; i < wUDAFAst.getChildCount()-1; i++) { + ASTNode child = (ASTNode) wUDAFAst.getChild(i); + wfs.addArg(child); + } + wfs.setAlias(columnAlias); + + RelDataTypeField f = outputRowType.getField(columnAlias, false, false); + return new ExprNodeColumnDesc(TypeConverter.convert(f.getType()), columnAlias, tabAlias, + partitioningExpr); + } + + private PartitioningSpec getPSpec(RexWindow window) { + PartitioningSpec partitioning = new PartitioningSpec(); + + Schema schema = new Schema(tabAlias, inputRowType.getFieldList()); + + if (window.partitionKeys != null && !window.partitionKeys.isEmpty()) { + PartitionSpec pSpec = new PartitionSpec(); + for (RexNode pk : window.partitionKeys) { + PartitionExpression exprSpec = new PartitionExpression(); + ASTNode astNode = pk.accept(new RexVisitor(schema)); + exprSpec.setExpression(astNode); + pSpec.addExpression(exprSpec); + } + partitioning.setPartSpec(pSpec); + } + + if (window.orderKeys != null && !window.orderKeys.isEmpty()) { + OrderSpec oSpec = new OrderSpec(); + for (RexFieldCollation ok : window.orderKeys) { + OrderExpression exprSpec = new OrderExpression(); + Order order = ok.getDirection() == RelFieldCollation.Direction.ASCENDING ? 
+ Order.ASC : Order.DESC; + exprSpec.setOrder(order); + ASTNode astNode = ok.left.accept(new RexVisitor(schema)); + exprSpec.setExpression(astNode); + oSpec.addExpression(exprSpec); + } + partitioning.setOrderSpec(oSpec); + } + + return partitioning; + } + + private WindowFrameSpec getWindowRange(RexWindow window) { + // NOTE: in Hive AST Rows->Range(Physical) & Range -> Values (logical) + + WindowFrameSpec windowFrame = new WindowFrameSpec(); + + BoundarySpec start = null; + RexWindowBound ub = window.getUpperBound(); + if (ub != null) { + start = getWindowBound(ub, window.isRows()); + } + + BoundarySpec end = null; + RexWindowBound lb = window.getLowerBound(); + if (lb != null) { + end = getWindowBound(lb, window.isRows()); + } + + if (start != null || end != null) { + if (start != null) { + windowFrame.setStart(start); + } + if (end != null) { + windowFrame.setEnd(end); + } + } + + return windowFrame; + } + + private BoundarySpec getWindowBound(RexWindowBound wb, boolean isRows) { + BoundarySpec boundarySpec; + + if (wb.isCurrentRow()) { + boundarySpec = new CurrentRowSpec(); + } else { + final Direction direction; + final int amt; + if (wb.isPreceding()) { + direction = Direction.PRECEDING; + } else { + direction = Direction.FOLLOWING; + } + if (wb.isUnbounded()) { + amt = BoundarySpec.UNBOUNDED_AMOUNT; + } else { + amt = RexLiteral.intValue(wb.getOffset()); + } + if (isRows) { + boundarySpec = new RangeBoundarySpec(direction, amt); + } else { + boundarySpec = new ValueBoundarySpec(direction, amt); + } + } + + return boundarySpec; + } + } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java Mon Apr 20 06:14:38 2015 @@ -50,9 +50,11 @@ import org.apache.commons.logging.LogFac import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSort; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import com.google.common.collect.ImmutableList; @@ -95,6 +97,23 @@ public class PlanModifierForASTConv { return newTopNode; } + private static String getTblAlias(RelNode rel) { + + if (null == rel) { + return null; + } + if (rel instanceof HiveTableScan) { + return ((HiveTableScan)rel).getTableAlias(); + } + if (rel instanceof Project) { + return null; + } + if (rel.getInputs().size() == 1) { + return getTblAlias(rel.getInput(0)); + } + return null; + } + private static void convertOpTree(RelNode rel, RelNode parent) { if (rel instanceof HepRelVertex) { @@ -103,6 +122,12 @@ public class PlanModifierForASTConv { if (!validJoinParent(rel, 
parent)) { introduceDerivedTable(rel, parent); } + String leftChild = getTblAlias(((Join)rel).getLeft()); + if (null != leftChild && leftChild.equalsIgnoreCase(getTblAlias(((Join)rel).getRight()))) { + // introduce derived table above one child, if this is self-join + // since user provided aliases are lost at this point. + introduceDerivedTable(((Join)rel).getLeft(), rel); + } } else if (rel instanceof MultiJoin) { throw new RuntimeException("Found MultiJoin"); } else if (rel instanceof RelSubset) { Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/TypeConverter.java Mon Apr 20 06:14:38 2015 @@ -240,7 +240,7 @@ public class TypeConverter { } else if (rType.getKeyType() != null) { return convertMapType(rType); } else { - return convertPrimtiveType(rType); + return convertPrimitiveType(rType); } } @@ -271,7 +271,7 @@ public class TypeConverter { return TypeInfoFactory.getListTypeInfo(convert(rType.getComponentType())); } - public static TypeInfo convertPrimtiveType(RelDataType rType) { + public static TypeInfo convertPrimitiveType(RelDataType rType) { switch (rType.getSqlTypeName()) { case BOOLEAN: return TypeInfoFactory.booleanTypeInfo; Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java Mon Apr 20 06:14:38 2015 @@ -49,8 +49,8 @@ import org.apache.calcite.plan.hep.HepPr import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.InvalidRelException; import org.apache.calcite.rel.RelCollation; -import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelCollationImpl; +import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; @@ -58,8 +58,10 @@ import org.apache.calcite.rel.core.Aggre import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; @@ -116,12 +118,15 @@ import org.apache.hadoop.hive.ql.lib.Nod import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelOptUtil; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveVolcanoPlanner; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; @@ -135,8 +140,11 @@ import org.apache.hadoop.hive.ql.optimiz import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinCondTypeCheckProcFactory; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.JoinTypeCheckCtx; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter; @@ -175,7 +183,8 @@ import com.google.common.collect.Immutab import com.google.common.collect.Lists; public class CalcitePlanner extends SemanticAnalyzer { - private final AtomicInteger noColsMissingStats = new AtomicInteger(0); + + private final AtomicInteger noColsMissingStats = new AtomicInteger(0); private List topLevelFieldSchema; private SemanticException semanticException; private boolean runCBO = true; @@ -218,26 +227,29 @@ public class CalcitePlanner extends Sema if (cboCtx.type == PreCboCtx.Type.CTAS) { queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query } - runCBO = canHandleAstForCbo(queryForCbo, getQB(), cboCtx); + runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx); if (runCBO) { disableJoinMerge = true; boolean reAnalyzeAST = false; try { - // 1. Gen Optimized AST - ASTNode newAST = getOptimizedAST(); + if (this.conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + sinkOp = getOptimizedHiveOPDag(); + } else { + // 1. Gen Optimized AST + ASTNode newAST = getOptimizedAST(); - // 1.1. Fix up the query for insert/ctas - newAST = fixUpCtasAndInsertAfterCbo(ast, newAST, cboCtx); + // 1.1. Fix up the query for insert/ctas + newAST = fixUpCtasAndInsertAfterCbo(ast, newAST, cboCtx); - // 2. Regen OP plan from optimized AST - init(false); - if (cboCtx.type == PreCboCtx.Type.CTAS) { - // Redo create-table analysis, because it's not part of doPhase1. - setAST(newAST); - newAST = reAnalyzeCtasAfterCbo(newAST); - } + // 2. 
Regen OP plan from optimized AST + init(false); + if (cboCtx.type == PreCboCtx.Type.CTAS) { + // Redo create-table analysis, because it's not part of doPhase1. + setAST(newAST); + newAST = reAnalyzeCtasAfterCbo(newAST); + } Phase1Ctx ctx_1 = initPhase1Ctx(); if (!doPhase1(newAST, getQB(), ctx_1, null)) { throw new RuntimeException("Couldn't do phase1 on CBO optimized query plan"); @@ -252,6 +264,7 @@ public class CalcitePlanner extends Sema LOG.info("CBO Succeeded; optimized logical plan."); this.ctx.setCboInfo("Plan optimized by CBO."); LOG.debug(newAST.dump()); + } } catch (Exception e) { boolean isMissingStats = noColsMissingStats.get() > 0; if (isMissingStats) { @@ -324,7 +337,7 @@ public class CalcitePlanner extends Sema * If top level QB is query then everything below it must also be * Query. */ - boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { + boolean canCBOHandleAst(ASTNode ast, QB qb, PreCboCtx cboCtx) { int root = ast.getToken().getType(); boolean needToLogMessage = STATIC_LOG.isInfoEnabled(); boolean isSupportedRoot = root == HiveParser.TOK_QUERY || root == HiveParser.TOK_EXPLAIN @@ -598,6 +611,58 @@ public class CalcitePlanner extends Sema return optiqOptimizedAST; } + /** + * Get Optimized Hive Operator DAG for the given QB tree in the semAnalyzer. + * + * @return Optimized Hive operator tree + * @throws SemanticException + */ + Operator getOptimizedHiveOPDag() throws SemanticException { + RelNode optimizedOptiqPlan = null; + CalcitePlannerAction calcitePlannerAction = new CalcitePlannerAction(prunedPartitions); + + try { + optimizedOptiqPlan = Frameworks.withPlanner(calcitePlannerAction, Frameworks + .newConfigBuilder().typeSystem(new HiveTypeSystemImpl()).build()); + } catch (Exception e) { + rethrowCalciteException(e); + throw new AssertionError("rethrowCalciteException didn't throw for " + e.getMessage()); + } + + RelNode modifiedOptimizedOptiqPlan = introduceProjectIfNeeded(optimizedOptiqPlan); + + LOG.debug("Translating the following plan:\n" + RelOptUtil.toString(modifiedOptimizedOptiqPlan)); + Operator hiveRoot = new HiveOpConverter(this, conf, unparseTranslator, topOps, + conf.getVar(HiveConf.ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("strict")).convert(modifiedOptimizedOptiqPlan); + RowResolver hiveRootRR = genRowResolver(hiveRoot, getQB()); + opParseCtx.put(hiveRoot, new OpParseContext(hiveRootRR)); + return genFileSinkPlan(getQB().getParseInfo().getClauseNames().iterator().next(), getQB(), hiveRoot); + } + + private RelNode introduceProjectIfNeeded(RelNode optimizedOptiqPlan) + throws CalciteSemanticException { + RelNode parent = null; + RelNode input = optimizedOptiqPlan; + RelNode newRoot = optimizedOptiqPlan; + + while (!(input instanceof Project) && (input instanceof Sort)) { + parent = input; + input = input.getInput(0); + } + + if (!(input instanceof Project)) { + HiveProject hpRel = HiveProject.create(input, + HiveCalciteUtil.getProjsFromBelowAsInputRef(input), input.getRowType().getFieldNames()); + if (input == optimizedOptiqPlan) { + newRoot = hpRel; + } else { + parent.replaceInput(0, hpRel); + } + } + + return newRoot; + } + /*** * Unwraps Calcite Invocation exceptions coming meta data provider chain and * obtains the real cause. @@ -674,6 +739,24 @@ public class CalcitePlanner extends Sema || t instanceof UndeclaredThrowableException; } + private RowResolver genRowResolver(Operator op, QB qb) { + RowResolver rr = new RowResolver(); + String subqAlias = (qb.getAliases().size() == 1 && qb.getSubqAliases().size() == 1) ? 
qb + .getAliases().get(0) : null; + + for (ColumnInfo ci : op.getSchema().getSignature()) { + try { + rr.putWithCheck((subqAlias != null) ? subqAlias : ci.getTabAlias(), + ci.getAlias() != null ? ci.getAlias() : ci.getInternalName(), ci.getInternalName(), + new ColumnInfo(ci)); + } catch (SemanticException e) { + throw new RuntimeException(e); + } + } + + return rr; + } + /** * Code responsible for Calcite plan generation and optimization. */ @@ -700,7 +783,13 @@ public class CalcitePlanner extends Sema /* * recreate cluster, so that it picks up the additional traitDef */ - RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(); + final Double maxSplitSize = (double) HiveConf.getLongVar( + conf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE); + final Double maxMemory = (double) HiveConf.getLongVar( + conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); + HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory); + HiveConfigContext confContext = new HiveConfigContext(algorithmsConf); + RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext); final RelOptQuery query = new RelOptQuery(planner); final RexBuilder rexBuilder = cluster.getRexBuilder(); cluster = query.createCluster(rexBuilder.getTypeFactory(), rexBuilder); @@ -719,13 +808,16 @@ public class CalcitePlanner extends Sema throw new RuntimeException(e); } + // Create MD provider + HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); + // 2. Apply Pre Join Order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, - HiveDefaultRelMetadataProvider.INSTANCE); + mdProvider.getMetadataProvider()); // 3. Appy Join Order Optimizations using Hep Planner (MST Algorithm) List list = Lists.newArrayList(); - list.add(HiveDefaultRelMetadataProvider.INSTANCE); + list.add(mdProvider.getMetadataProvider()); RelTraitSet desiredTraits = cluster .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY); @@ -758,6 +850,18 @@ public class CalcitePlanner extends Sema calciteOptimizedPlan = hepPlanner.findBestExp(); + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + // run rules to aid in translation from Optiq tree -> Hive tree + hepPgm = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) + .addRuleInstance(new HiveInsertExchange4JoinRule()).build(); + hepPlanner = new HepPlanner(hepPgm); + + hepPlanner.registerMetadataProviders(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + hepPlanner.setRoot(calciteOptimizedPlan); + calciteOptimizedPlan = hepPlanner.findBestExp(); + } + if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { LOG.debug("CBO Planning details:\n"); LOG.debug("Original Plan:\n" + RelOptUtil.toString(calciteGenPlan)); @@ -789,7 +893,12 @@ public class CalcitePlanner extends Sema basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 2. PPD + // 2. Add not null filters + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); + } + + // 3. 
PPD basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE, ReduceExpressionsRule.FILTER_INSTANCE, @@ -802,19 +911,19 @@ public class CalcitePlanner extends Sema HiveFilterJoinRule.FILTER_ON_JOIN, new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - // 3. Transitive inference & Partition Pruning + // 4. Transitive inference & Partition Pruning basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); - // 4. Projection Pruning + // 5. Projection Pruning RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY, HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 5. Rerun PPD through Project as column pruning would have introduced DT + // 6. Rerun PPD through Project as column pruning would have introduced DT // above scans basePlan = hepPlan(basePlan, true, mdProvider, new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, @@ -1050,7 +1159,7 @@ public class CalcitePlanner extends Sema List leftJoinKeys = new ArrayList(); List rightJoinKeys = new ArrayList(); - RexNode nonEquiConds = RelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel, + RexNode nonEquiConds = HiveRelOptUtil.splitJoinCondition(sysFieldList, leftRel, rightRel, calciteJoinCond, leftJoinKeys, rightJoinKeys, null, null); if (!nonEquiConds.isAlwaysTrue()) { @@ -1186,7 +1295,7 @@ public class CalcitePlanner extends Sema } // 2. Get Table Metadata - Table tab = qb.getMetaData().getSrcForAlias(tableAlias); + Table tabMetaData = qb.getMetaData().getSrcForAlias(tableAlias); // 3. Get Table Logical Schema (Row Type) // NOTE: Table logical schema = Non Partition Cols + Partition Cols + @@ -1194,7 +1303,7 @@ public class CalcitePlanner extends Sema // 3.1 Add Column info for non partion cols (Object Inspector fields) @SuppressWarnings("deprecation") - StructObjectInspector rowObjectInspector = (StructObjectInspector) tab.getDeserializer() + StructObjectInspector rowObjectInspector = (StructObjectInspector) tabMetaData.getDeserializer() .getObjectInspector(); List fields = rowObjectInspector.getAllStructFieldRefs(); ColumnInfo colInfo; @@ -1216,7 +1325,7 @@ public class CalcitePlanner extends Sema ArrayList partitionColumns = new ArrayList(); // 3.2 Add column info corresponding to partition columns - for (FieldSchema part_col : tab.getPartCols()) { + for (FieldSchema part_col : tabMetaData.getPartCols()) { colName = part_col.getName(); colInfo = new ColumnInfo(colName, TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), tableAlias, true); @@ -1226,6 +1335,7 @@ public class CalcitePlanner extends Sema } // 3.3 Add column info corresponding to virtual columns + List virtualCols = new ArrayList(); Iterator vcs = VirtualColumn.getRegistry(conf).iterator(); while (vcs.hasNext()) { VirtualColumn vc = vcs.next(); @@ -1233,24 +1343,29 @@ public class CalcitePlanner extends Sema vc.getIsHidden()); rr.put(tableAlias, vc.getName(), colInfo); cInfoLst.add(colInfo); + virtualCols.add(vc); } // 3.4 Build row type from field RelDataType rowType = TypeConverter.getType(cluster, rr, null); // 4. 
Build RelOptAbstractTable - String fullyQualifiedTabName = tab.getDbName(); - if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) - fullyQualifiedTabName = fullyQualifiedTabName + "." + tab.getTableName(); - else - fullyQualifiedTabName = tab.getTableName(); + String fullyQualifiedTabName = tabMetaData.getDbName(); + if (fullyQualifiedTabName != null && !fullyQualifiedTabName.isEmpty()) { + fullyQualifiedTabName = fullyQualifiedTabName + "." + tabMetaData.getTableName(); + } + else { + fullyQualifiedTabName = tabMetaData.getTableName(); + } RelOptHiveTable optTable = new RelOptHiveTable(relOptSchema, fullyQualifiedTabName, - tableAlias, rowType, tab, nonPartitionColumns, partitionColumns, conf, partitionCache, - noColsMissingStats); + rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf, + partitionCache, noColsMissingStats); // 5. Build Hive Table Scan Rel tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable, - rowType); + null == tableAlias ? tabMetaData.getTableName() : tableAlias, + getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf, + HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)); // 6. Add Schema(RR) to RelNode-Schema map ImmutableMap hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr, @@ -1712,7 +1827,11 @@ public class CalcitePlanner extends Sema // 3.3.2 Get UDAF Info using UDAF Evaluator GenericUDAFInfo udaf = SemanticAnalyzer.getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); - udafRetType = udaf.returnType; + if (FunctionRegistry.pivotResult(aggName)) { + udafRetType = ((ListTypeInfo)udaf.returnType).getListElementTypeInfo(); + } else { + udafRetType = udaf.returnType; + } } } catch (Exception e) { LOG.debug("CBO: Couldn't Obtain UDAF evaluators for " + aggName @@ -1768,23 +1887,53 @@ public class CalcitePlanner extends Sema qbp.setSelExprForClause(detsClauseName, SemanticAnalyzer.genSelectDIAST(rr)); } } + List grpByAstExprs = SemanticAnalyzer.getGroupByForClause(qbp, detsClauseName); HashMap aggregationTrees = qbp.getAggregationExprsForClause(detsClauseName); boolean hasGrpByAstExprs = (grpByAstExprs != null && !grpByAstExprs.isEmpty()) ? true : false; boolean hasAggregationTrees = (aggregationTrees != null && !aggregationTrees.isEmpty()) ? true : false; + final boolean cubeRollupGrpSetPresent = (!qbp.getDestRollups().isEmpty() + || !qbp.getDestGroupingSets().isEmpty() || !qbp.getDestCubes().isEmpty()); + + // 2. 
Sanity check + if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) + && qbp.getDistinctFuncExprsForClause(detsClauseName).size() > 1) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.getMsg()); + } + if (cubeRollupGrpSetPresent) { + if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) { + throw new SemanticException(ErrorMsg.HIVE_GROUPING_SETS_AGGR_NOMAPAGGR.getMsg()); + } + + if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { + checkExpressionsForGroupingSet(grpByAstExprs, qb.getParseInfo() + .getDistinctFuncExprsForClause(detsClauseName), aggregationTrees, + this.relToHiveRR.get(srcRel)); + + if (qbp.getDestGroupingSets().size() > conf + .getIntVar(HiveConf.ConfVars.HIVE_NEW_JOB_GROUPING_SET_CARDINALITY)) { + String errorMsg = "The number of rows per input row due to grouping sets is " + + qbp.getDestGroupingSets().size(); + throw new SemanticException( + ErrorMsg.HIVE_GROUPING_SETS_THRESHOLD_NOT_ALLOWED_WITH_SKEW.getMsg(errorMsg)); + } + } + } + + if (hasGrpByAstExprs || hasAggregationTrees) { ArrayList gbExprNDescLst = new ArrayList(); ArrayList outputColumnNames = new ArrayList(); - // 2. Input, Output Row Resolvers + // 3. Input, Output Row Resolvers RowResolver groupByInputRowResolver = this.relToHiveRR.get(srcRel); RowResolver groupByOutputRowResolver = new RowResolver(); groupByOutputRowResolver.setIsExprResolver(true); if (hasGrpByAstExprs) { - // 3. Construct GB Keys (ExprNode) + // 4. Construct GB Keys (ExprNode) for (int i = 0; i < grpByAstExprs.size(); ++i) { ASTNode grpbyExpr = grpByAstExprs.get(i); Map astToExprNDescMap = TypeCheckProcFactory.genExprNode( @@ -1799,12 +1948,10 @@ public class CalcitePlanner extends Sema } } - // 4. GroupingSets, Cube, Rollup + // 5. GroupingSets, Cube, Rollup int groupingColsSize = gbExprNDescLst.size(); List groupingSets = null; - if (!qbp.getDestRollups().isEmpty() - || !qbp.getDestGroupingSets().isEmpty() - || !qbp.getDestCubes().isEmpty()) { + if (cubeRollupGrpSetPresent) { if (qbp.getDestRollups().contains(detsClauseName)) { groupingSets = getGroupingSetsForRollup(grpByAstExprs.size()); } else if (qbp.getDestCubes().contains(detsClauseName)) { @@ -1812,7 +1959,7 @@ public class CalcitePlanner extends Sema } else if (qbp.getDestGroupingSets().contains(detsClauseName)) { groupingSets = getGroupingSets(grpByAstExprs, qbp, detsClauseName); } - + final int limit = groupingColsSize * 2; while (groupingColsSize < limit) { String field = getColumnInternalName(groupingColsSize); @@ -1827,18 +1974,18 @@ public class CalcitePlanner extends Sema } } - // 5. Construct aggregation function Info + // 6. Construct aggregation function Info ArrayList aggregations = new ArrayList(); if (hasAggregationTrees) { assert (aggregationTrees != null); for (ASTNode value : aggregationTrees.values()) { - // 5.1 Determine type of UDAF + // 6.1 Determine type of UDAF // This is the GenericUDAF name String aggName = SemanticAnalyzer.unescapeIdentifier(value.getChild(0).getText()); boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; - // 5.2 Convert UDAF Params to ExprNodeDesc + // 6.2 Convert UDAF Params to ExprNodeDesc ArrayList aggParameters = new ArrayList(); for (int i = 1; i < value.getChildCount(); i++) { ASTNode paraExpr = (ASTNode) value.getChild(i); @@ -1862,7 +2009,7 @@ public class CalcitePlanner extends Sema } } - // 6. If GroupingSets, Cube, Rollup were used, we account grouping__id + // 7. 
If GroupingSets, Cube, Rollup were used, we account grouping__id if(groupingSets != null && !groupingSets.isEmpty()) { String field = getColumnInternalName(groupingColsSize + aggregations.size()); outputColumnNames.add(field); @@ -1874,7 +2021,7 @@ public class CalcitePlanner extends Sema true)); } - // 7. We create the group_by operator + // 8. We create the group_by operator gbRel = genGBRelNode(gbExprNDescLst, aggregations, groupingSets, srcRel); relToHiveColNameCalcitePosMap.put(gbRel, buildHiveToCalciteColumnMap(groupByOutputRowResolver, gbRel)); @@ -2250,15 +2397,27 @@ public class CalcitePlanner extends Sema } } - return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel); + return genSelectRelNode(projsForWindowSelOp, out_rwsch, srcRel, windowExpressions); + } + + private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, + RelNode srcRel) throws CalciteSemanticException { + return genSelectRelNode(calciteColLst, out_rwsch, srcRel, null); } private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rwsch, - RelNode srcRel) throws CalciteSemanticException { + RelNode srcRel, List windowExpressions) throws CalciteSemanticException { // 1. Build Column Names Set colNamesSet = new HashSet(); List cInfoLst = out_rwsch.getRowSchema().getSignature(); ArrayList columnNames = new ArrayList(); + Map windowToAlias = null; + if (windowExpressions != null ) { + windowToAlias = new HashMap(); + for (WindowExpressionSpec wes : windowExpressions) { + windowToAlias.put(wes.getExpression().toStringTree().toLowerCase(), wes.getAlias()); + } + } String[] qualifiedColNames; String tmpColAlias; for (int i = 0; i < calciteColLst.size(); i++) { @@ -2276,8 +2435,11 @@ public class CalcitePlanner extends Sema * the names so we don't run into this issue when converting back to * Hive AST. 
*/ - if (tmpColAlias.startsWith("_c")) + if (tmpColAlias.startsWith("_c")) { tmpColAlias = "_o_" + tmpColAlias; + } else if (windowToAlias != null && windowToAlias.containsKey(tmpColAlias)) { + tmpColAlias = windowToAlias.get(tmpColAlias); + } int suffix = 1; while (colNamesSet.contains(tmpColAlias)) { tmpColAlias = qualifiedColNames[1] + suffix; @@ -2769,4 +2931,5 @@ public class CalcitePlanner extends Sema return tabAliases; } } + } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Apr 20 06:14:38 2015 @@ -229,9 +229,9 @@ public class SemanticAnalyzer extends Ba private HashMap opToPartPruner; private HashMap opToPartList; - private HashMap> topOps; - private final HashMap> topSelOps; - private final LinkedHashMap, OpParseContext> opParseCtx; + protected HashMap> topOps; + private HashMap> topSelOps; + protected LinkedHashMap, OpParseContext> opParseCtx; private List loadTableWork; private List loadFileWork; private final Map joinContext; @@ -258,7 +258,7 @@ public class SemanticAnalyzer extends Ba private CreateViewDesc createVwDesc; private ArrayList viewsExpanded; private ASTNode viewSelect; - private final UnparseTranslator unparseTranslator; + protected final UnparseTranslator unparseTranslator; private final GlobalLimitCtx globalLimitCtx; // prefix for column names auto generated by hive @@ -478,7 +478,7 @@ public class SemanticAnalyzer extends Ba wExprsInDest.containsKey(wFnSpec.getExpression().toStringTree())) { continue; } - wFnSpec.setAlias("_wcol" + wColIdx); + wFnSpec.setAlias(wFnSpec.getName() + "_window_" + wColIdx); spec.addWindowFunction(wFnSpec); qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression()); } @@ -3448,7 +3448,7 @@ public class SemanticAnalyzer extends Ba return ret; } - private int setBit(int bitmap, int bitIdx) { + public static int setBit(int bitmap, int bitIdx) { return bitmap | (1 << bitIdx); } @@ -3984,10 +3984,10 @@ public class SemanticAnalyzer extends Ba /** * Class to store GenericUDAF related information. */ - static class GenericUDAFInfo { - ArrayList convertedParameters; - GenericUDAFEvaluator genericUDAFEvaluator; - TypeInfo returnType; + public static class GenericUDAFInfo { + public ArrayList convertedParameters; + public GenericUDAFEvaluator genericUDAFEvaluator; + public TypeInfo returnType; } /** @@ -4028,7 +4028,7 @@ public class SemanticAnalyzer extends Ba * Returns the GenericUDAFEvaluator for the aggregation. This is called once * for each GroupBy aggregation. */ - static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, + public static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, ArrayList aggParameters, ASTNode aggTree, boolean isDistinct, boolean isAllColumns) throws SemanticException { @@ -4058,7 +4058,7 @@ public class SemanticAnalyzer extends Ba * @throws SemanticException * when the UDAF is not found or has problems. 
*/ - static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, + public static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, GenericUDAFEvaluator.Mode emode, ArrayList aggParameters) throws SemanticException { @@ -4087,7 +4087,7 @@ public class SemanticAnalyzer extends Ba return r; } - static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( + public static GenericUDAFEvaluator.Mode groupByDescModeToUDAFMode( GroupByDesc.Mode mode, boolean isDistinct) { switch (mode) { case COMPLETE: @@ -4130,7 +4130,7 @@ public class SemanticAnalyzer extends Ba * @return the ExprNodeDesc of the constant parameter if the given internalName represents * a constant parameter; otherwise, return null */ - private ExprNodeDesc isConstantParameterInAggregationParameters(String internalName, + public static ExprNodeDesc isConstantParameterInAggregationParameters(String internalName, List reduceValues) { // only the pattern of "VALUE._col([0-9]+)" should be handled. @@ -5577,7 +5577,7 @@ public class SemanticAnalyzer extends Ba return false; } - private void checkExpressionsForGroupingSet(List grpByExprs, + void checkExpressionsForGroupingSet(List grpByExprs, List distinctGrpByExprs, Map aggregationTrees, RowResolver inputRowResolver) throws SemanticException { @@ -6131,7 +6131,7 @@ public class SemanticAnalyzer extends Ba } @SuppressWarnings("nls") - private Operator genFileSinkPlan(String dest, QB qb, Operator input) + protected Operator genFileSinkPlan(String dest, QB qb, Operator input) throws SemanticException { RowResolver inputRR = opParseCtx.get(input).getRowResolver(); @@ -9238,7 +9238,7 @@ public class SemanticAnalyzer extends Ba return equalsExpr; } - private String getAliasId(String alias, QB qb) { + protected String getAliasId(String alias, QB qb) { return (qb.getId() == null ? alias : qb.getId() + ":" + alias).toLowerCase(); } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/UnparseTranslator.java Mon Apr 20 06:14:38 2015 @@ -38,7 +38,7 @@ import org.apache.hadoop.hive.ql.metadat * SemanticAnalyzer.saveViewDefinition() calls TokenRewriteStream.toString(). 
* */ -class UnparseTranslator { +public class UnparseTranslator { // key is token start index private final NavigableMap translations; private final List copyTranslations; Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java Mon Apr 20 06:14:38 2015 @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -444,4 +445,42 @@ public class ExprNodeDescUtils { // If the child is also decimal, no cast is needed (we hope - can target type be narrower?). return HiveDecimalUtils.getDecimalTypeForPrimitiveCategory(childTi); } + + /** + * Build ExprNodeColumnDesc for the projections in the input operator from + * sartpos to endpos(both included). Operator must have an associated + * colExprMap. + * + * @param inputOp + * Input Hive Operator + * @param startPos + * starting position in the input operator schema; must be >=0 and <= + * endPos + * @param endPos + * end position in the input operator schema; must be >=0. + * @return List of ExprNodeDesc + */ + public static ArrayList genExprNodeDesc(Operator inputOp, int startPos, int endPos, + boolean addEmptyTabAlias, boolean setColToNonVirtual) { + ArrayList exprColLst = new ArrayList(); + List colInfoLst = inputOp.getSchema().getSignature(); + + String tabAlias; + boolean vc; + ColumnInfo ci; + for (int i = startPos; i <= endPos; i++) { + ci = colInfoLst.get(i); + tabAlias = ci.getTabAlias(); + if (addEmptyTabAlias) { + tabAlias = ""; + } + vc = ci.getIsVirtualCol(); + if (setColToNonVirtual) { + vc = false; + } + exprColLst.add(new ExprNodeColumnDesc(ci.getType(), ci.getInternalName(), tabAlias, vc)); + } + + return exprColLst; + } } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java Mon Apr 20 06:14:38 2015 @@ -109,6 +109,13 @@ public class JoinDesc extends AbstractOp } public JoinDesc(final Map> exprs, + List outputColumnNames, final boolean noOuterJoin, + final JoinCondDesc[] conds, ExprNodeDesc[][] joinKeys) { + this (exprs, outputColumnNames, noOuterJoin, conds, + new HashMap>(), joinKeys); + } + + public JoinDesc(final Map> exprs, List outputColumnNames, final boolean noOuterJoin, final JoinCondDesc[] conds, final Map> filters, ExprNodeDesc[][] joinKeys) { Modified: hive/trunk/ql/src/test/queries/clientpositive/cbo_join.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/cbo_join.q?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- 
hive/trunk/ql/src/test/queries/clientpositive/cbo_join.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/cbo_join.q Mon Apr 20 06:14:38 2015 @@ -4,6 +4,7 @@ set hive.exec.check.crossproducts=false; set hive.stats.fetch.column.stats=true; set hive.auto.convert.join=false; +-- SORT_QUERY_RESULTS -- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key; select cbo_t1.key from cbo_t1 join cbo_t3; Modified: hive/trunk/ql/src/test/queries/clientpositive/cbo_simple_select.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/cbo_simple_select.q?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/cbo_simple_select.q (original) +++ hive/trunk/ql/src/test/queries/clientpositive/cbo_simple_select.q Mon Apr 20 06:14:38 2015 @@ -9,7 +9,8 @@ select * from cbo_t1; select * from cbo_t1 as cbo_t1; select * from cbo_t1 as cbo_t2; -select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1; +select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1; +select * from cbo_t1 where (((key=1) and (c_float=10)) and (c_int=20)); -- 2. Test Select + TS + FIL select * from cbo_t1 where cbo_t1.c_int >= 0; Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out Mon Apr 20 06:14:38 2015 @@ -808,32 +808,32 @@ STAGE PLANS: alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + predicate: s_store_sk is not null (type: boolean) + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TableScan alias: s Statistics: Num rows: 12 Data size: 3143 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: s_store_sk is not null (type: boolean) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + predicate: ((s_floor_space > 1000) and s_store_sk is not null) (type: boolean) + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s_store_sk (type: int) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 12 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: @@ -843,10 +843,10 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col1 + outputColumnNames: _col2 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: int) + expressions: _col2 (type: int) outputColumnNames: _col0 Statistics: Num rows: 322 Data size: 1288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator Modified: hive/trunk/ql/src/test/results/clientpositive/cbo_join.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/cbo_join.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/cbo_join.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/cbo_join.q.out Mon Apr 20 06:14:38 2015 @@ -1,4 +1,5 @@ -PREHOOK: query: -- 4. Test Select + Join + TS +PREHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6,7 +7,8 @@ PREHOOK: Input: default@cbo_t1@dt=2014 PREHOOK: Input: default@cbo_t2 PREHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -POSTHOOK: query: -- 4. Test Select + Join + TS +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- 4. Test Select + Join + TS select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 join cbo_t2 on cbo_t1.key=cbo_t2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@cbo_t1 @@ -122,166 +124,6 @@ POSTHOOK: Input: default@cbo_t1 POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -NULL -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 - 1 1 1 1 @@ -322,6 +164,61 @@ NULL 1 1 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 1 1 1 @@ -522,6 +419,111 @@ NULL 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL +NULL PREHOOK: query: select cbo_t1.key from cbo_t1 join cbo_t3 where cbo_t1.key=cbo_t3.key and cbo_t1.key >= 1 PREHOOK: 
type: QUERY PREHOOK: Input: default@cbo_t1 @@ -632,8 +634,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -730,6 +730,8 @@ NULL NULL 1 1 1 1 1 1 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 right outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -744,8 +746,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -847,6 +847,8 @@ NULL 2 NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL PREHOOK: query: select cbo_t1.c_int, cbo_t2.c_int from cbo_t1 full outer join cbo_t2 on cbo_t1.key=cbo_t2.key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -861,10 +863,6 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 #### A masked pattern was here #### -NULL NULL -NULL NULL -NULL NULL -NULL NULL 1 1 1 1 1 1 @@ -966,6 +964,10 @@ NULL 2 NULL 2 NULL 2 NULL 2 +NULL NULL +NULL NULL +NULL NULL +NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -5334,8 +5336,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -5870,6 +5870,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL PREHOOK: query: select b, cbo_t1.c, cbo_t2.p, q, cbo_t3.c_int from (select key as a, c_int as b, cbo_t1.c_float as c from cbo_t1) cbo_t1 full outer join (select cbo_t2.key as p, cbo_t2.c_int as q, c_float as r from cbo_t2) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key PREHOOK: type: QUERY PREHOOK: Input: default@cbo_t1 @@ -6430,8 +6432,6 @@ POSTHOOK: Input: default@cbo_t2 POSTHOOK: Input: default@cbo_t2@dt=2014 POSTHOOK: Input: default@cbo_t3 #### A masked pattern was here #### -NULL NULL NULL NULL -NULL NULL NULL NULL 1 1 1 1 1 1 1 1 1 1 1 1 @@ -6963,6 +6963,8 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +NULL NULL NULL NULL +NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL Modified: hive/trunk/ql/src/test/results/clientpositive/cbo_simple_select.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/cbo_simple_select.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/cbo_simple_select.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/cbo_simple_select.q.out Mon Apr 20 06:14:38 2015 @@ -120,6 +120,16 @@ POSTHOOK: Input: default@cbo_t1@dt=2014 1 1 25.0 NULL NULL NULL NULL NULL NULL +PREHOOK: query: select * from cbo_t1 where (((key=1) and (c_float=10)) and (c_int=20)) +PREHOOK: type: QUERY +PREHOOK: Input: default@cbo_t1 +PREHOOK: Input: default@cbo_t1@dt=2014 +#### A masked pattern was here #### +POSTHOOK: query: select * from cbo_t1 where (((key=1) and (c_float=10)) and 
Modified: hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer12.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer12.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer12.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/correlationoptimizer12.q.out Mon Apr 20 06:14:38 2015
@@ -52,7 +52,7 @@ STAGE PLANS:
                   raw input shape:
                 window functions:
                     window function definition
-                      alias: _wcol0
+                      alias: count_window_0
                       arguments: _col1
                       name: count
                       window function: GenericUDAFCountEvaluator
@@ -62,7 +62,7 @@ STAGE PLANS:
               predicate: _col0 is not null (type: boolean)
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: string), _wcol0 (type: bigint)
+                expressions: _col0 (type: string), count_window_0 (type: bigint)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
@@ -137,7 +137,7 @@ STAGE PLANS:
                   raw input shape:
                 window functions:
                     window function definition
-                      alias: _wcol0
+                      alias: count_window_0
                       arguments: _col1
                       name: count
                       window function: GenericUDAFCountEvaluator
@@ -147,7 +147,7 @@ STAGE PLANS:
               predicate: _col0 is not null (type: boolean)
               Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: string), _wcol0 (type: bigint)
+                expressions: _col0 (type: string), count_window_0 (type: bigint)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ctas_colname.q.out Mon Apr 20 06:14:38 2015
@@ -198,7 +198,7 @@ STAGE PLANS:
                   raw input shape:
                 window functions:
                     window function definition
-                      alias: _wcol0
+                      alias: rank_window_0
                       arguments: _col1
                       name: rank
                       window function: GenericUDAFRankEvaluator
@@ -206,7 +206,7 @@ STAGE PLANS:
                       isPivotResult: true
               Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: int)
+                expressions: _col0 (type: string), _col1 (type: string), rank_window_0 (type: int)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
@@ -360,7 +360,7 @@ STAGE PLANS:
                   raw input shape:
                 window functions:
                     window function definition
-                      alias: _wcol0
+                      alias: lead_window_0
                       arguments: _col0, 1
                       name: lead
                       window function: GenericUDAFLeadEvaluator
@@ -368,7 +368,7 @@ STAGE PLANS:
                       isPivotResult: true
               Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: string), _col1 (type: string), _wcol0 (type: string)
+                expressions: _col0 (type: string), _col1 (type: string), lead_window_0 (type: string)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Limit
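In the two files above, window-function output columns in EXPLAIN plans are no longer named with the positional placeholder _wcol0; the alias is now derived from the function name (count_window_0, rank_window_0, lead_window_0). A minimal sketch, assuming the stock src test table rather than the actual test queries, of statements whose plans would carry such aliases:

  -- plan alias: count_window_0
  select key, count(value) over (partition by key) from src;

  -- plan aliases: rank_window_0 and lead_window_0
  select key,
         rank() over (partition by key order by value),
         lead(key, 1) over (order by key)
  from src;

The _0 suffix appears to index the window functions within a query, so a second windowed count in the same select would presumably surface as count_window_1.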
Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_window.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_window.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_window.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_grouping_window.q.out Mon Apr 20 06:14:38 2015
@@ -106,7 +106,7 @@ STAGE PLANS:
                   raw input shape:
                 window functions:
                     window function definition
-                      alias: _wcol0
+                      alias: rank_window_0
                       arguments: _col3
                       name: rank
                       window function: GenericUDAFRankEvaluator
@@ -114,7 +114,7 @@ STAGE PLANS:
                       isPivotResult: true
               Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _wcol0 (type: int)
+                expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
                 Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/groupby_resolution.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/groupby_resolution.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/groupby_resolution.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/groupby_resolution.q.out Mon Apr 20 06:14:38 2015
@@ -690,7 +690,7 @@ STAGE PLANS:
                   raw input shape:
                 window functions:
                     window function definition
-                      alias: _wcol0
+                      alias: rank_window_0
                       arguments: _col1
                       name: rank
                       window function: GenericUDAFRankEvaluator
@@ -698,7 +698,7 @@ STAGE PLANS:
                       isPivotResult: true
               Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: string), _col1 (type: bigint), _wcol0 (type: int)
+                expressions: _col0 (type: string), _col1 (type: bigint), rank_window_0 (type: int)
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
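The same alias change applies when windowing sits on top of a group-by, as in the two tests above: the rank computed over the aggregated rows now surfaces as rank_window_0. A hedged sketch of that query shape (the actual groupby_grouping_window.q and groupby_resolution.q statements differ), again against src:

  select key, cnt,
         rank() over (partition by key order by cnt)
  from (select key, count(1) as cnt from src group by key) t;

In the plan, rank is marked isPivotResult: true, meaning the evaluator emits its values for a whole buffered partition at once and they are then pivoted back onto the individual rows.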
Modified: hive/trunk/ql/src/test/results/clientpositive/join32.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/join32.q.out?rev=1674738&r1=1674737&r2=1674738&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/join32.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/join32.q.out Mon Apr 20 06:14:38 2015
@@ -109,25 +109,71 @@ STAGE PLANS:
   Stage: Stage-7
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:y
+        $hdt$_0:z
           Fetch Operator
             limit: -1
+            Partition Description:
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE true
+                    bucket_count -1
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
         $hdt$_1:$hdt$_2:x
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:y
+        $hdt$_0:z
           TableScan
-            alias: y
+            alias: z
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: key is not null (type: boolean)
+              predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: key (type: string), value (type: string)
-                outputColumnNames: _col0, _col1
+                expressions: value (type: string)
+                outputColumnNames: _col0
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                 HashTable Sink Operator
                   keys:
@@ -141,7 +187,7 @@ STAGE PLANS:
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: (value is not null and key is not null) (type: boolean)
+              predicate: (key is not null and value is not null) (type: boolean)
               Statistics: Num rows: 7 Data size: 53 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: key (type: string), value (type: string)
@@ -150,31 +196,31 @@ STAGE PLANS:
                 HashTable Sink Operator
                   keys:
                     0 _col0 (type: string)
-                    1 _col1 (type: string)
+                    1 _col0 (type: string)
                   Position of Big Table: 0
 
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
-            alias: z
+            alias: y
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
             GatherStats: false
             Filter Operator
               isSamplingPred: false
-              predicate: ((11.0 = 11.0) and value is not null) (type: boolean)
+              predicate: key is not null (type: boolean)
               Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: value (type: string)
-                outputColumnNames: _col0
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
                     0 _col0 (type: string)
-                    1 _col1 (type: string)
-                  outputColumnNames: _col0, _col3
+                    1 _col0 (type: string)
+                  outputColumnNames: _col1, _col2, _col3
                   Position of Big Table: 0
                   Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
@@ -183,11 +229,11 @@ STAGE PLANS:
                     keys:
                       0 _col0 (type: string)
                       1 _col3 (type: string)
-                    outputColumnNames: _col1, _col2, _col5
+                    outputColumnNames: _col0, _col4, _col5
                     Position of Big Table: 1
                     Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: _col5 (type: string), _col2 (type: string), _col1 (type: string)
+                      expressions: _col5 (type: string), _col0 (type: string), _col4 (type: string)
                       outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
@@ -356,7 +402,7 @@ STAGE PLANS:
               name: default.srcpart
             name: default.srcpart
       Truncated Path -> Alias:
-        /srcpart/ds=2008-04-08/hr=11 [$hdt$_1:$hdt$_1:z]
+        /src [$hdt$_1:$hdt$_1:y]
 
   Stage: Stage-0
     Move Operator
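For context on the lineage hunk that follows: dest_j1 is filled by a three-way join of src1, src and srcpart, and this commit changes which input CBO turns into the map-local (hash) side, so the val2/value lineage flips between SIMPLE and EXPRESSION. The statement below is reconstructed from the aliases, predicates and lineage in this diff, not quoted from join32.q, so treat the exact text as an assumption:

  -- reconstructed shape of the join32 test, not a verbatim quote
  insert overwrite table dest_j1
  select x.key, z.value, y.value
  from src1 x
  join src y on (x.key = y.key)
  join srcpart z on (x.value = z.value and z.ds = '2008-04-08' and z.hr = 11);

The (11.0 = 11.0) residue in the filter over z is what remains of the z.hr = 11 condition once partition pruning has substituted the constant value of the hr partition column.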
@@ -405,8 +451,8 @@ POSTHOOK: Input: default@srcpart
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
 POSTHOOK: Output: default@dest_j1
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest_j1.value EXPRESSION [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: select * from dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1