Subject: svn commit: r1629563 [13/33] - in /hive/branches/spark: ./ accumulo-handler/ beeline/ beeline/src/java/org/apache/hive/beeline/ bin/ bin/ext/ common/ common/src/java/org/apache/hadoop/hive/conf/ common/src/test/org/apache/hadoop/hive/common/type/ contr...
Date: Mon, 06 Oct 2014 04:00:54 -0000
To: commits@hive.apache.org
From: brock@apache.org
Message-Id: <20141006040127.0A4AA2388C38@eris.apache.org>

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Mon Oct 6 04:00:39 2014 @@ -26,6 +26,8 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Set; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -278,6 +280,11 @@ public abstract class TaskCompiler { tsk.setRetryCmdWhenFail(true); } } + + Interner interner = Interners.newStrongInterner(); + for (Task rootTask : rootTasks) { + GenMapRedUtils.internTableDesc(rootTask, interner); + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java Mon Oct 6 04:00:39 2014 @@ -36,7 +36,9 @@ import 
org.apache.hadoop.hive.conf.HiveC import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.AppMasterEventOperator; +import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.ConditionalTask; +import org.apache.hadoop.hive.ql.exec.DummyStoreOperator; import org.apache.hadoop.hive.ql.exec.FileSinkOperator; import org.apache.hadoop.hive.ql.exec.FilterOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; @@ -62,6 +64,7 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate; import org.apache.hadoop.hive.ql.optimizer.ConvertJoinMapJoin; import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization; +import org.apache.hadoop.hive.ql.optimizer.MergeJoinProc; import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc; import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize; import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism; @@ -330,10 +333,17 @@ public class TezCompiler extends TaskCom opRules.put(new RuleRegExp("No more walking on ReduceSink-MapJoin", MapJoinOperator.getOperatorName() + "%"), new ReduceSinkMapJoinProc()); + opRules.put(new RuleRegExp("Recoginze a Sorted Merge Join operator to setup the right edge and" + + " stop traversing the DummyStore-MapJoin", CommonMergeJoinOperator.getOperatorName() + + "%"), new MergeJoinProc()); + opRules.put(new RuleRegExp("Split Work + Move/Merge - FileSink", FileSinkOperator.getOperatorName() + "%"), new CompositeProcessor(new FileSinkProcessor(), genTezWork)); + opRules.put(new RuleRegExp("Split work - DummyStore", DummyStoreOperator.getOperatorName() + + "%"), genTezWork); + opRules.put(new RuleRegExp("Handle Potential Analyze Command", TableScanOperator.getOperatorName() + "%"), new ProcessAnalyzeTable(GenTezUtils.getUtils())); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java Mon Oct 6 04:00:39 2014 @@ -54,6 +54,19 @@ public class TypeCheckCtx implements Nod private boolean allowDistinctFunctions; + private final boolean allowGBExprElimination; + + private final boolean allowAllColRef; + + private final boolean allowFunctionStar; + + private final boolean allowWindowing; + + // "[]" : LSQUARE/INDEX Expression + private final boolean allowIndexExpr; + + private final boolean allowSubQueryExpr; + /** * Constructor. * @@ -61,10 +74,23 @@ public class TypeCheckCtx implements Nod * The input row resolver of the previous operator. 
*/ public TypeCheckCtx(RowResolver inputRR) { + this(inputRR, false, true, true, true, true, true, true, true); + } + + public TypeCheckCtx(RowResolver inputRR, boolean allowStatefulFunctions, + boolean allowDistinctFunctions, boolean allowGBExprElimination, boolean allowAllColRef, + boolean allowFunctionStar, boolean allowWindowing, + boolean allowIndexExpr, boolean allowSubQueryExpr) { setInputRR(inputRR); error = null; - allowStatefulFunctions = false; - allowDistinctFunctions = true; + this.allowStatefulFunctions = allowStatefulFunctions; + this.allowDistinctFunctions = allowDistinctFunctions; + this.allowGBExprElimination = allowGBExprElimination; + this.allowAllColRef = allowAllColRef; + this.allowFunctionStar = allowFunctionStar; + this.allowWindowing = allowWindowing; + this.allowIndexExpr = allowIndexExpr; + this.allowSubQueryExpr = allowSubQueryExpr; } /** @@ -98,7 +124,8 @@ public class TypeCheckCtx implements Nod } /** - * @param allowStatefulFunctions whether to allow stateful UDF invocations + * @param allowStatefulFunctions + * whether to allow stateful UDF invocations */ public void setAllowStatefulFunctions(boolean allowStatefulFunctions) { this.allowStatefulFunctions = allowStatefulFunctions; @@ -136,7 +163,31 @@ public class TypeCheckCtx implements Nod this.allowDistinctFunctions = allowDistinctFunctions; } - public boolean isAllowDistinctFunctions() { + public boolean getAllowDistinctFunctions() { return allowDistinctFunctions; } + + public boolean getAllowGBExprElimination() { + return allowGBExprElimination; + } + + public boolean getallowAllColRef() { + return allowAllColRef; + } + + public boolean getallowFunctionStar() { + return allowFunctionStar; + } + + public boolean getallowWindowing() { + return allowWindowing; + } + + public boolean getallowIndexExpr() { + return allowIndexExpr; + } + + public boolean getallowSubQueryExpr() { + return allowSubQueryExpr; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java Mon Oct 6 04:00:39 2014 @@ -80,12 +80,12 @@ import org.apache.hadoop.hive.serde2.typ * expression Node Descriptor trees. They also introduce the correct conversion * functions to do proper implicit conversion. 
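As an aside on the TypeCheckCtx change above: the new constructor lets a caller switch individual expression features off when type-checking in a restricted context. A minimal sketch of hypothetical caller code (not taken from this patch; inputRR and expr are assumed to be in scope, and the flag order follows the constructor shown above):

    // Build a context that rejects "*" references, windowing and subquery expressions.
    TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR,
        false,  // allowStatefulFunctions
        true,   // allowDistinctFunctions
        true,   // allowGBExprElimination
        false,  // allowAllColRef
        false,  // allowFunctionStar
        false,  // allowWindowing
        true,   // allowIndexExpr
        false); // allowSubQueryExpr
    Map<ASTNode, ExprNodeDesc> nodeToDesc = TypeCheckProcFactory.genExprNode(expr, tcCtx);

With these flags, the processors below raise a SemanticException instead of silently accepting constructs that the surrounding context cannot handle.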
*/ -public final class TypeCheckProcFactory { +public class TypeCheckProcFactory { protected static final Log LOG = LogFactory.getLog(TypeCheckProcFactory.class .getName()); - private TypeCheckProcFactory() { + protected TypeCheckProcFactory() { // prevent instantiation } @@ -118,7 +118,7 @@ public final class TypeCheckProcFactory RowResolver input = ctx.getInputRR(); ExprNodeDesc desc = null; - if ((ctx == null) || (input == null)) { + if ((ctx == null) || (input == null) || (!ctx.getAllowGBExprElimination())) { return null; } @@ -137,8 +137,13 @@ public final class TypeCheckProcFactory return desc; } - public static Map genExprNode(ASTNode expr, - TypeCheckCtx tcCtx) throws SemanticException { + public static Map genExprNode(ASTNode expr, TypeCheckCtx tcCtx) + throws SemanticException { + return genExprNode(expr, tcCtx, new TypeCheckProcFactory()); + } + + protected static Map genExprNode(ASTNode expr, + TypeCheckCtx tcCtx, TypeCheckProcFactory tf) throws SemanticException { // Create the walker, the rules dispatcher and the context. // create a walker which walks the tree in a DFS manner while maintaining // the operator stack. The dispatcher @@ -146,13 +151,13 @@ public final class TypeCheckProcFactory Map opRules = new LinkedHashMap(); opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), - getNullExprProcessor()); + tf.getNullExprProcessor()); opRules.put(new RuleRegExp("R2", HiveParser.Number + "%|" + HiveParser.TinyintLiteral + "%|" + HiveParser.SmallintLiteral + "%|" + HiveParser.BigintLiteral + "%|" + HiveParser.DecimalLiteral + "%"), - getNumExprProcessor()); + tf.getNumExprProcessor()); opRules .put(new RuleRegExp("R3", HiveParser.Identifier + "%|" + HiveParser.StringLiteral + "%|" + HiveParser.TOK_CHARSETLITERAL + "%|" @@ -162,18 +167,18 @@ public final class TypeCheckProcFactory + HiveParser.KW_ARRAY + "%|" + HiveParser.KW_MAP + "%|" + HiveParser.KW_STRUCT + "%|" + HiveParser.KW_EXISTS + "%|" + HiveParser.TOK_SUBQUERY_OP_NOTIN + "%"), - getStrExprProcessor()); + tf.getStrExprProcessor()); opRules.put(new RuleRegExp("R4", HiveParser.KW_TRUE + "%|" - + HiveParser.KW_FALSE + "%"), getBoolExprProcessor()); - opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), getDateExprProcessor()); + + HiveParser.KW_FALSE + "%"), tf.getBoolExprProcessor()); + opRules.put(new RuleRegExp("R5", HiveParser.TOK_DATELITERAL + "%"), tf.getDateExprProcessor()); opRules.put(new RuleRegExp("R6", HiveParser.TOK_TABLE_OR_COL + "%"), - getColumnExprProcessor()); + tf.getColumnExprProcessor()); opRules.put(new RuleRegExp("R7", HiveParser.TOK_SUBQUERY_OP + "%"), - getSubQueryExprProcessor()); + tf.getSubQueryExprProcessor()); // The dispatcher fires the processor corresponding to the closest matching // rule and passes the context along - Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), + Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx); GraphWalker ogw = new DefaultGraphWalker(disp); @@ -229,7 +234,7 @@ public final class TypeCheckProcFactory * * @return NullExprProcessor. */ - public static NullExprProcessor getNullExprProcessor() { + public NullExprProcessor getNullExprProcessor() { return new NullExprProcessor(); } @@ -304,7 +309,7 @@ public final class TypeCheckProcFactory * * @return NumExprProcessor. 
*/ - public static NumExprProcessor getNumExprProcessor() { + public NumExprProcessor getNumExprProcessor() { return new NumExprProcessor(); } @@ -362,7 +367,7 @@ public final class TypeCheckProcFactory * * @return StrExprProcessor. */ - public static StrExprProcessor getStrExprProcessor() { + public StrExprProcessor getStrExprProcessor() { return new StrExprProcessor(); } @@ -408,7 +413,7 @@ public final class TypeCheckProcFactory * * @return BoolExprProcessor. */ - public static BoolExprProcessor getBoolExprProcessor() { + public BoolExprProcessor getBoolExprProcessor() { return new BoolExprProcessor(); } @@ -449,7 +454,7 @@ public final class TypeCheckProcFactory * * @return DateExprProcessor. */ - public static DateExprProcessor getDateExprProcessor() { + public DateExprProcessor getDateExprProcessor() { return new DateExprProcessor(); } @@ -546,7 +551,7 @@ public final class TypeCheckProcFactory * * @return ColumnExprProcessor. */ - public static ColumnExprProcessor getColumnExprProcessor() { + public ColumnExprProcessor getColumnExprProcessor() { return new ColumnExprProcessor(); } @@ -613,7 +618,7 @@ public final class TypeCheckProcFactory windowingTokens.add(HiveParser.TOK_TABSORTCOLNAMEDESC); } - private static boolean isRedundantConversionFunction(ASTNode expr, + protected static boolean isRedundantConversionFunction(ASTNode expr, boolean isFunction, ArrayList children) { if (!isFunction) { return false; @@ -700,7 +705,30 @@ public final class TypeCheckProcFactory return getFuncExprNodeDescWithUdfData(udfName, null, children); } - static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, + protected void validateUDF(ASTNode expr, boolean isFunction, TypeCheckCtx ctx, FunctionInfo fi, + List children, GenericUDF genericUDF) throws SemanticException { + // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't + // supported + if (fi.getGenericUDTF() != null) { + throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); + } + // UDAF in filter condition, group-by caluse, param of funtion, etc. + if (fi.getGenericUDAFResolver() != null) { + if (isFunction) { + throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg((ASTNode) expr + .getChild(0))); + } else { + throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr)); + } + } + if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) { + if (FunctionRegistry.isStateful(genericUDF)) { + throw new SemanticException(ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); + } + } + } + + protected ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, boolean isFunction, ArrayList children, TypeCheckCtx ctx) throws SemanticException, UDFArgumentException { // return the child directly if the conversion is redundant. @@ -713,6 +741,7 @@ public final class TypeCheckProcFactory ExprNodeDesc desc; if (funcText.equals(".")) { // "." 
: FIELD Expression + assert (children.size() == 2); // Only allow constant field name for now assert (children.get(1) instanceof ExprNodeConstantDesc); @@ -727,23 +756,22 @@ public final class TypeCheckProcFactory // Allow accessing a field of list element structs directly from a list boolean isList = (object.getTypeInfo().getCategory() == ObjectInspector.Category.LIST); if (isList) { - objectTypeInfo = ((ListTypeInfo) objectTypeInfo) - .getListElementTypeInfo(); + objectTypeInfo = ((ListTypeInfo) objectTypeInfo).getListElementTypeInfo(); } if (objectTypeInfo.getCategory() != Category.STRUCT) { throw new SemanticException(ErrorMsg.INVALID_DOT.getMsg(expr)); } - TypeInfo t = ((StructTypeInfo) objectTypeInfo) - .getStructFieldTypeInfo(fieldNameString); + TypeInfo t = ((StructTypeInfo) objectTypeInfo).getStructFieldTypeInfo(fieldNameString); if (isList) { t = TypeInfoFactory.getListTypeInfo(t); } - desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, - isList); - + desc = new ExprNodeFieldDesc(t, children.get(0), fieldNameString, isList); } else if (funcText.equals("[")) { // "[]" : LSQUARE/INDEX Expression + if (!ctx.getallowIndexExpr()) + throw new SemanticException(ErrorMsg.INVALID_FUNCTION.getMsg(expr)); + assert (children.size() == 2); // Check whether this is a list or a map @@ -759,8 +787,7 @@ public final class TypeCheckProcFactory // Calculate TypeInfo TypeInfo t = ((ListTypeInfo) myt).getListElementTypeInfo(); - desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry - .getGenericUDFForIndex(), children); + desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children); } else if (myt.getCategory() == Category.MAP) { if (!FunctionRegistry.implicitConvertible(children.get(1).getTypeInfo(), ((MapTypeInfo) myt).getMapKeyTypeInfo())) { @@ -769,11 +796,9 @@ public final class TypeCheckProcFactory } // Calculate TypeInfo TypeInfo t = ((MapTypeInfo) myt).getMapValueTypeInfo(); - desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry - .getGenericUDFForIndex(), children); + desc = new ExprNodeGenericFuncDesc(t, FunctionRegistry.getGenericUDFForIndex(), children); } else { - throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, - myt.getTypeName())); + throw new SemanticException(ErrorMsg.NON_COLLECTION_TYPE.getMsg(expr, myt.getTypeName())); } } else { // other operators or functions @@ -825,26 +850,7 @@ public final class TypeCheckProcFactory } } - // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't - // supported - if (fi.getGenericUDTF() != null) { - throw new SemanticException(ErrorMsg.UDTF_INVALID_LOCATION.getMsg()); - } - // UDAF in filter condition, group-by caluse, param of funtion, etc. - if (fi.getGenericUDAFResolver() != null) { - if (isFunction) { - throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION. - getMsg((ASTNode) expr.getChild(0))); - } else { - throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr)); - } - } - if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) { - if (FunctionRegistry.isStateful(genericUDF)) { - throw new SemanticException( - ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); - } - } + validateUDF(expr, isFunction, ctx, fi, children, genericUDF); // Try to infer the type of the constant only if there are two // nodes, one of them is column and the other is numeric const @@ -955,6 +961,24 @@ public final class TypeCheckProcFactory return false; } + protected ExprNodeColumnDesc processQualifiedColRef(TypeCheckCtx ctx, ASTNode expr, + Object... 
nodeOutputs) throws SemanticException { + RowResolver input = ctx.getInputRR(); + String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr.getChild(0).getChild(0) + .getText()); + // NOTE: tableAlias must be a valid non-ambiguous table alias, + // because we've checked that in TOK_TABLE_OR_COL's process method. + ColumnInfo colInfo = input.get(tableAlias, ((ExprNodeConstantDesc) nodeOutputs[1]).getValue() + .toString()); + + if (colInfo == null) { + ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); + return null; + } + return new ExprNodeColumnDesc(colInfo.getType(), colInfo.getInternalName(), + colInfo.getTabAlias(), colInfo.getIsVirtualCol()); + } + @Override public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, Object... nodeOutputs) throws SemanticException { @@ -1004,7 +1028,11 @@ public final class TypeCheckProcFactory * The difference is that there is translation for Window related tokens, so we just * return null; */ - if ( windowingTokens.contains(expr.getType())) { + if (windowingTokens.contains(expr.getType())) { + if (!ctx.getallowWindowing()) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, + ErrorMsg.INVALID_FUNCTION.getMsg("Windowing is not supported in the context"))); + return null; } @@ -1013,6 +1041,11 @@ public final class TypeCheckProcFactory } if (expr.getType() == HiveParser.TOK_ALLCOLREF) { + if (!ctx.getallowAllColRef()) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, + ErrorMsg.INVALID_COLUMN + .getMsg("All column reference is not supported in the context"))); + RowResolver input = ctx.getInputRR(); ExprNodeColumnListDesc columnList = new ExprNodeColumnListDesc(); assert expr.getChildCount() <= 1; @@ -1050,22 +1083,7 @@ public final class TypeCheckProcFactory if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && nodeOutputs[0] == null) { - - RowResolver input = ctx.getInputRR(); - String tableAlias = BaseSemanticAnalyzer.unescapeIdentifier(expr - .getChild(0).getChild(0).getText()); - // NOTE: tableAlias must be a valid non-ambiguous table alias, - // because we've checked that in TOK_TABLE_OR_COL's process method. 
- ColumnInfo colInfo = input.get(tableAlias, - ((ExprNodeConstantDesc) nodeOutputs[1]).getValue().toString()); - - if (colInfo == null) { - ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(1)), expr); - return null; - } - return new ExprNodeColumnDesc(colInfo.getType(), colInfo - .getInternalName(), colInfo.getTabAlias(), colInfo - .getIsVirtualCol()); + return processQualifiedColRef(ctx, expr, nodeOutputs); } // Return nulls for conversion operators @@ -1080,7 +1098,7 @@ public final class TypeCheckProcFactory expr.getType() == HiveParser.TOK_FUNCTIONSTAR || expr.getType() == HiveParser.TOK_FUNCTIONDI); - if (!ctx.isAllowDistinctFunctions() && expr.getType() == HiveParser.TOK_FUNCTIONDI) { + if (!ctx.getAllowDistinctFunctions() && expr.getType() == HiveParser.TOK_FUNCTIONDI) { throw new SemanticException( SemanticAnalyzer.generateErrorMessage(expr, ErrorMsg.DISTINCT_NOT_SUPPORTED.getMsg())); } @@ -1099,6 +1117,11 @@ public final class TypeCheckProcFactory } if (expr.getType() == HiveParser.TOK_FUNCTIONSTAR) { + if (!ctx.getallowFunctionStar()) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(expr, + ErrorMsg.INVALID_COLUMN + .getMsg(".* reference is not supported in the context"))); + RowResolver input = ctx.getInputRR(); for (ColumnInfo colInfo : input.getColumnInfos()) { if (!colInfo.getIsVirtualCol()) { @@ -1111,8 +1134,7 @@ public final class TypeCheckProcFactory // If any of the children contains null, then return a null // this is a hack for now to handle the group by case if (children.contains(null)) { - RowResolver input = ctx.getInputRR(); - List possibleColumnNames = input.getReferenceableColumnAliases(null, -1); + List possibleColumnNames = getReferenceableColumnAliases(ctx); String reason = String.format("(possible column names are: %s)", StringUtils.join(possibleColumnNames, ", ")); ctx.setError(ErrorMsg.INVALID_COLUMN.getMsg(expr.getChild(0), reason), @@ -1135,6 +1157,9 @@ public final class TypeCheckProcFactory } } + protected List getReferenceableColumnAliases(TypeCheckCtx ctx) { + return ctx.getInputRR().getReferenceableColumnAliases(null, -1); + } } /** @@ -1142,7 +1167,7 @@ public final class TypeCheckProcFactory * * @return DefaultExprProcessor. */ - public static DefaultExprProcessor getDefaultExprProcessor() { + public DefaultExprProcessor getDefaultExprProcessor() { return new DefaultExprProcessor(); } @@ -1160,13 +1185,18 @@ public final class TypeCheckProcFactory return null; } + ASTNode expr = (ASTNode) nd; + ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); + + if (!ctx.getallowSubQueryExpr()) + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(sqNode, + ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg())); + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); if (desc != null) { return desc; } - ASTNode expr = (ASTNode) nd; - ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); /* * Restriction.1.h :: SubQueries only supported in the SQL Where Clause. */ @@ -1182,7 +1212,7 @@ public final class TypeCheckProcFactory * * @return DateExprProcessor. 
*/ - public static SubQueryExprProcessor getSubQueryExprProcessor() { + public SubQueryExprProcessor getSubQueryExprProcessor() { return new SubQueryExprProcessor(); } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java Mon Oct 6 04:00:39 2014 @@ -17,6 +17,13 @@ */ package org.apache.hadoop.hive.ql.parse; +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.Context; @@ -27,19 +34,12 @@ import org.apache.hadoop.hive.ql.hooks.W import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.session.SessionState; -import java.io.IOException; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - - /** * A subclass of the {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer} that just handles * update and delete statements. 
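Stepping back from the diff for a moment: the TypeCheckProcFactory changes above (class and constructor no longer final/private, processor getters turned into instance methods, and a genExprNode overload that takes a factory) exist so that a subclass can plug in its own processors while reusing the rest. A hypothetical subclass, purely to illustrate the extension point (the class name and override are invented here, not part of this commit):

    // Invented example subclass; inherits the default processors except where overridden.
    class CustomTypeCheckProcFactory extends TypeCheckProcFactory {
      @Override
      public DefaultExprProcessor getDefaultExprProcessor() {
        // A real subclass would return a specialized processor here.
        return new DefaultExprProcessor();
      }
    }
    // The protected overload routes every processor lookup through the supplied factory:
    // genExprNode(expr, tcCtx, new CustomTypeCheckProcFactory());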
It works by rewriting the updates and deletes into insert @@ -128,11 +128,16 @@ public class UpdateDeleteSemanticAnalyze Table mTable; try { mTable = db.getTable(tableName[0], tableName[1]); + } catch (InvalidTableException e) { + LOG.error("Failed to find table " + getDotName(tableName) + " got exception " + + e.getMessage()); + throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(getDotName(tableName)), e); } catch (HiveException e) { - LOG.error("Failed to find table " + getDotName(tableName) + " got exception " + - e.getMessage()); - throw new SemanticException(ErrorMsg.INVALID_TABLE, getDotName(tableName)); + LOG.error("Failed to find table " + getDotName(tableName) + " got exception " + + e.getMessage()); + throw new SemanticException(e.getMessage(), e); } + List partCols = mTable.getPartCols(); rewrittenQueryStr.append("insert into table "); @@ -343,8 +348,10 @@ public class UpdateDeleteSemanticAnalyze // Add the setRCols to the input list for (String colName : setRCols) { - columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), + if(columnAccessInfo != null) {//assuming this means we are not doing Auth + columnAccessInfo.add(Table.getCompleteName(mTable.getDbName(), mTable.getTableName()), colName); + } } } @@ -386,7 +393,7 @@ public class UpdateDeleteSemanticAnalyze setRCols.add(colName.getText()); } else if (node.getChildren() != null) { for (Node n : node.getChildren()) { - addSetRCols(node, setRCols); + addSetRCols((ASTNode)n, setRCols); } } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java Mon Oct 6 04:00:39 2014 @@ -110,10 +110,12 @@ public class AlterTableDesc extends DDLD * @param newComment * @param newType */ - public AlterTableDesc(String tblName, String oldColName, String newColName, + public AlterTableDesc(String tblName, HashMap partSpec, + String oldColName, String newColName, String newType, String newComment, boolean first, String afterCol) { super(); oldName = tblName; + this.partSpec = partSpec; this.oldColName = oldColName; this.newColName = newColName; newColType = newType; @@ -142,11 +144,12 @@ public class AlterTableDesc extends DDLD * @param newCols * new columns to be added */ - public AlterTableDesc(String name, List newCols, + public AlterTableDesc(String name, HashMap partSpec, List newCols, AlterTableTypes alterType) { op = alterType; oldName = name; this.newCols = new ArrayList(newCols); + this.partSpec = partSpec; } /** Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Mon Oct 6 04:00:39 2014 @@ -41,6 +41,7 @@ public abstract class BaseWork extends A // Their function is mainly as root ops to give the mapjoin the 
correct // schema info. List dummyOps; + int tag; public BaseWork() {} @@ -100,7 +101,7 @@ public abstract class BaseWork extends A // add all children opStack.addAll(opSet); - + while(!opStack.empty()) { Operator op = opStack.pop(); returnSet.add(op); @@ -139,4 +140,12 @@ public abstract class BaseWork extends A } public abstract void configureJobConf(JobConf job); + + public void setTag(int tag) { + this.tag = tag; + } + + public int getTag() { + return tag; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/DropTableDesc.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/DropTableDesc.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/DropTableDesc.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/DropTableDesc.java Mon Oct 6 04:00:39 2014 @@ -52,6 +52,7 @@ public class DropTableDesc extends DDLDe ArrayList partSpecs; boolean expectView; boolean ifExists; + boolean ifPurge; boolean ignoreProtection; public DropTableDesc() { @@ -59,12 +60,14 @@ public class DropTableDesc extends DDLDe /** * @param tableName + * @param ifPurge */ - public DropTableDesc(String tableName, boolean expectView, boolean ifExists) { + public DropTableDesc(String tableName, boolean expectView, boolean ifExists, boolean ifPurge) { this.tableName = tableName; this.partSpecs = null; this.expectView = expectView; this.ifExists = ifExists; + this.ifPurge = ifPurge; this.ignoreProtection = false; } @@ -149,4 +152,19 @@ public class DropTableDesc extends DDLDe public void setIfExists(boolean ifExists) { this.ifExists = ifExists; } + + /** + * @return whether Purge was specified + */ + public boolean getIfPurge() { + return ifPurge; + } + + /** + * @param ifPurge + * set whether Purge was specified + */ + public void setIfPurge(boolean ifPurge) { + this.ifPurge = ifPurge; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java Mon Oct 6 04:00:39 2014 @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.serde.serd import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -93,7 +94,7 @@ public class ExprNodeConstantDesc extend return "null"; } - if (typeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME)) { + if (typeInfo.getTypeName().equals(serdeConstants.STRING_TYPE_NAME) || typeInfo instanceof BaseCharTypeInfo) { return "'" + value.toString() + "'"; } else if (typeInfo.getTypeName().equals(serdeConstants.BINARY_TYPE_NAME)) { 
byte[] bytes = (byte[]) value; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java Mon Oct 6 04:00:39 2014 @@ -372,5 +372,42 @@ public class ExprNodeDescUtils { } catch (Exception e) { return null; } - } + } + + public static void getExprNodeColumnDesc(List exprDescList, + Map hashCodeTocolumnDescMap) { + for (ExprNodeDesc exprNodeDesc : exprDescList) { + getExprNodeColumnDesc(exprNodeDesc, hashCodeTocolumnDescMap); + } + } + + /** + * Get Map of ExprNodeColumnDesc HashCode to ExprNodeColumnDesc. + * + * @param exprDesc + * @param hashCodeTocolumnDescMap + * Assumption: If two ExprNodeColumnDesc have same hash code then + * they are logically referring to same projection + */ + public static void getExprNodeColumnDesc(ExprNodeDesc exprDesc, + Map hashCodeTocolumnDescMap) { + if (exprDesc instanceof ExprNodeColumnDesc) { + hashCodeTocolumnDescMap.put( + ((ExprNodeColumnDesc) exprDesc).hashCode(), + ((ExprNodeColumnDesc) exprDesc)); + } else if (exprDesc instanceof ExprNodeColumnListDesc) { + for (ExprNodeDesc child : ((ExprNodeColumnListDesc) exprDesc) + .getChildren()) { + getExprNodeColumnDesc(child, hashCodeTocolumnDescMap); + } + } else if (exprDesc instanceof ExprNodeGenericFuncDesc) { + for (ExprNodeDesc child : ((ExprNodeGenericFuncDesc) exprDesc) + .getChildren()) { + getExprNodeColumnDesc(child, hashCodeTocolumnDescMap); + } + } else if (exprDesc instanceof ExprNodeFieldDesc) { + getExprNodeColumnDesc(((ExprNodeFieldDesc) exprDesc).getDesc(), + hashCodeTocolumnDescMap); + } + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java Mon Oct 6 04:00:39 2014 @@ -69,6 +69,7 @@ public class MapJoinDesc extends JoinDes // Hash table memory usage allowed; used in case of non-staged mapjoin. 
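A quick note on the ExprNodeDescUtils.getExprNodeColumnDesc() helper added above: it walks an expression tree (generic functions, column lists and field descriptors) and collects every ExprNodeColumnDesc keyed by hash code, under the stated assumption that equal hash codes refer to the same projection. A usage sketch (variable names are hypothetical; the generic types are elided in the diff text but follow the put() call shown):

    Map<Integer, ExprNodeColumnDesc> columnsByHash = new HashMap<Integer, ExprNodeColumnDesc>();
    ExprNodeDescUtils.getExprNodeColumnDesc(filterAndJoinExprs, columnsByHash);
    for (ExprNodeColumnDesc col : columnsByHash.values()) {
      // Each distinct referenced column appears exactly once in the map.
      LOG.debug("referenced column: " + col.getColumn());
    }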
private float hashtableMemoryUsage; + protected boolean genJoinKeys = true; public MapJoinDesc() { bigTableBucketNumMapping = new LinkedHashMap(); @@ -122,6 +123,7 @@ public class MapJoinDesc extends JoinDes } } + @Explain(displayName = "input vertices") public Map getParentToInput() { return parentToInput; } @@ -331,4 +333,16 @@ public class MapJoinDesc extends JoinDes public boolean getCustomBucketMapJoin() { return this.customBucketMapJoin; } + + public boolean isMapSideJoin() { + return true; + } + + public void setGenJoinKeys(boolean genJoinKeys) { + this.genJoinKeys = genJoinKeys; + } + + public boolean getGenJoinKeys() { + return genJoinKeys; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Mon Oct 6 04:00:39 2014 @@ -30,6 +30,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import com.google.common.collect.Interner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -96,6 +97,7 @@ public class MapWork extends BaseWork { private Long minSplitSize; private Long minSplitSizePerNode; private Long minSplitSizePerRack; + private final int tag = 0; //use sampled partitioning private int samplingType; @@ -126,6 +128,8 @@ public class MapWork extends BaseWork { private Map> eventSourcePartKeyExprMap = new LinkedHashMap>(); + private boolean doSplitsGrouping = true; + public MapWork() {} public MapWork(String name) { @@ -195,6 +199,22 @@ public class MapWork extends BaseWork { } } + public void internTable(Interner interner) { + if (aliasToPartnInfo != null) { + for (PartitionDesc part : aliasToPartnInfo.values()) { + if (part == null) { + continue; + } + part.intern(interner); + } + } + if (pathToPartitionInfo != null) { + for (PartitionDesc part : pathToPartitionInfo.values()) { + part.intern(interner); + } + } + } + /** * @return the aliasToPartnInfo */ @@ -567,4 +587,12 @@ public class MapWork extends BaseWork { public void setEventSourcePartKeyExprMap(Map> map) { this.eventSourcePartKeyExprMap = map; } + + public void setDoSplitsGrouping(boolean doSplitsGrouping) { + this.doSplitsGrouping = doSplitsGrouping; + } + + public boolean getDoSplitsGrouping() { + return this.doSplitsGrouping; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/OpTraits.java Mon Oct 6 04:00:39 2014 @@ -20,17 +20,16 @@ package org.apache.hadoop.hive.ql.plan; import java.util.List; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; - public class OpTraits { - + List> bucketColNames; + List> sortColNames; int numBuckets; - - public OpTraits(List> bucketColNames, int numBuckets) { + + public 
OpTraits(List> bucketColNames, int numBuckets, List> sortColNames) { this.bucketColNames = bucketColNames; this.numBuckets = numBuckets; + this.sortColNames = sortColNames; } public List> getBucketColNames() { @@ -42,10 +41,18 @@ public class OpTraits { } public void setBucketColNames(List> bucketColNames) { - this.bucketColNames = bucketColNames; + this.bucketColNames = bucketColNames; } public void setNumBuckets(int numBuckets) { this.numBuckets = numBuckets; } + + public void setSortColNames(List> sortColNames) { + this.sortColNames = sortColNames; + } + + public List> getSortCols() { + return sortColNames; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java Mon Oct 6 04:00:39 2014 @@ -48,12 +48,10 @@ import org.apache.hadoop.util.Reflection public class PartitionDesc implements Serializable, Cloneable { static { - TABLE_INTERNER = Interners.newWeakInterner(); STRING_INTERNER = Interners.newWeakInterner(); CLASS_INTERNER = Interners.newWeakInterner(); } - private static final Interner TABLE_INTERNER; private static final Interner STRING_INTERNER; private static final Interner> CLASS_INTERNER; @@ -73,12 +71,12 @@ public class PartitionDesc implements Se } public PartitionDesc(final TableDesc table, final LinkedHashMap partSpec) { - setTableDesc(table); + this.tableDesc = table; this.partSpec = partSpec; } public PartitionDesc(final Partition part) throws HiveException { - setTableDesc(Utilities.getTableDesc(part.getTable())); + this.tableDesc = Utilities.getTableDesc(part.getTable()); setProperties(part.getMetadataFromPartitionSchema()); partSpec = part.getSpec(); setInputFileFormatClass(part.getInputFormatClass()); @@ -86,7 +84,7 @@ public class PartitionDesc implements Se } public PartitionDesc(final Partition part,final TableDesc tblDesc) throws HiveException { - setTableDesc(tblDesc); + this.tableDesc = tblDesc; setProperties(part.getSchemaFromTableSchema(tblDesc.getProperties())); // each partition maintains a large properties partSpec = part.getSpec(); setOutputFileFormatClass(part.getInputFormatClass()); @@ -99,7 +97,7 @@ public class PartitionDesc implements Se } public void setTableDesc(TableDesc tableDesc) { - this.tableDesc = TABLE_INTERNER.intern(tableDesc); + this.tableDesc = tableDesc; } @Explain(displayName = "partition values") @@ -266,4 +264,8 @@ public class PartitionDesc implements Se baseFileName = path; } } + + public void intern(Interner interner) { + this.tableDesc = interner.intern(tableDesc); + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/TezWork.java Mon Oct 6 04:00:39 2014 @@ -19,12 +19,13 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import 
java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -46,6 +47,22 @@ import org.apache.hadoop.mapred.JobConf; @Explain(displayName = "Tez") public class TezWork extends AbstractOperatorDesc { + public enum VertexType { + AUTO_INITIALIZED_EDGES, // no custom vertex or edge + INITIALIZED_EDGES, // custom vertex and custom edge but single MR Input + MULTI_INPUT_INITIALIZED_EDGES, // custom vertex, custom edge and multi MR Input + MULTI_INPUT_UNINITIALIZED_EDGES // custom vertex, no custom edge, multi MR Input + ; + + public static boolean isCustomInputType(VertexType vertex) { + if ((vertex == null) || (vertex == AUTO_INITIALIZED_EDGES)) { + return false; + } else { + return true; + } + } + } + private static transient final Log LOG = LogFactory.getLog(TezWork.class); private static int counter; @@ -56,6 +73,7 @@ public class TezWork extends AbstractOpe private final Map> invertedWorkGraph = new HashMap>(); private final Map, TezEdgeProperty> edgeProperties = new HashMap, TezEdgeProperty>(); + private final Map workVertexTypeMap = new HashMap(); public TezWork(String name) { this.name = name + ":" + (++counter); @@ -305,15 +323,23 @@ public class TezWork extends AbstractOpe work.configureJobConf(jobConf); } String[] newTmpJars = jobConf.getStrings(MR_JAR_PROPERTY); - if (oldTmpJars != null && (oldTmpJars.length != 0)) { - if (newTmpJars != null && (newTmpJars.length != 0)) { - String[] combinedTmpJars = new String[newTmpJars.length + oldTmpJars.length]; - System.arraycopy(oldTmpJars, 0, combinedTmpJars, 0, oldTmpJars.length); - System.arraycopy(newTmpJars, 0, combinedTmpJars, oldTmpJars.length, newTmpJars.length); - jobConf.setStrings(MR_JAR_PROPERTY, combinedTmpJars); + if (oldTmpJars != null || newTmpJars != null) { + String[] finalTmpJars; + if (oldTmpJars == null || oldTmpJars.length == 0) { + // Avoid a copy when oldTmpJars is null or empty + finalTmpJars = newTmpJars; + } else if (newTmpJars == null || newTmpJars.length == 0) { + // Avoid a copy when newTmpJars is null or empty + finalTmpJars = oldTmpJars; } else { - jobConf.setStrings(MR_JAR_PROPERTY, oldTmpJars); + // Both are non-empty, only copy now + finalTmpJars = new String[oldTmpJars.length + newTmpJars.length]; + System.arraycopy(oldTmpJars, 0, finalTmpJars, 0, oldTmpJars.length); + System.arraycopy(newTmpJars, 0, finalTmpJars, oldTmpJars.length, newTmpJars.length); } + + jobConf.setStrings(MR_JAR_PROPERTY, finalTmpJars); + return finalTmpJars; } return newTmpJars; } @@ -332,4 +358,40 @@ public class TezWork extends AbstractOpe ImmutablePair workPair = new ImmutablePair(a, b); edgeProperties.put(workPair, edgeProp); } + + public void setVertexType(BaseWork w, VertexType incomingVertexType) { + VertexType vertexType = workVertexTypeMap.get(w); + if (vertexType == null) { + vertexType = VertexType.AUTO_INITIALIZED_EDGES; + } + switch (vertexType) { + case INITIALIZED_EDGES: + if (incomingVertexType == VertexType.MULTI_INPUT_UNINITIALIZED_EDGES) { + vertexType = VertexType.MULTI_INPUT_INITIALIZED_EDGES; + } + break; + + case MULTI_INPUT_INITIALIZED_EDGES: + // nothing to do + break; + + case MULTI_INPUT_UNINITIALIZED_EDGES: + if (incomingVertexType == VertexType.INITIALIZED_EDGES) { + vertexType = VertexType.MULTI_INPUT_INITIALIZED_EDGES; + } + break; + + case AUTO_INITIALIZED_EDGES: + vertexType 
= incomingVertexType; + break; + + default: + break; + } + workVertexTypeMap.put(w, vertexType); + } + + public VertexType getVertexType(BaseWork w) { + return workVertexTypeMap.get(w); + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationPreEventListener.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationPreEventListener.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationPreEventListener.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/AuthorizationPreEventListener.java Mon Oct 6 04:00:39 2014 @@ -23,7 +23,9 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.classification.InterfaceAudience.Private; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.metastore.MetaStorePreEventListener; import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.TableType; @@ -40,6 +42,8 @@ import org.apache.hadoop.hive.metastore. import org.apache.hadoop.hive.metastore.events.PreDropPartitionEvent; import org.apache.hadoop.hive.metastore.events.PreDropTableEvent; import org.apache.hadoop.hive.metastore.events.PreEventContext; +import org.apache.hadoop.hive.metastore.events.PreReadDatabaseEvent; +import org.apache.hadoop.hive.metastore.events.PreReadTableEvent; import org.apache.hadoop.hive.ql.metadata.AuthorizationException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveUtils; @@ -54,6 +58,7 @@ import org.apache.hadoop.hive.ql.securit * metastore PreEventContexts, such as the adding/dropping and altering * of databases, tables and partitions. 
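Returning to the TezWork changes above: setVertexType() merges the incoming hint with whatever was recorded before, so repeated calls converge on the most specific VertexType for a given BaseWork. A small sketch of the observable behaviour (work names are invented; only APIs shown in the diff are assumed):

    TezWork tezWork = new TezWork("example");
    BaseWork mapWork = new MapWork("Map 1");
    tezWork.setVertexType(mapWork, TezWork.VertexType.MULTI_INPUT_UNINITIALIZED_EDGES);
    tezWork.setVertexType(mapWork, TezWork.VertexType.INITIALIZED_EDGES);
    // The recorded type is now MULTI_INPUT_INITIALIZED_EDGES, so:
    TezWork.VertexType.isCustomInputType(tezWork.getVertexType(mapWork)); // returns true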
*/ +@Private public class AuthorizationPreEventListener extends MetaStorePreEventListener { public static final Log LOG = LogFactory.getLog( @@ -136,6 +141,12 @@ public class AuthorizationPreEventListen case ALTER_TABLE: authorizeAlterTable((PreAlterTableEvent)context); break; + case READ_TABLE: + authorizeReadTable((PreReadTableEvent)context); + break; + case READ_DATABASE: + authorizeReadDatabase((PreReadDatabaseEvent)context); + break; case ADD_PARTITION: authorizeAddPartition((PreAddPartitionEvent)context); break; @@ -162,6 +173,44 @@ public class AuthorizationPreEventListen } + private void authorizeReadTable(PreReadTableEvent context) throws InvalidOperationException, + MetaException { + if (!isReadAuthzEnabled()) { + return; + } + try { + org.apache.hadoop.hive.ql.metadata.Table wrappedTable = new TableWrapper(context.getTable()); + for (HiveMetastoreAuthorizationProvider authorizer : tAuthorizers.get()) { + authorizer.authorize(wrappedTable, new Privilege[] { Privilege.SELECT }, null); + } + } catch (AuthorizationException e) { + throw invalidOperationException(e); + } catch (HiveException e) { + throw metaException(e); + } + } + + private void authorizeReadDatabase(PreReadDatabaseEvent context) + throws InvalidOperationException, MetaException { + if (!isReadAuthzEnabled()) { + return; + } + try { + for (HiveMetastoreAuthorizationProvider authorizer : tAuthorizers.get()) { + authorizer.authorize(new Database(context.getDatabase()), + new Privilege[] { Privilege.SELECT }, null); + } + } catch (AuthorizationException e) { + throw invalidOperationException(e); + } catch (HiveException e) { + throw metaException(e); + } + } + + private boolean isReadAuthzEnabled() { + return tConfig.get().getBoolean(ConfVars.HIVE_METASTORE_AUTHORIZATION_AUTH_READS.varname, true); + } + private void authorizeAuthorizationAPICall() throws InvalidOperationException, MetaException { for (HiveMetastoreAuthorizationProvider authorizer : tAuthorizers.get()) { try { @@ -358,7 +407,7 @@ public class AuthorizationPreEventListen public PartitionWrapper(org.apache.hadoop.hive.metastore.api.Partition mapiPart, PreEventContext context) throws HiveException, NoSuchObjectException, MetaException { org.apache.hadoop.hive.metastore.api.Partition wrapperApiPart = mapiPart.deepCopy(); - org.apache.hadoop.hive.metastore.api.Table t = context.getHandler().get_table( + org.apache.hadoop.hive.metastore.api.Table t = context.getHandler().get_table_core( mapiPart.getDbName(), mapiPart.getTableName()); if (wrapperApiPart.getSd() == null){ // In the cases of create partition, by the time this event fires, the partition Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/HiveAuthorizationProviderBase.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/HiveAuthorizationProviderBase.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/HiveAuthorizationProviderBase.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/HiveAuthorizationProviderBase.java Mon Oct 6 04:00:39 2014 @@ -85,7 +85,7 @@ public abstract class HiveAuthorizationP return hiveClient.getDatabase(dbName); } else { try { - return handler.get_database(dbName); + return handler.get_database_core(dbName); } catch (NoSuchObjectException e) { throw new 
HiveException(e); } catch (MetaException e) { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java Mon Oct 6 04:00:39 2014 @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.metadat * an error while performing authorization, and not a authorization being * denied. */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public class HiveAccessControlException extends HiveException{ Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessController.java Mon Oct 6 04:00:39 2014 @@ -19,8 +19,7 @@ package org.apache.hadoop.hive.ql.securi import java.util.List; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; -import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.hive.conf.HiveConf; /** @@ -28,8 +27,7 @@ import org.apache.hadoop.hive.conf.HiveC * create/drop roles, and commands to read the state of authorization rules. * Methods here have corresponding methods in HiveAuthorizer, check method documentation there. 
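One practical note on the metastore read-authorization hook added to AuthorizationPreEventListener above: the new READ_TABLE/READ_DATABASE checks are gated by isReadAuthzEnabled(), which reads ConfVars.HIVE_METASTORE_AUTHORIZATION_AUTH_READS and defaults to true. A hedged configuration sketch (illustrative only; HiveConf inherits setBoolean from Hadoop's Configuration):

    HiveConf conf = new HiveConf();
    // With this set to false, authorizeReadTable()/authorizeReadDatabase() return early.
    conf.setBoolean(HiveConf.ConfVars.HIVE_METASTORE_AUTHORIZATION_AUTH_READS.varname, false);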
*/ -@LimitedPrivate(value = { "" }) -@Evolving +@Private public interface HiveAccessController { void grantPrivileges(List hivePrincipals, List hivePrivileges, Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java Mon Oct 6 04:00:39 2014 @@ -19,15 +19,13 @@ package org.apache.hadoop.hive.ql.securi import java.util.List; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; -import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving; +import org.apache.hadoop.classification.InterfaceAudience.Private; /** * Interface used to check if user has privileges to perform certain action. * Methods here have corresponding methods in HiveAuthorizer, check method documentation there. */ -@LimitedPrivate(value = { "" }) -@Evolving +@Private public interface HiveAuthorizationValidator { /** Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java Mon Oct 6 04:00:39 2014 @@ -34,7 +34,7 @@ import org.apache.hadoop.hive.ql.securit * statements and does not make assumptions about the privileges needed for a hive operation. * This is referred to as V2 authorizer in other parts of the code. */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public interface HiveAuthorizer { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerFactory.java Mon Oct 6 04:00:39 2014 @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.securit * create {@link HiveAuthorizer} instance used for hive authorization. 
* */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public interface HiveAuthorizerFactory { /** Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzContext.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzContext.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzContext.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzContext.java Mon Oct 6 04:00:39 2014 @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.common.cla * auditing and/or authorization. * It is an immutable class. Builder inner class is used instantiate it. */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public final class HiveAuthzContext { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzPluginException.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzPluginException.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzPluginException.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzPluginException.java Mon Oct 6 04:00:39 2014 @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.metadat * an error while performing authorization, and not a authorization being * denied. */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public class HiveAuthzPluginException extends HiveException{ Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzSessionContext.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzSessionContext.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzSessionContext.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthzSessionContext.java Mon Oct 6 04:00:39 2014 @@ -24,7 +24,7 @@ import org.apache.hadoop.hive.common.cla * Provides session context information. * It is an immutable class. Builder inner class is used instantiate it. 
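HiveAuthzContext (above) and HiveAuthzSessionContext (below) are both documented as immutable classes created through a Builder inner class. For readers unfamiliar with that idiom, here is a minimal sketch assuming two illustrative fields; the real classes may carry different fields, so treat commandString and ipAddress as examples only.

    // Hedged illustration of the immutable-object-with-Builder idiom the javadoc refers to.
    final class DemoAuthzContext {
      private final String commandString;
      private final String ipAddress;

      private DemoAuthzContext(Builder b) {
        this.commandString = b.commandString;
        this.ipAddress = b.ipAddress;
      }
      public String getCommandString() { return commandString; }
      public String getIpAddress() { return ipAddress; }

      public static class Builder {
        private String commandString;
        private String ipAddress;
        public Builder setCommandString(String cmd) { this.commandString = cmd; return this; }
        public Builder setIpAddress(String ip) { this.ipAddress = ip; return this; }
        public DemoAuthzContext build() { return new DemoAuthzContext(this); }
      }
    }

    // Typical usage: new DemoAuthzContext.Builder().setIpAddress("10.0.0.1").build();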
*/ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public final class HiveAuthzSessionContext { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveMetastoreClientFactory.java Mon Oct 6 04:00:39 2014 @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.metastore. * But Hive class is not a public interface, so this factory helps in hiding Hive * class from the authorization interface users. */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public interface HiveMetastoreClientFactory { IMetaStoreClient getHiveMetastoreClient() throws HiveAuthzPluginException; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java Mon Oct 6 04:00:39 2014 @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.common.cla /** * List of hive operations types. 
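HiveMetastoreClientFactory (above) exists so a plugin can reach metastore metadata without depending on the non-public Hive class; the interface exposes getHiveMetastoreClient(), as shown in this diff. A hedged sketch of one possible use follows; the OwnerOnlyCheck class and its owner rule are illustrative only, and exception handling is collapsed into a plain throws clause.

    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.Table;
    import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveMetastoreClientFactory;

    class OwnerOnlyCheck {
      private final HiveMetastoreClientFactory clientFactory;

      OwnerOnlyCheck(HiveMetastoreClientFactory clientFactory) {
        this.clientFactory = clientFactory;
      }

      /** Returns true only when the requesting user owns the table. */
      boolean isOwner(String user, String db, String table) throws Exception {
        IMetaStoreClient client = clientFactory.getHiveMetastoreClient();
        Table t = client.getTable(db, table);   // metastore lookup via the factory-provided client
        return user.equals(t.getOwner());
      }
    }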
*/ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public enum HiveOperationType { EXPLAIN, Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrincipal.java Mon Oct 6 04:00:39 2014 @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.common.cla /** * Represents the user or role in grant/revoke statements */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public class HivePrincipal implements Comparable { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilege.java Mon Oct 6 04:00:39 2014 @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.ql.securit /** * Represents the hive privilege being granted/revoked */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public class HivePrivilege implements Comparable { @Override @@ -97,6 +97,7 @@ public class HivePrivilege implements Co return supportedScope != null && supportedScope.contains(scope.name()); } + @Override public int compareTo(HivePrivilege privilege) { int compare = columns != null ? (privilege.columns != null ? 
compare(columns, privilege.columns) : 1) : Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeInfo.java Mon Oct 6 04:00:39 2014 @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.common.cla /** * Represents a privilege granted for an object to a principal */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public class HivePrivilegeInfo{ private final HivePrincipal principal; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HivePrivilegeObject.java Mon Oct 6 04:00:39 2014 @@ -22,14 +22,19 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; +import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; -import org.apache.hadoop.hive.common.classification.InterfaceStability.Unstable; /** - * Represents the object on which privilege is being granted/revoked + * Represents the object on which privilege is being granted/revoked, and objects + * being used in queries. + * + * Check the get* function documentation for information on what value it returns based on + * the {@link HivePrivilegeObjectType}. + * */ -@LimitedPrivate(value = { "" }) -@Unstable +@LimitedPrivate(value = { "Apache Argus (incubating)" }) +@Evolving public class HivePrivilegeObject implements Comparable { @Override @@ -77,9 +82,20 @@ public class HivePrivilegeObject impleme return o1.size() > o2.size() ? 1 : (o1.size() < o2.size() ? -1 : 0); } + /** + * Note that GLOBAL, PARTITION, COLUMN fields are populated only for Hive's old default + * authorization mode. + * When the authorization manager is an instance of HiveAuthorizerFactory, these types are not + * used. 
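The new javadoc above and the enums that follow describe how a privilege object now also conveys what a query does to it (for example INSERT versus INSERT_OVERWRITE). As a hedged illustration, a plugin might map the action type to the privilege it checks roughly as below; the mapping strings are made up, and only the getters that appear in this diff (getType, getActionType, getDbname, getObjectName) are assumed.

    import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject;
    import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivObjectActionType;
    import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType;

    class RequiredPrivilegeDemo {
      static String forOutputObject(HivePrivilegeObject obj) {
        String fqName = obj.getDbname() + "." + obj.getObjectName();
        if (obj.getType() != HivePrivilegeObjectType.TABLE_OR_VIEW) {
          return "object-type specific check for " + fqName;
        }
        HivePrivObjectActionType action = obj.getActionType();
        switch (action) {
          case INSERT:
            return "INSERT on " + fqName;
          case INSERT_OVERWRITE:
            // overwrite replaces existing data, so a stricter check may be appropriate
            return "INSERT + DELETE on " + fqName;
          case UPDATE:
            return "UPDATE on " + fqName;
          case DELETE:
            return "DELETE on " + fqName;
          default:
            return "SELECT on " + fqName;
        }
      }
    }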
+ */ public enum HivePrivilegeObjectType { GLOBAL, DATABASE, TABLE_OR_VIEW, PARTITION, COLUMN, LOCAL_URI, DFS_URI, COMMAND_PARAMS, FUNCTION - } ; + }; + + /** + * When {@link HiveOperationType} is QUERY, this action type is set so that it is possible + * to determine if the action type on this object is an INSERT or INSERT_OVERWRITE + */ public enum HivePrivObjectActionType { OTHER, INSERT, INSERT_OVERWRITE, UPDATE, DELETE }; @@ -139,6 +155,9 @@ public class HivePrivilegeObject impleme return type; } + /** + * @return the db name if type is DATABASE, TABLE, or FUNCTION + */ public String getDbname() { return dbname; } @@ -150,6 +169,10 @@ public class HivePrivilegeObject impleme return objectName; } + /** + * See javadoc of {@link HivePrivObjectActionType} + * @return action type + */ public HivePrivObjectActionType getActionType() { return actionType; } @@ -158,12 +181,15 @@ public class HivePrivilegeObject impleme return commandParams; } + /** + * @return partiton key information. Used only for old default authorization mode. + */ public List getPartKeys() { return partKeys; } /** - * Applicable columns in this object + * Applicable columns in this object, when the type is {@link HivePrivilegeObjectType.TABLE} * In case of DML read operations, this is the set of columns being used. * Column information is not set for DDL operations and for tables being written into * @return list of applicable columns Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRoleGrant.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRoleGrant.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRoleGrant.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveRoleGrant.java Mon Oct 6 04:00:39 2014 @@ -26,7 +26,7 @@ import com.google.common.collect.Compari /** * Represents a grant of a role to a principal */ -@LimitedPrivate(value = { "" }) +@LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving public class HiveRoleGrant implements Comparable { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Mon Oct 6 04:00:39 2014 @@ -1249,7 +1249,7 @@ public class SessionState { try { if (tezSessionState != null) { - TezSessionPoolManager.getInstance().close(tezSessionState); + TezSessionPoolManager.getInstance().close(tezSessionState, false); } } catch (Exception e) { LOG.info("Error closing tez session", e); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- 
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Mon Oct 6 04:00:39 2014 @@ -18,8 +18,15 @@ package org.apache.hadoop.hive.ql.stats; -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; @@ -80,19 +87,14 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; import org.apache.hadoop.io.BytesWritable; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; public class StatsUtils { private static final Log LOG = LogFactory.getLog(StatsUtils.class.getName()); + /** * Collect table, partition and column level statistics * @param conf @@ -109,15 +111,34 @@ public class StatsUtils { public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, Table table, TableScanOperator tableScanOperator) throws HiveException { - Statistics stats = new Statistics(); - // column level statistics are required only for the columns that are needed List schema = tableScanOperator.getSchema().getSignature(); List neededColumns = tableScanOperator.getNeededColumns(); + List referencedColumns = tableScanOperator.getReferencedColumns(); + + return collectStatistics(conf, partList, table, schema, neededColumns, referencedColumns); + } + + private static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, + Table table, List schema, List neededColumns, + List referencedColumns) throws HiveException { + boolean fetchColStats = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_COLUMN_STATS); boolean fetchPartStats = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_PARTITION_STATS); + + return collectStatistics(conf, partList, table, schema, neededColumns, referencedColumns, + fetchColStats, fetchPartStats); + } + + public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList, + Table table, List schema, List neededColumns, + List referencedColumns, boolean fetchColStats, boolean fetchPartStats) + throws HiveException { + + Statistics stats = new Statistics(); + float deserFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_DESERIALIZATION_FACTOR); @@ -207,7 +228,6 @@ public class StatsUtils { stats.getBasicStatsState().equals(State.COMPLETE)) { stats.setBasicStatsState(State.PARTIAL); } - boolean haveFullStats = fetchColStats; if (fetchColStats) { List partNames = new ArrayList(partList.getNotDeniedPartns().size()); for (Partition part : partList.getNotDeniedPartns()) { @@ -215,37 +235,84 @@ public class StatsUtils { } Map colToTabAlias = new HashMap(); neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias); - AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), neededColumns, partNames); + AggrStats aggrStats = 
Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), + neededColumns, partNames); if (null == aggrStats) { - haveFullStats = false; + // There are some partitions with no state (or we didn't fetch any state). + // Update the stats with empty list to reflect that in the + // state/initialize structures. + List emptyStats = Lists.newArrayList(); + + // add partition column stats + addParitionColumnStats(neededColumns, referencedColumns, schema, table, partList, + emptyStats); + + stats.addToColumnStats(emptyStats); + stats.updateColumnStatsState(deriveStatType(emptyStats, referencedColumns)); } else { List colStats = aggrStats.getColStats(); if (colStats.size() != neededColumns.size()) { - LOG.debug("Column stats requested for : " + neededColumns.size() + " columns. Able to retrieve" - + " for " + colStats.size() + " columns"); + LOG.debug("Column stats requested for : " + neededColumns.size() + " columns. Able to" + + " retrieve for " + colStats.size() + " columns"); } - List columnStats = convertColStats(colStats, table.getTableName(), colToTabAlias); + List columnStats = convertColStats(colStats, table.getTableName(), + colToTabAlias); + + addParitionColumnStats(neededColumns, referencedColumns, schema, table, partList, + columnStats); + stats.addToColumnStats(columnStats); - State colState = deriveStatType(columnStats, neededColumns); + State colState = deriveStatType(columnStats, referencedColumns); if (aggrStats.getPartsFound() != partNames.size() && colState != State.NONE) { - LOG.debug("Column stats requested for : " + partNames.size() +" partitions. " - + "Able to retrieve for " + aggrStats.getPartsFound() + " partitions"); + LOG.debug("Column stats requested for : " + partNames.size() + " partitions. " + + "Able to retrieve for " + aggrStats.getPartsFound() + " partitions"); colState = State.PARTIAL; } stats.setColumnStatsState(colState); } } - // There are some partitions with no state (or we didn't fetch any state). - // Update the stats with empty list to reflect that in the state/initialize structures. - if (!haveFullStats) { - List emptyStats = Lists.newArrayList(); - stats.addToColumnStats(emptyStats); - stats.updateColumnStatsState(deriveStatType(emptyStats, neededColumns)); - } } return stats; } + private static void addParitionColumnStats(List neededColumns, + List referencedColumns, List schema, Table table, + PrunedPartitionList partList, List colStats) + throws HiveException { + + // extra columns is difference between referenced columns vs needed + // columns. The difference could be partition columns. 
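The helper being added here derives the distinct-value count (NDV) of a partition column from the pruned partition list, because the metastore keeps no column statistics for partition columns. The self-contained sketch below restates that idea with plain maps standing in for ql.metadata.Partition objects; the class and sample values are illustrative only -- see the continuation of addParitionColumnStats and getNDVPartitionColumn just below for the actual code.

    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    class PartitionNdvDemo {
      /** NDV of a partition column == number of distinct key values across the pruned partitions. */
      static int ndv(List<Map<String, String>> partitionSpecs, String partCol) {
        Set<String> distinct = new HashSet<String>();
        for (Map<String, String> spec : partitionSpecs) {
          distinct.add(spec.get(partCol));
        }
        return distinct.size();
      }

      private static Map<String, String> spec(String ds, String hr) {
        Map<String, String> m = new HashMap<String, String>();
        m.put("ds", ds);
        m.put("hr", hr);
        return m;
      }

      public static void main(String[] args) {
        List<Map<String, String>> pruned = Arrays.asList(
            spec("2014-10-04", "00"), spec("2014-10-04", "12"), spec("2014-10-05", "00"));
        System.out.println(ndv(pruned, "ds"));  // prints 2
        System.out.println(ndv(pruned, "hr"));  // prints 2
      }
    }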
+ List extraCols = Lists.newArrayList(referencedColumns); + if (referencedColumns.size() > neededColumns.size()) { + extraCols.removeAll(neededColumns); + for (String col : extraCols) { + for (ColumnInfo ci : schema) { + // conditions for being partition column + if (col.equals(ci.getInternalName()) && ci.getIsVirtualCol() && + !ci.isHiddenVirtualCol()) { + // currently metastore does not store column stats for + // partition column, so we calculate the NDV from pruned + // partition list + ColStatistics partCS = new ColStatistics(table.getTableName(), + ci.getInternalName(), ci.getType().getTypeName()); + long numPartitions = getNDVPartitionColumn(partList.getPartitions(), + ci.getInternalName()); + partCS.setCountDistint(numPartitions); + colStats.add(partCS); + } + } + } + } + } + + public static int getNDVPartitionColumn(Set partitions, String partColName) { + Set distinctVals = new HashSet(partitions.size()); + for (Partition partition : partitions) { + distinctVals.add(partition.getSpec().get(partColName)); + } + return distinctVals.size(); + } + private static void setUnknownRcDsToAverage( List rowCounts, List dataSizes, int avgRowSize) { if (LOG.isDebugEnabled()) { @@ -751,7 +818,8 @@ public class StatsUtils { || colType.equalsIgnoreCase(serdeConstants.FLOAT_TYPE_NAME)) { return JavaDataModel.get().primitive1(); } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME) - || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + || colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME) + || colType.equalsIgnoreCase("long")) { return JavaDataModel.get().primitive2(); } else if (colType.equalsIgnoreCase(serdeConstants.TIMESTAMP_TYPE_NAME)) { return JavaDataModel.get().lengthOfTimestamp(); @@ -780,7 +848,8 @@ public class StatsUtils { return JavaDataModel.get().lengthForIntArrayOfSize(length); } else if (colType.equalsIgnoreCase(serdeConstants.DOUBLE_TYPE_NAME)) { return JavaDataModel.get().lengthForDoubleArrayOfSize(length); - } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME)) { + } else if (colType.equalsIgnoreCase(serdeConstants.BIGINT_TYPE_NAME) + || colType.equalsIgnoreCase("long")) { return JavaDataModel.get().lengthForLongArrayOfSize(length); } else if (colType.equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME)) { return JavaDataModel.get().lengthForByteArrayOfSize(length); @@ -876,7 +945,7 @@ public class StatsUtils { Statistics parentStats, Map colExprMap, RowSchema rowSchema) { List cs = Lists.newArrayList(); - if (colExprMap != null) { + if (colExprMap != null && rowSchema != null) { for (ColumnInfo ci : rowSchema.getSignature()) { String outColName = ci.getInternalName(); outColName = StatsUtils.stripPrefixFromColumnName(outColName); @@ -1042,10 +1111,8 @@ public class StatsUtils { /** * Get basic stats of table - * @param dbName - * - database name - * @param tabName - * - table name + * @param table + * - table * @param statType * - type of stats * @return value of stats Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java Mon Oct 6 04:00:39 2014 @@ -76,7 +76,7 
@@ public class Initiator extends Compactor // don't doom the entire thread. try { ShowCompactResponse currentCompactions = txnHandler.showCompact(new ShowCompactRequest()); - ValidTxnList txns = TxnHandler.createValidTxnList(txnHandler.getOpenTxns()); + ValidTxnList txns = TxnHandler.createValidTxnList(txnHandler.getOpenTxns(), 0); Set potentials = txnHandler.findPotentialCompactions(abortedThreshold); LOG.debug("Found " + potentials.size() + " potential compactions, " + "checking to see if we should compact any of them"); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java Mon Oct 6 04:00:39 2014 @@ -120,7 +120,7 @@ public class Worker extends CompactorThr final boolean isMajor = ci.isMajorCompaction(); final ValidTxnList txns = - TxnHandler.createValidTxnList(txnHandler.getOpenTxns()); + TxnHandler.createValidTxnList(txnHandler.getOpenTxns(), 0); final StringBuffer jobName = new StringBuffer(name); jobName.append("-compactor-"); jobName.append(ci.getFullPartitionName()); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java?rev=1629563&r1=1629562&r2=1629563&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java Mon Oct 6 04:00:39 2014 @@ -52,29 +52,6 @@ public class UDFLog extends UDFMath { } /** - * Get the logarithm of the given decimal with the given base. - */ - public DoubleWritable evaluate(DoubleWritable base, HiveDecimalWritable writable) { - if (base == null || writable == null) { - return null; - } - double d = writable.getHiveDecimal().bigDecimalValue().doubleValue(); - return log(base.get(), d); - } - - /** - * Get the logarithm of input with the given decimal as the base. - */ - public DoubleWritable evaluate(HiveDecimalWritable base, DoubleWritable d) { - if (base == null || d == null) { - return null; - } - - double b = base.getHiveDecimal().bigDecimalValue().doubleValue(); - return log(b, d.get()); - } - - /** * Get the logarithm of the given decimal input with the given decimal base. */ public DoubleWritable evaluate(HiveDecimalWritable baseWritable, HiveDecimalWritable writable) {
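The UDFLog change above removes the two mixed double/decimal evaluate overloads, leaving same-type variants such as the all-decimal one shown at the end of the hunk; mixed arguments can presumably be handled through Hive's usual implicit type conversion instead of dedicated overloads. For readers who want the arithmetic these methods perform, here is a hedged sketch of the change-of-base computation; plain doubles stand in for the writable wrappers, and the exact NULL-returning guard should be verified against UDFMath/UDFLog in the source.

    class LogWithBaseDemo {
      /**
       * log_base(x) = ln(x) / ln(base). Returns null where the UDF would return NULL
       * (non-positive input, base not greater than 1) -- an assumption to verify against
       * UDFMath.log in the Hive source.
       */
      static Double logWithBase(double base, double x) {
        if (base <= 1.0 || x <= 0.0) {
          return null;
        }
        return Math.log(x) / Math.log(base);
      }

      public static void main(String[] args) {
        System.out.println(logWithBase(2.0, 8.0));     // 3.0
        System.out.println(logWithBase(10.0, 1000.0)); // ~3.0 (floating-point rounding)
      }
    }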