From: xuefu@apache.org
To: commits@hive.apache.org
Reply-To: hive-dev@hive.apache.org
Date: Fri, 27 Feb 2015 19:54:48 -0000
Subject: svn commit: r1662806 [2/3] - in /hive/branches/spark: ./ common/src/java/org/apache/hadoop/hive/conf/ data/files/ hbase-handler/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/ hcatalog/src/test/e2e/templeton/drivers/ hcatalog/src/test/e2e/templet...

Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java?rev=1662806&r1=1662805&r2=1662806&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java Fri Feb 27 19:54:46 2015
@@ -153,7 +153,9 @@ public class TableScanOperator extends O
       values.add(o == null ? defaultPartitionName : o.toString());
     }
     partitionSpecs = FileUtils.makePartName(conf.getPartColumns(), values);
-    LOG.info("Stats Gathering found a new partition spec = " + partitionSpecs);
+    if (isLogInfoEnabled) {
+      LOG.info("Stats Gathering found a new partition spec = " + partitionSpecs);
+    }
   }

   // find which column contains the raw data size (both partitioned and non partitioned
   int uSizeColumn = -1;
@@ -279,7 +281,9 @@ public class TableScanOperator extends O
     StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
     if (!statsPublisher.connect(jc)) {
       // just return, stats gathering should not block the main query.
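The guard added above (and in the hunks that continue below) is the usual cheap log-level check idiom: test a cached flag before building the message string, so the concatenation cost is only paid when INFO logging is actually enabled. A minimal self-contained sketch of the idiom, assuming Commons Logging as the Hive operators use; the class and field names here are illustrative, not the exact Hive members:

```java
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class PartitionStatsExample {
  private static final Log LOG = LogFactory.getLog(PartitionStatsExample.class);

  // Cache the level check once, similar to Operator's isLogInfoEnabled field,
  // so hot code paths do not call LOG.isInfoEnabled() repeatedly.
  private final boolean isLogInfoEnabled = LOG.isInfoEnabled();

  void publish(String key, Object statsToPublish) {
    // The string concatenation only happens when INFO logging is on.
    if (isLogInfoEnabled) {
      LOG.info("publishing : " + key + " : " + statsToPublish);
    }
  }
}
```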
- LOG.info("StatsPublishing error: cannot connect to database."); + if (isLogInfoEnabled) { + LOG.info("StatsPublishing error: cannot connect to database."); + } if (isStatsReliable) { throw new HiveException(ErrorMsg.STATSPUBLISHER_CONNECTION_ERROR.getErrorCodedMsg()); } @@ -307,7 +311,9 @@ public class TableScanOperator extends O throw new HiveException(ErrorMsg.STATSPUBLISHER_PUBLISHING_ERROR.getErrorCodedMsg()); } } - LOG.info("publishing : " + key + " : " + statsToPublish.toString()); + if (isLogInfoEnabled) { + LOG.info("publishing : " + key + " : " + statsToPublish.toString()); + } } if (!statsPublisher.closeConnection()) { if (isStatsReliable) { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java Fri Feb 27 19:54:46 2015 @@ -111,7 +111,7 @@ public class UnionOperator extends Opera // to // create ObjectInspectors. needsTransform[p] = (inputObjInspectors[p] != outputObjInspector); - if (needsTransform[p]) { + if (isLogInfoEnabled && needsTransform[p]) { LOG.info("Union Operator needs to transform row from parent[" + p + "] from " + inputObjInspectors[p] + " to " + outputObjInspector); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java Fri Feb 27 19:54:46 2015 @@ -666,7 +666,7 @@ public class ExecDriver extends Task metadata = parent.getMetadata(); //Current Hive parquet timestamp implementation stores it in UTC, but other components do not do that. //If this file written by current Hive implementation itself, we need to do the reverse conversion, else skip the conversion. 
- boolean skipConversion = false; - if (Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname))) { - skipConversion = !Strings.nullToEmpty(metadata.get("createdBy")).startsWith("parquet-mr"); - } + boolean skipConversion = Boolean.valueOf(metadata.get(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname)); Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipConversion); return new TimestampWritable(ts); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Fri Feb 27 19:54:46 2015 @@ -19,6 +19,7 @@ import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; @@ -30,7 +31,6 @@ import parquet.column.ColumnDescriptor; import parquet.hadoop.api.ReadSupport; import parquet.io.api.RecordMaterializer; import parquet.schema.MessageType; -import parquet.schema.MessageTypeParser; import parquet.schema.PrimitiveType; import parquet.schema.PrimitiveType.PrimitiveTypeName; import parquet.schema.Type; @@ -153,6 +153,11 @@ public class DataWritableReadSupport ext throw new IllegalStateException("ReadContext not initialized properly. 
" + "Don't know the Hive Schema."); } + String key = HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname; + if (!metadata.containsKey(key)) { + metadata.put(key, String.valueOf(HiveConf.getBoolVar( + configuration, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION))); + } return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata); } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java Fri Feb 27 19:54:46 2015 @@ -16,10 +16,10 @@ package org.apache.hadoop.hive.ql.io.par import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -49,6 +49,8 @@ import parquet.hadoop.metadata.ParquetMe import parquet.hadoop.util.ContextUtil; import parquet.schema.MessageTypeParser; +import com.google.common.base.Strings; + public class ParquetRecordReaderWrapper implements RecordReader { public static final Log LOG = LogFactory.getLog(ParquetRecordReaderWrapper.class); @@ -61,6 +63,7 @@ public class ParquetRecordReaderWrapper private boolean firstRecord = false; private boolean eof = false; private int schemaSize; + private boolean skipTimestampConversion = false; private final ProjectionPusher projectionPusher; @@ -93,7 +96,14 @@ public class ParquetRecordReaderWrapper setFilter(oldJobConf); // create a TaskInputOutputContext - final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(oldJobConf, taskAttemptID); + Configuration conf = oldJobConf; + if (skipTimestampConversion ^ HiveConf.getBoolVar( + conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { + conf = new JobConf(oldJobConf); + HiveConf.setBoolVar(conf, + HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION, skipTimestampConversion); + } + final TaskAttemptContext taskContext = ContextUtil.newTaskAttemptContext(conf, taskAttemptID); if (split != null) { try { @@ -218,6 +228,7 @@ public class ParquetRecordReaderWrapper * @return a ParquetInputSplit corresponding to the oldSplit * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file */ + @SuppressWarnings("deprecation") protected ParquetInputSplit getSplit( final InputSplit oldSplit, final JobConf conf @@ -248,7 +259,9 @@ public class ParquetRecordReaderWrapper LOG.warn("Skipping split, could not find row group in: " + (FileSplit) oldSplit); split = null; } else { - populateReadMetadata(readContext.getReadSupportMetadata(), fileMetaData, conf); + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { + skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr"); + } split = new ParquetInputSplit(finalPath, splitStart, splitLength, @@ -264,16 +277,4 @@ public 
class ParquetRecordReaderWrapper } return split; } - - /** - * Method populates the read metadata, using filemetadata and Hive configuration. - * @param metadata read metadata to populate - * @param fileMetaData parquet file metadata - * @param conf hive configuration - */ - private void populateReadMetadata(Map metadata, FileMetaData fileMetaData, JobConf conf) { - metadata.put("createdBy", fileMetaData.getCreatedBy()); - metadata.put(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname, - String.valueOf(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION))); - } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Fri Feb 27 19:54:46 2015 @@ -74,7 +74,6 @@ import org.apache.hadoop.hive.ql.plan.pt import org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails; import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; -import org.apache.hadoop.hive.ql.udf.ptf.Noop; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -265,16 +264,19 @@ public final class ColumnPrunerProcFacto //Since we cannot know what columns will be needed by a PTF chain, //we do not prune columns on PTFOperator for PTF chains. 
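The pruning change that follows relies on the resolver hook introduced further down in this commit (TableFunctionResolver.getReferencedColumns(), overridden by MatchPath and surfaced through PartitionedTableFunctionDef): when a PTF reports which columns its expressions actually touch, the column pruner can prune its input instead of keeping every column. A hedged sketch of such an override, assuming ExprNodeDesc expressions collected at translation time; the class name and the usedExprs field are illustrative:

```java
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;

public class ExampleFunctionResolver /* extends TableFunctionResolver */ {

  // Expressions this table function evaluates; populated during translation (illustrative).
  private final List<ExprNodeDesc> usedExprs = new ArrayList<ExprNodeDesc>();

  /**
   * Report the union of columns referenced by the function's expressions.
   * Returning null means "unknown", in which case the pruner keeps all columns
   * (including virtual columns) flowing into the PTF operator.
   */
  public List<String> getReferencedColumns() throws SemanticException {
    List<String> columns = new ArrayList<String>();
    for (ExprNodeDesc expr : usedExprs) {
      Utilities.mergeUniqElems(columns, expr.getCols());
    }
    return columns;
  }
}
```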
PartitionedTableFunctionDef funcDef = conf.getFuncDef(); - if (!conf.forWindowing() && !Noop.class.isInstance(funcDef.getTFunction())) { + List referencedColumns = funcDef.getReferencedColumns(); + if (!conf.forWindowing() && !conf.forNoop() && referencedColumns == null) { return super.process(nd, stack, cppCtx, nodeOutputs); } - - //we create a copy of prunedCols to create a list of pruned columns for PTFOperator - List prunedCols = - new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0))); - if (funcDef instanceof WindowTableFunctionDef) { + + List prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0)); + if (conf.forWindowing()) { WindowTableFunctionDef def = (WindowTableFunctionDef) funcDef; prunedCols = Utilities.mergeUniqElems(getWindowFunctionColumns(def), prunedCols); + } else if (conf.forNoop()) { + prunedCols = new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0))); + } else { + prunedCols = referencedColumns; } List newRS = prunedColumnsList(prunedCols, op.getSchema(), funcDef); @@ -521,6 +523,9 @@ public final class ColumnPrunerProcFacto for (ExprNodeDesc key : keys) { colLists = Utilities.mergeUniqElems(colLists, key.getCols()); } + for (ExprNodeDesc key : conf.getPartitionCols()) { + colLists = Utilities.mergeUniqElems(colLists, key.getCols()); + } assert op.getNumChild() == 1; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GlobalLimitOptimizer.java Fri Feb 27 19:54:46 2015 @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.ql.exec.Op import org.apache.hadoop.hive.ql.exec.OperatorUtils; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; import org.apache.hadoop.hive.ql.parse.GlobalLimitCtx; @@ -99,28 +98,18 @@ public class GlobalLimitOptimizer implem // query qualify for the optimization if (tempGlobalLimit != null && tempGlobalLimit != 0) { Table tab = ts.getConf().getTableMetadata(); + Set filterOps = OperatorUtils.findOperators(ts, FilterOperator.class); if (!tab.isPartitioned()) { - Set filterOps = - OperatorUtils.findOperators(ts, FilterOperator.class); if (filterOps.size() == 0) { globalLimitCtx.enableOpt(tempGlobalLimit); } } else { // check if the pruner only contains partition columns - if (PartitionPruner.onlyContainsPartnCols(tab, - opToPartPruner.get(ts))) { + if (onlyContainsPartnCols(tab, filterOps)) { - PrunedPartitionList partsList; - try { - String alias = (String) topOps.keySet().toArray()[0]; - partsList = PartitionPruner.prune(ts, pctx, alias); - } catch (HiveException e) { - // Has to use full name to make sure it does not conflict with - // org.apache.commons.lang.StringUtils - LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); - throw new SemanticException(e.getMessage(), e); - } + String alias = (String) topOps.keySet().toArray()[0]; + PrunedPartitionList partsList = 
pctx.getPrunedPartitions(alias, ts); // If there is any unknown partition, create a map-reduce job for // the filter to prune correctly @@ -138,6 +127,15 @@ public class GlobalLimitOptimizer implem return pctx; } + private boolean onlyContainsPartnCols(Table table, Set filters) { + for (FilterOperator filter : filters) { + if (!PartitionPruner.onlyContainsPartnCols(table, filter.getConf().getPredicate())) { + return false; + } + } + return true; + } + /** * Check the limit number in all sub queries * Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Fri Feb 27 19:54:46 2015 @@ -75,6 +75,8 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import com.google.common.annotations.VisibleForTesting; + /** * BaseSemanticAnalyzer. * @@ -1245,7 +1247,36 @@ public abstract class BaseSemanticAnalyz inputOI.getTypeName(), outputOI.getTypeName()); } + normalizeColSpec(partSpec, astKeyName, colType, colSpec, convertedValue); + } + } + + @VisibleForTesting + static void normalizeColSpec(Map partSpec, String colName, + String colType, String originalColSpec, Object colValue) throws SemanticException { + if (colValue == null) return; // nothing to do with nulls + String normalizedColSpec = originalColSpec; + if (colType.equals(serdeConstants.DATE_TYPE_NAME)) { + normalizedColSpec = normalizeDateCol(colValue, originalColSpec); + } + if (!normalizedColSpec.equals(originalColSpec)) { + STATIC_LOG.warn("Normalizing partition spec - " + colName + " from " + + originalColSpec + " to " + normalizedColSpec); + partSpec.put(colName, normalizedColSpec); + } + } + + private static String normalizeDateCol( + Object colValue, String originalColSpec) throws SemanticException { + Date value; + if (colValue instanceof DateWritable) { + value = ((DateWritable) colValue).get(); + } else if (colValue instanceof Date) { + value = (Date) colValue; + } else { + throw new SemanticException("Unexpected date type " + colValue.getClass()); } + return HiveMetaStore.PARTITION_DATE_FORMAT.get().format(value); } protected WriteEntity toWriteEntity(String location) throws SemanticException { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Fri Feb 27 19:54:46 2015 @@ -342,6 +342,7 @@ public class PTFTranslator { outColNames, outRR); def.setOutputShape(outputShape); + def.setReferencedColumns(tFn.getReferencedColumns()); return def; } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java URL: 
http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java Fri Feb 27 19:54:46 2015 @@ -494,9 +494,6 @@ public class QBSubQuery implements ISubQ public ASTNode getSubQueryAST() { return subQueryAST; } - public ASTNode getOuterQueryExpression() { - return parentQueryExpression; - } public SubQueryTypeDef getOperator() { return operator; } @@ -526,15 +523,8 @@ public class QBSubQuery implements ISubQ /* * Restriction.16.s :: Correlated Expression in Outer Query must not contain * unqualified column references. + * disabled : if it's obvious, we allow unqualified refs */ - if ( parentQueryExpression != null && !forHavingClause ) { - ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(parentQueryExpression); - if ( u != null ) { - subQueryAST.setOrigin(originalSQASTOrigin); - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - u, "Correlating expression cannot contain unqualified column references.")); - } - } /* * Restriction 17.s :: SubQuery cannot use the same table alias as one used in @@ -664,12 +654,30 @@ public class QBSubQuery implements ISubQ try { outerQueryCol = outerQueryRR.getExpression(parentQueryExpression); } catch(SemanticException se) { + // ignore } + ASTNode parentExpr = parentQueryExpression; + if (!forHavingClause) { + Set aliases = outerQueryRR.getRslvMap().keySet(); + if (notInCheck != null) { + aliases.remove(notInCheck.getAlias()); + } + String tableAlias = aliases.size() == 1 ? 
aliases.iterator().next() : null; + parentExpr = + SubQueryUtils.setQualifiedColumnReferences(parentExpr, tableAlias); + if (parentExpr == null) { + subQueryAST.setOrigin(originalSQASTOrigin); + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + parentQueryExpression, + "Correlating expression contains ambiguous column references.")); + } + } + parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond( - getOuterQueryExpression(), - alias, - sqRR); + parentExpr, + alias, + sqRR); if ( outerQueryCol != null ) { rewriteCorrConjunctForHaving(parentQueryJoinCond, true, Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Feb 27 19:54:46 2015 @@ -1357,25 +1357,30 @@ public class SemanticAnalyzer extends Ba partition.put(partitionName, partitionVal); } // if it is a dynamic partition throw the exception - if (childCount == partition.size()) { - try { - Table table = db.getTable(tableName); - Partition parMetaData = db.getPartition(table, partition, false); - // Check partition exists if it exists skip the overwrite - if (parMetaData != null) { - phase1Result = false; - skipRecursion = true; - LOG.info("Partition already exists so insert into overwrite " + - "skipped for partition : " + parMetaData.toString()); - break; - } - } catch (HiveException e) { - LOG.info("Error while getting metadata : ", e); - } - } else { + if (childCount != partition.size()) { throw new SemanticException(ErrorMsg.INSERT_INTO_DYNAMICPARTITION_IFNOTEXISTS .getMsg(partition.toString())); } + Table table = null; + try { + table = db.getTable(tableName); + } catch (HiveException ex) { + throw new SemanticException(ex); + } + try { + Partition parMetaData = db.getPartition(table, partition, false); + // Check partition exists if it exists skip the overwrite + if (parMetaData != null) { + phase1Result = false; + skipRecursion = true; + LOG.info("Partition already exists so insert into overwrite " + + "skipped for partition : " + parMetaData.toString()); + break; + } + } catch (HiveException e) { + LOG.info("Error while getting metadata : ", e); + } + validatePartSpec(table, partition, (ASTNode)tab, conf, false); } skipRecursion = false; break; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java Fri Feb 27 19:54:46 2015 @@ -316,6 +316,32 @@ public class SubQueryUtils { } return null; } + + static ASTNode setQualifiedColumnReferences(ASTNode ast, String tableAlias) { + int type = ast.getType(); + if (type == HiveParser.DOT) { + return ast; + } + if (type == HiveParser.TOK_TABLE_OR_COL) { + if (tableAlias == null) { + return null; + } + String colName = 
SemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText()); + return SubQueryUtils.createColRefAST(tableAlias, colName); + } + + for (int i = 0; i < ast.getChildCount(); i++) { + ASTNode child = (ASTNode) ast.getChild(i); + ASTNode c = setQualifiedColumnReferences(child, tableAlias); + if (c == null) { + return null; + } + if (c != child) { + ast.setChild(i, c); + } + } + return ast; + } static ASTNode subQueryWhere(ASTNode insertClause) { if (insertClause.getChildCount() > 2 && @@ -335,7 +361,7 @@ public class SubQueryUtils { RowResolver sqRR) { ASTNode node = (ASTNode) ParseDriver.adaptor.create(HiveParser.EQUAL, "="); node.addChild(outerQueryExpr); - node.addChild(buildSQJoinExpr(sqAlias, sqRR, false)); + node.addChild(buildSQJoinExpr(sqAlias, sqRR)); return node; } @@ -345,18 +371,16 @@ public class SubQueryUtils { * this will build (. (TOK_TABLE_OR_COL Identifier[SQ_1]) Identifier[B]) * where 'SQ_1' is the alias generated for the SubQuery. */ - static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR, - boolean useInternalName) { + static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) { List signature = sqRR.getRowSchema().getSignature(); ColumnInfo joinColumn = signature.get(0); String[] joinColName = sqRR.reverseLookup(joinColumn.getInternalName()); - return createColRefAST(sqAlias, useInternalName ? - joinColumn.getInternalName() : joinColName[1]); + return createColRefAST(sqAlias, joinColName[1]); } static ASTNode buildOuterJoinPostCond(String sqAlias, RowResolver sqRR) { - return isNull(buildSQJoinExpr(sqAlias, sqRR, false)); + return isNull(buildSQJoinExpr(sqAlias, sqRR)); } @SuppressWarnings("rawtypes") Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java Fri Feb 27 19:54:46 2015 @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.parse.L import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; +import org.apache.hadoop.hive.ql.udf.ptf.Noop; import java.util.ArrayList; import java.util.Collections; @@ -97,6 +98,10 @@ public class PTFDesc extends AbstractOpe return funcDef instanceof WindowTableFunctionDef; } + public boolean forNoop() { + return funcDef.getTFunction() instanceof Noop; + } + @Explain(displayName = "Map-side function", displayOnlyOnTrue = true) public boolean isMapSide() { return isMapSide; Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java Fri Feb 27 19:54:46 2015 @@ -37,6 +37,8 @@ public class PartitionedTableFunctionDef private OrderDef order; private 
TableFunctionEvaluator tFunction; boolean transformsRawInput; + + private transient List referencedColumns; @Explain(displayName = "name") public String getName() { @@ -185,4 +187,13 @@ public class PartitionedTableFunctionDef public void setResolverClassName(String resolverClassName) { this.resolverClassName = resolverClassName; } + + @Explain(displayName = "referenced columns") + public List getReferencedColumns() { + return referencedColumns; + } + + public void setReferencedColumns(List referencedColumns) { + this.referencedColumns = referencedColumns; + } } \ No newline at end of file Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAccessControlException.java Fri Feb 27 19:54:46 2015 @@ -24,8 +24,7 @@ import org.apache.hadoop.hive.ql.metadat /** * Exception thrown by the Authorization plugin api (v2). Indicates - * an error while performing authorization, and not a authorization being - * denied. + * a authorization check denying permissions for an action. */ @LimitedPrivate(value = { "Apache Argus (incubating)" }) @Evolving Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java Fri Feb 27 19:54:46 2015 @@ -29,17 +29,15 @@ import org.apache.hadoop.classification. 
public interface HiveAuthorizationValidator { /** - * Check if current user has privileges to perform given operation type - * hiveOpType on the given input and output objects - * - * @param hiveOpType - * @param inputHObjs - * @param outputHObjs - * @param context - * @throws HiveAuthzPluginException - * @throws HiveAccessControlException + * see HiveAuthorizer.checkPrivileges */ void checkPrivileges(HiveOperationType hiveOpType, List inputHObjs, List outputHObjs, HiveAuthzContext context) throws HiveAuthzPluginException, HiveAccessControlException; + /** + * see HiveAuthorizer.filterListCmdObjects + */ + List filterListCmdObjects(List listObjs, + HiveAuthzContext context); + } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java Fri Feb 27 19:54:46 2015 @@ -154,6 +154,21 @@ public interface HiveAuthorizer { List outputHObjs, HiveAuthzContext context) throws HiveAuthzPluginException, HiveAccessControlException; + + /** + * Filter out any objects that should not be shown to the user, from the list of + * tables or databases coming from a 'show tables' or 'show databases' command + * @param listObjs List of all objects obtained as result of a show command + * @param context + * @return filtered list of objects that will be returned to the user invoking the command + * @throws HiveAuthzPluginException + * @throws HiveAccessControlException + */ + List filterListCmdObjects(List listObjs, + HiveAuthzContext context) + throws HiveAuthzPluginException, HiveAccessControlException; + + /** * @return all existing roles * @throws HiveAuthzPluginException Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java Fri Feb 27 19:54:46 2015 @@ -85,6 +85,13 @@ public class HiveAuthorizerImpl implemen authValidator.checkPrivileges(hiveOpType, inputHObjs, outputHObjs, context); } + + @Override + public List filterListCmdObjects(List listObjs, + HiveAuthzContext context) throws HiveAuthzPluginException, HiveAccessControlException { + return authValidator.filterListCmdObjects(listObjs, context); + } + @Override public List getAllRoles() throws HiveAuthzPluginException, HiveAccessControlException { return accessController.getAllRoles(); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java URL: 
http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java Fri Feb 27 19:54:46 2015 @@ -21,7 +21,6 @@ package org.apache.hadoop.hive.ql.securi import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.Set; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.Warehouse; @@ -372,4 +371,11 @@ public class HiveV1Authorizer implements @Override public void applyAuthorizationConfigPolicy(HiveConf hiveConf) { } + + @Override + public List filterListCmdObjects(List listObjs, + HiveAuthzContext context) throws HiveAuthzPluginException, HiveAccessControlException { + // do no filtering in old authorizer + return listObjs; + } } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java Fri Feb 27 19:54:46 2015 @@ -42,4 +42,10 @@ public class DummyHiveAuthorizationValid // no-op } + @Override + public List filterListCmdObjects(List listObjs, + HiveAuthzContext context) { + return listObjs; + } + } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java Fri Feb 27 19:54:46 2015 @@ -149,4 +149,10 @@ public class SQLStdHiveAuthorizationVali } } + @Override + public List filterListCmdObjects(List listObjs, + HiveAuthzContext context) { + return listObjs; + } + } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java Fri Feb 27 19:54:46 2015 @@ -419,7 +419,7 
@@ public class SessionState { return hdfsEncryptionShim; } - // SessionState is not available in runtime and Hive.get().getConf() is not safe to call + // SessionState is not available in runtime and Hive.get().getConf() is not safe to call private static class SessionStates { private SessionState state; private HiveConf conf; @@ -435,7 +435,7 @@ public class SessionState { } } } - + /** * Singleton Session object per thread. * @@ -705,7 +705,7 @@ public class SessionState { clsStr, authenticator, true); if (authorizer == null) { - // if it was null, the new authorization plugin must be specified in + // if it was null, the new (V2) authorization plugin must be specified in // config HiveAuthorizerFactory authorizerFactory = HiveUtils.getAuthorizerFactory(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER); @@ -717,13 +717,14 @@ public class SessionState { authorizerV2 = authorizerFactory.createHiveAuthorizer(new HiveMetastoreClientFactoryImpl(), conf, authenticator, authzContextBuilder.build()); + setAuthorizerV2Config(); - authorizerV2.applyAuthorizationConfigPolicy(conf); } // create the create table grants with new config createTableGrants = CreateTableAutomaticGrant.create(conf); } catch (HiveException e) { + LOG.error("Error setting up authorization: " + e.getMessage(), e); throw new RuntimeException(e); } @@ -734,6 +735,28 @@ public class SessionState { return; } + private void setAuthorizerV2Config() throws HiveException { + // avoid processing the same config multiple times, check marker + if (conf.get(CONFIG_AUTHZ_SETTINGS_APPLIED_MARKER, "").equals(Boolean.TRUE.toString())) { + return; + } + conf.setVar(ConfVars.METASTORE_FILTER_HOOK, + "org.apache.hadoop.hive.ql.security.authorization.plugin.AuthorizationMetaStoreFilterHook"); + + authorizerV2.applyAuthorizationConfigPolicy(conf); + // update config in Hive thread local as well and init the metastore client + try { + Hive.get(conf).getMSC(); + } catch (Exception e) { + // catch-all due to some exec time dependencies on session state + // that would cause ClassNoFoundException otherwise + throw new HiveException(e.getMessage(), e); + } + + // set a marker that this conf has been processed. + conf.set(CONFIG_AUTHZ_SETTINGS_APPLIED_MARKER, Boolean.TRUE.toString()); + } + public Object getActiveAuthorizer() { return getAuthorizationMode() == AuthorizationMode.V1 ? getAuthorizer() : getAuthorizerV2(); @@ -1416,20 +1439,7 @@ public class SessionState { * any security configuration changes. */ public void applyAuthorizationPolicy() throws HiveException { - if(!isAuthorizationModeV2()){ - // auth v1 interface does not have this functionality - return; - } - - // avoid processing the same config multiple times, check marker - if (conf.get(CONFIG_AUTHZ_SETTINGS_APPLIED_MARKER, "").equals(Boolean.TRUE.toString())) { - return; - } - - authorizerV2.applyAuthorizationConfigPolicy(conf); - // set a marker that this conf has been processed. 
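The marker check being consolidated here (moved out of applyAuthorizationPolicy and into setAuthorizerV2Config) is the standard apply-once guard on a Hadoop Configuration: bail out if the marker is already set, otherwise apply the policy and record the marker. A minimal sketch of the idiom; the marker key string and method names below are chosen for illustration and may not match SessionState's constants exactly:

```java
import org.apache.hadoop.conf.Configuration;

public class ApplyOncePolicy {
  // Marker key; the real constant lives in SessionState (value here is illustrative).
  private static final String SETTINGS_APPLIED_MARKER =
      "hive.internal.ss.authz.settings.applied.marker";

  /**
   * Apply a configuration policy at most once per Configuration object.
   */
  public void applyPolicyOnce(Configuration conf, Runnable policy) {
    if (Boolean.TRUE.toString().equals(conf.get(SETTINGS_APPLIED_MARKER, ""))) {
      return; // already applied for this conf
    }
    policy.run();
    // Record that this conf has been processed.
    conf.set(SETTINGS_APPLIED_MARKER, Boolean.TRUE.toString());
  }
}
```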
- conf.set(CONFIG_AUTHZ_SETTINGS_APPLIED_MARKER, Boolean.TRUE.toString()); - + setupAuth(); } public Map> getTempTables() { Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUtcTimestamp.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUtcTimestamp.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUtcTimestamp.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFFromUtcTimestamp.java Fri Feb 27 19:54:46 2015 @@ -32,11 +32,10 @@ import org.apache.hadoop.hive.serde2.obj import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TextConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.TimestampConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.io.Text; @Description(name = "from_utc_timestamp", value = "from_utc_timestamp(timestamp, string timezone) - " - + "Assumes given timestamp ist UTC and converts to given timezone (as of Hive 0.8.0)") + + "Assumes given timestamp is UTC and converts to given timezone (as of Hive 0.8.0)") public class GenericUDFFromUtcTimestamp extends GenericUDF { static final Log LOG = LogFactory.getLog(GenericUDFFromUtcTimestamp.class); @@ -48,17 +47,14 @@ public class GenericUDFFromUtcTimestamp @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length < 2) { - throw new UDFArgumentLengthException( - "The function " + getName() + " requires at least two " + if (arguments.length != 2) { + throw new UDFArgumentLengthException("The function " + getName() + " requires two " + "argument, got " + arguments.length); } try { argumentOIs = new PrimitiveObjectInspector[2]; argumentOIs[0] = (PrimitiveObjectInspector) arguments[0]; - if (arguments.length > 1) { - argumentOIs[1] = (PrimitiveObjectInspector) arguments[1]; - } + argumentOIs[1] = (PrimitiveObjectInspector) arguments[1]; } catch (ClassCastException e) { throw new UDFArgumentException( "The function " + getName() + " takes only primitive types"); @@ -73,20 +69,17 @@ public class GenericUDFFromUtcTimestamp @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { Object o0 = arguments[0].get(); - TimeZone timezone = null; if (o0 == null) { return null; } - - if (arguments.length > 1 && arguments[1] != null) { - Text text = textConverter.convert(arguments[1].get()); - if (text != null) { - timezone = TimeZone.getTimeZone(text.toString()); - } - } else { + Object o1 = arguments[1].get(); + if (o1 == null) { return null; } + String tzStr = textConverter.convert(o1).toString(); + TimeZone timezone = TimeZone.getTimeZone(tzStr); + Timestamp timestamp = ((TimestampWritable) timestampConverter.convert(o0)) .getTimestamp(); Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- 
hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java Fri Feb 27 19:54:46 2015 @@ -35,18 +35,18 @@ public class NumDistinctValueEstimator { * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 * thus introducing errors in the estimates. */ - private static final int bitVectorSize = 31; - private int numBitVectors; + private static final int BIT_VECTOR_SIZE = 31; + private final int numBitVectors; // Refer to Flajolet-Martin'86 for the value of phi - private final double phi = 0.77351; + private static final double PHI = 0.77351; - private int[] a; - private int[] b; - private FastBitSet[] bitVector = new FastBitSet[numBitVectors]; + private final int[] a; + private final int[] b; + private final FastBitSet[] bitVector; - private Random aValue; - private Random bValue; + private final Random aValue; + private final Random bValue; /* Create a new distinctValueEstimator */ @@ -54,7 +54,7 @@ public class NumDistinctValueEstimator { this.numBitVectors = numBitVectors; bitVector = new FastBitSet[numBitVectors]; for (int i=0; i< numBitVectors; i++) { - bitVector[i] = new FastBitSet(bitVectorSize); + bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); } a = new int[numBitVectors]; @@ -98,23 +98,30 @@ public class NumDistinctValueEstimator { b[i] = randVal; if (a[i] < 0) { - a[i] = a[i] + (1 << bitVectorSize - 1); + a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1); } if (b[i] < 0) { - b[i] = b[i] + (1 << bitVectorSize - 1); + b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1); } } } public NumDistinctValueEstimator(String s, int numBitVectors) { - FastBitSet b[] = deserialize(s, numBitVectors); + this.numBitVectors = numBitVectors; + FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors); bitVector = new FastBitSet[numBitVectors]; for(int i=0; i > 1; @@ -277,8 +284,8 @@ public class NumDistinctValueEstimator { int index; // Find the index of the least significant bit that is 1 - for (index=0; index> 1; @@ -321,13 +328,13 @@ public class NumDistinctValueEstimator { for (int i=0; i < numBitVectors; i++) { int index = 0; - while (bitVector[i].get(index) && index < bitVectorSize) { + while (bitVector[i].get(index) && index < BIT_VECTOR_SIZE) { index = index + 1; } S = S + index; } - numDistinctValues = ((numBitVectors/phi) * Math.pow(2.0, S/numBitVectors)); + numDistinctValues = ((numBitVectors/PHI) * Math.pow(2.0, S/numBitVectors)); return ((long)numDistinctValues); } @@ -345,7 +352,7 @@ public class NumDistinctValueEstimator { } avgLeastSigZero = - (double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(phi)/Math.log(2.0)); + (double)(sumLeastSigZero/(numBitVectors * 1.0)) - (Math.log(PHI)/Math.log(2.0)); numDistinctValues = Math.pow(2.0, avgLeastSigZero); return ((long)(numDistinctValues)); } Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java Fri Feb 27 19:54:46 2015 @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.Ex import 
org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.PTFPartition; import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.PTFTranslator; @@ -195,6 +196,20 @@ public class MatchPath extends TableFunc setOutputOI(OI); } + + @Override + public List getReferencedColumns() throws SemanticException { + MatchPath matchPath = (MatchPath) evaluator; + List columns = new ArrayList<>(); + for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) { + Utilities.mergeUniqElems(columns, exprNode.getCols()); + } + for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) { + Utilities.mergeUniqElems(columns, exprNode.getCols()); + } + return columns; + } + /* * validate and setup patternStr */ @@ -356,6 +371,7 @@ public class MatchPath extends TableFunc static class SymbolsInfo { int sz; + ArrayList symbolExprsDecs; ArrayList symbolExprsEvaluators; ArrayList symbolExprsOIs; ArrayList symbolExprsNames; @@ -366,6 +382,7 @@ public class MatchPath extends TableFunc symbolExprsEvaluators = new ArrayList(sz); symbolExprsOIs = new ArrayList(sz); symbolExprsNames = new ArrayList(sz); + symbolExprsDecs = new ArrayList<>(sz); } void add(String name, PTFExpressionDef arg) @@ -373,6 +390,7 @@ public class MatchPath extends TableFunc symbolExprsNames.add(name); symbolExprsEvaluators.add(arg.getExprEvaluator()); symbolExprsOIs.add(arg.getOI()); + symbolExprsDecs.add(arg.getExprNode()); } } @@ -749,8 +767,7 @@ public class MatchPath extends TableFunc /* * create SelectListOI */ - selectListInputOI = (StructObjectInspector) - PTFTranslator.getStandardStructOI(selectListInputRowResolver); + selectListInputOI = PTFTranslator.getStandardStructOI(selectListInputRowResolver); } private void fixResultExprString() Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java Fri Feb 27 19:54:46 2015 @@ -29,11 +29,6 @@ import org.apache.hadoop.hive.serde2.obj public class NoopWithMap extends Noop { - @Override - public PTFPartition execute(PTFPartition iPart) throws HiveException - { - return iPart; - } @Override protected PTFPartition _transformRawInput(PTFPartition iPart) throws HiveException Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java Fri Feb 27 19:54:46 2015 @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.plan.PTFDesc; import 
org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /* @@ -60,7 +59,7 @@ import org.apache.hadoop.hive.serde2.obj */ /** - * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AsbtractTableFunction + * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AbstractTableFunction * class into a Resolver and Evaluator. *

* The Evaluator also holds onto the {@link TableFunctionDef}. This provides information @@ -79,7 +78,7 @@ import org.apache.hadoop.hive.serde2.obj */ public abstract class TableFunctionEvaluator { /* - * how is this different from the OutpuShape set on the TableDef. + * how is this different from the OutputShape set on the TableDef. * This is the OI of the object coming out of the PTF. * It is put in an output Partition whose Serde is usually LazyBinarySerde. * So the next PTF (or Operator) in the chain gets a LazyBinaryStruct. Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java (original) +++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java Fri Feb 27 19:54:46 2015 @@ -60,7 +60,7 @@ public abstract class TableFunctionResol /* * - called during translation. * - invokes createEvaluator which must be implemented by a subclass - * - sets up the evaluator with references to the TableDef, PartitionClass, PartitonMemsize and + * - sets up the evaluator with references to the TableDef, PartitionClass, PartitionMemsize and * the transformsRawInput boolean. */ public void initialize(HiveConf cfg, PTFDesc ptfDesc, PartitionedTableFunctionDef tDef) @@ -193,4 +193,14 @@ public abstract class TableFunctionResol * a subclass must provide the {@link TableFunctionEvaluator} instance. */ protected abstract TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, PartitionedTableFunctionDef tDef); + + /** + * Provide referenced columns names to be used in partition function + * + * @return null for unknown (will get all columns from table including virtual columns) + * @throws SemanticException + */ + public List getReferencedColumns() throws SemanticException { + return null; + } } Modified: hive/branches/spark/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java (original) +++ hive/branches/spark/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java Fri Feb 27 19:54:46 2015 @@ -26,12 +26,9 @@ import java.util.Stack; import junit.framework.TestCase; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Partition; @@ -90,14 +87,14 @@ public class TestMetastoreExpr extends T } } - private static void silentDropDatabase(String dbName) throws MetaException, TException { + private static void 
silentDropDatabase(String dbName) throws TException { try { for (String tableName : client.getTables(dbName, "*")) { client.dropTable(dbName, tableName); } client.dropDatabase(dbName); - } catch (NoSuchObjectException e) { - } catch (InvalidOperationException e) { + } catch (NoSuchObjectException ignore) { + } catch (InvalidOperationException ignore) { } } @@ -153,16 +150,16 @@ public class TestMetastoreExpr extends T client.listPartitionsByExpr(dbName, tblName, new byte[] { 'f', 'o', 'o' }, null, (short)-1, new ArrayList()); fail("Should have thrown IncompatibleMetastoreException"); - } catch (IMetaStoreClient.IncompatibleMetastoreException ex) { + } catch (IMetaStoreClient.IncompatibleMetastoreException ignore) { } // Invalid expression => throw some exception, but not incompatible metastore. try { checkExpr(-1, dbName, tblName, e.val(31).intCol("p3").pred(">", 2).build()); fail("Should have thrown"); - } catch (IMetaStoreClient.IncompatibleMetastoreException ex) { + } catch (IMetaStoreClient.IncompatibleMetastoreException ignore) { fail("Should not have thrown IncompatibleMetastoreException"); - } catch (Exception ex) { + } catch (Exception ignore) { } } @@ -198,7 +195,7 @@ public class TestMetastoreExpr extends T for (int i = 0; i < args; ++i) { children.add(stack.pop()); } - stack.push(new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, + stack.push(new ExprNodeGenericFuncDesc(ti, FunctionRegistry.getFunctionInfo(name).getGenericUDF(), children)); return this; } @@ -249,8 +246,7 @@ public class TestMetastoreExpr extends T } private void addPartition(HiveMetaStoreClient client, Table table, - List vals, String location) throws InvalidObjectException, - AlreadyExistsException, MetaException, TException { + List vals, String location) throws TException { Partition part = new Partition(); part.setDbName(table.getDbName()); Modified: hive/branches/spark/ql/src/test/results/clientpositive/ptf_matchpath.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/ptf_matchpath.q.out?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/ptf_matchpath.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/ptf_matchpath.q.out Fri Feb 27 19:54:46 2015 @@ -72,40 +72,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), 
VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: _col6 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -191,43 +192,44 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -300,47 +302,48 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key 
expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float) + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string + output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Modified: hive/branches/spark/ql/src/test/results/clientpositive/show_functions.q.out URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/test/results/clientpositive/show_functions.q.out?rev=1662806&r1=1662805&r2=1662806&view=diff ============================================================================== --- hive/branches/spark/ql/src/test/results/clientpositive/show_functions.q.out (original) +++ hive/branches/spark/ql/src/test/results/clientpositive/show_functions.q.out Fri Feb 27 19:54:46 2015 @@ -105,6 +105,7 @@ lcase lead least length +levenshtein like ln locate
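
The getReferencedColumns() hook added to TableFunctionResolver above defaults to returning null, i.e. pull every column from the table including virtual columns. When a resolver returns an explicit column list instead, the planner can prune everything else, which is what the new "referenced columns:" lines in the ptf_matchpath plans reflect: dest_city_name, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME and ROW__ID no longer appear in the Reduce Output Operator value expressions. The following is only a minimal sketch of a resolver using the hook, not code from this commit: the class name and hard-coded column list are illustrative, the element type of the returned List (String) is inferred from the Javadoc's "column names", the package follows the directory shown in the diff, and the class is left abstract so the rest of the TableFunctionResolver contract (createEvaluator and friends) can be omitted from the example.

// Sketch only -- not part of r1662806. Illustrates the new
// getReferencedColumns() extension point under the assumptions noted above.
package org.apache.hadoop.hive.ql.udf.ptf;

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.parse.SemanticException;

public abstract class ColumnPruningDemoResolver extends TableFunctionResolver {

  // Columns this (fictional) table function actually reads; a real resolver
  // would typically derive them from its arguments during initialization.
  private final List<String> referenced =
      Arrays.asList("origin_city_name", "fl_num", "year", "month", "day_of_month");

  // Returning a concrete list (rather than the default null) tells the
  // planner which input columns the PTF needs, so unused columns --
  // including virtual columns -- can be dropped, as in the updated
  // ptf_matchpath.q.out plans above. Returning null keeps the previous
  // behaviour of forwarding all columns.
  @Override
  public List<String> getReferencedColumns() throws SemanticException {
    return referenced;
  }
}
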