Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 9BE8510A44 for ; Tue, 17 Feb 2015 23:18:56 +0000 (UTC) Received: (qmail 29279 invoked by uid 500); 17 Feb 2015 23:18:56 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 29234 invoked by uid 500); 17 Feb 2015 23:18:56 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 29223 invoked by uid 99); 17 Feb 2015 23:18:56 -0000 Received: from eris.apache.org (HELO hades.apache.org) (140.211.11.105) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 17 Feb 2015 23:18:56 +0000 Received: from hades.apache.org (localhost [127.0.0.1]) by hades.apache.org (ASF Mail Server at hades.apache.org) with ESMTP id 3FAD6AC00A8 for ; Tue, 17 Feb 2015 23:18:56 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1660535 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/ java/org/apache/hadoop/hive/ql/plan/ptf/ java/org/apache/hadoop/hive/ql/udf/ptf/ test/res... Date: Tue, 17 Feb 2015 23:18:55 -0000 To: commits@hive.apache.org From: hashutosh@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20150217231856.3FAD6AC00A8@hades.apache.org> Author: hashutosh Date: Tue Feb 17 23:18:55 2015 New Revision: 1660535 URL: http://svn.apache.org/r1660535 Log: HIVE-9699 : Extend PTFs to provide referenced columns for CP (Navis via Ashutosh Chauhan) Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Tue Feb 17 23:18:55 2015 @@ -74,7 +74,6 @@ import org.apache.hadoop.hive.ql.plan.pt import org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails; import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; -import org.apache.hadoop.hive.ql.udf.ptf.Noop; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -265,16 +264,19 @@ public final class ColumnPrunerProcFacto //Since we cannot know what columns will be needed by a PTF chain, //we do not prune columns on PTFOperator for PTF chains. PartitionedTableFunctionDef funcDef = conf.getFuncDef(); - if (!conf.forWindowing() && !Noop.class.isInstance(funcDef.getTFunction())) { + List referencedColumns = funcDef.getReferencedColumns(); + if (!conf.forWindowing() && !conf.forNoop() && referencedColumns == null) { return super.process(nd, stack, cppCtx, nodeOutputs); } - - //we create a copy of prunedCols to create a list of pruned columns for PTFOperator - List prunedCols = - new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0))); - if (funcDef instanceof WindowTableFunctionDef) { + + List prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0)); + if (conf.forWindowing()) { WindowTableFunctionDef def = (WindowTableFunctionDef) funcDef; prunedCols = Utilities.mergeUniqElems(getWindowFunctionColumns(def), prunedCols); + } else if (conf.forNoop()) { + prunedCols = new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0))); + } else { + prunedCols = referencedColumns; } List newRS = prunedColumnsList(prunedCols, op.getSchema(), funcDef); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Tue Feb 17 23:18:55 2015 @@ -342,6 +342,7 @@ public class PTFTranslator { outColNames, outRR); def.setOutputShape(outputShape); + def.setReferencedColumns(tFn.getReferencedColumns()); return def; } Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java Tue Feb 17 23:18:55 2015 @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.parse.L import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef; +import org.apache.hadoop.hive.ql.udf.ptf.Noop; import java.util.ArrayList; import java.util.Collections; @@ -97,6 +98,10 @@ public class PTFDesc extends AbstractOpe return funcDef instanceof WindowTableFunctionDef; } + public boolean forNoop() { + return funcDef.getTFunction() instanceof Noop; + } + @Explain(displayName = "Map-side function", displayOnlyOnTrue = true) public boolean isMapSide() { return isMapSide; Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java Tue Feb 17 23:18:55 2015 @@ -37,6 +37,8 @@ public class PartitionedTableFunctionDef private OrderDef order; private TableFunctionEvaluator tFunction; boolean transformsRawInput; + + private transient List referencedColumns; @Explain(displayName = "name") public String getName() { @@ -185,4 +187,13 @@ public class PartitionedTableFunctionDef public void setResolverClassName(String resolverClassName) { this.resolverClassName = resolverClassName; } + + @Explain(displayName = "referenced columns") + public List getReferencedColumns() { + return referencedColumns; + } + + public void setReferencedColumns(List referencedColumns) { + this.referencedColumns = referencedColumns; + } } \ No newline at end of file Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java Tue Feb 17 23:18:55 2015 @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.Ex import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; import org.apache.hadoop.hive.ql.exec.PTFPartition; import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.PTFTranslator; @@ -195,6 +196,20 @@ public class MatchPath extends TableFunc setOutputOI(OI); } + + @Override + public List getReferencedColumns() throws SemanticException { + MatchPath matchPath = (MatchPath) evaluator; + List columns = new ArrayList<>(); + for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) { + Utilities.mergeUniqElems(columns, exprNode.getCols()); + } + for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) { + Utilities.mergeUniqElems(columns, exprNode.getCols()); + } + return columns; + } + /* * validate and setup patternStr */ @@ -356,6 +371,7 @@ public class MatchPath extends TableFunc static class SymbolsInfo { int sz; + ArrayList symbolExprsDecs; ArrayList symbolExprsEvaluators; ArrayList symbolExprsOIs; ArrayList symbolExprsNames; @@ -366,6 +382,7 @@ public class MatchPath extends TableFunc symbolExprsEvaluators = new ArrayList(sz); symbolExprsOIs = new ArrayList(sz); symbolExprsNames = new ArrayList(sz); + symbolExprsDecs = new ArrayList<>(sz); } void add(String name, PTFExpressionDef arg) @@ -373,6 +390,7 @@ public class MatchPath extends TableFunc symbolExprsNames.add(name); symbolExprsEvaluators.add(arg.getExprEvaluator()); symbolExprsOIs.add(arg.getOI()); + symbolExprsDecs.add(arg.getExprNode()); } } @@ -749,8 +767,7 @@ public class MatchPath extends TableFunc /* * create SelectListOI */ - selectListInputOI = (StructObjectInspector) - PTFTranslator.getStandardStructOI(selectListInputRowResolver); + selectListInputOI = PTFTranslator.getStandardStructOI(selectListInputRowResolver); } private void fixResultExprString() Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java Tue Feb 17 23:18:55 2015 @@ -29,11 +29,6 @@ import org.apache.hadoop.hive.serde2.obj public class NoopWithMap extends Noop { - @Override - public PTFPartition execute(PTFPartition iPart) throws HiveException - { - return iPart; - } @Override protected PTFPartition _transformRawInput(PTFPartition iPart) throws HiveException Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java Tue Feb 17 23:18:55 2015 @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.metadat import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /* @@ -60,7 +59,7 @@ import org.apache.hadoop.hive.serde2.obj */ /** - * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AsbtractTableFunction + * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AbstractTableFunction * class into a Resolver and Evaluator. *

* The Evaluator also holds onto the {@link TableFunctionDef}. This provides information @@ -79,7 +78,7 @@ import org.apache.hadoop.hive.serde2.obj */ public abstract class TableFunctionEvaluator { /* - * how is this different from the OutpuShape set on the TableDef. + * how is this different from the OutputShape set on the TableDef. * This is the OI of the object coming out of the PTF. * It is put in an output Partition whose Serde is usually LazyBinarySerde. * So the next PTF (or Operator) in the chain gets a LazyBinaryStruct. Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java Tue Feb 17 23:18:55 2015 @@ -60,7 +60,7 @@ public abstract class TableFunctionResol /* * - called during translation. * - invokes createEvaluator which must be implemented by a subclass - * - sets up the evaluator with references to the TableDef, PartitionClass, PartitonMemsize and + * - sets up the evaluator with references to the TableDef, PartitionClass, PartitionMemsize and * the transformsRawInput boolean. */ public void initialize(HiveConf cfg, PTFDesc ptfDesc, PartitionedTableFunctionDef tDef) @@ -193,4 +193,14 @@ public abstract class TableFunctionResol * a subclass must provide the {@link TableFunctionEvaluator} instance. */ protected abstract TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, PartitionedTableFunctionDef tDef); + + /** + * Provide referenced columns names to be used in partition function + * + * @return null for unknown (will get all columns from table including virtual columns) + * @throws SemanticException + */ + public List getReferencedColumns() throws SemanticException { + return null; + } } Modified: hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out Tue Feb 17 23:18:55 2015 @@ -72,40 +72,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: _col6 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -191,43 +192,44 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -300,47 +302,48 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float) + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: float) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string + output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Modified: hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out Tue Feb 17 23:18:55 2015 @@ -77,41 +77,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: _col6 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -202,44 +203,45 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -317,48 +319,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float) + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string + output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Modified: hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out?rev=1660535&r1=1660534&r2=1660535&view=diff ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out (original) +++ hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out Tue Feb 17 23:18:55 2015 @@ -77,41 +77,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: ++++ Map-reduce partition columns: fl_num (type: string) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: _col6 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -202,44 +203,45 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE - value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct) + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + value expressions: origin_city_name (type: string), arr_delay (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct + output shape: type: TABLE Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = 1142) (type: boolean) - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -317,48 +319,49 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny - Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (fl_num = -1142) (type: boolean) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float) + outputColumnNames: _col0, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int) sort order: +++++ Map-reduce partition columns: 0 (type: int) - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float) + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col5 (type: float) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string) + outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: flights_tiny - output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string + output shape: type: SUBQUERY Partition table definition input alias: ptf_1 arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath' name: matchpath order by: _col6, _col2, _col3, _col4 - output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int + output shape: tpath: int partition by: 0 raw input shape: - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5 + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat