Mailing-List: contact commits-help@hive.apache.org; run by ezmlm
Precedence: bulk
Reply-To: hive-dev@hive.apache.org
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: svn commit: r1660535 - in /hive/trunk/ql/src:
 java/org/apache/hadoop/hive/ql/optimizer/
 java/org/apache/hadoop/hive/ql/parse/ java/org/apache/hadoop/hive/ql/plan/
 java/org/apache/hadoop/hive/ql/plan/ptf/
 java/org/apache/hadoop/hive/ql/udf/ptf/ test/res...
Date: Tue, 17 Feb 2015 23:18:55 -0000
To: commits@hive.apache.org
From: hashutosh@apache.org
Message-Id: <20150217231856.3FAD6AC00A8@hades.apache.org>

Author: hashutosh
Date: Tue Feb 17 23:18:55 2015
New Revision: 1660535

URL: http://svn.apache.org/r1660535
Log:
HIVE-9699 : Extend PTFs to provide referenced columns for CP (Navis via Ashutosh Chauhan)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java
    hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java Tue Feb 17 23:18:55 2015
@@ -74,7 +74,6 @@ import org.apache.hadoop.hive.ql.plan.pt
 import org.apache.hadoop.hive.ql.plan.ptf.ShapeDetails;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
-import org.apache.hadoop.hive.ql.udf.ptf.Noop;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
@@ -265,16 +264,19 @@ public final class ColumnPrunerProcFacto
       //Since we cannot know what columns will be needed by a PTF chain,
       //we do not prune columns on PTFOperator for PTF chains.
       PartitionedTableFunctionDef funcDef = conf.getFuncDef();
-      if (!conf.forWindowing() && !Noop.class.isInstance(funcDef.getTFunction())) {
+      List<String> referencedColumns = funcDef.getReferencedColumns();
+      if (!conf.forWindowing() && !conf.forNoop() && referencedColumns == null) {
         return super.process(nd, stack, cppCtx, nodeOutputs);
       }
-      
-      //we create a copy of prunedCols to create a list of pruned columns for PTFOperator
-      List<String> prunedCols = 
-          new ArrayList<String>(cppCtx.getPrunedColList(op.getChildOperators().get(0)));
-      if (funcDef instanceof WindowTableFunctionDef) {
+
+      List<String> prunedCols = cppCtx.getPrunedColList(op.getChildOperators().get(0));
+      if (conf.forWindowing()) {
         WindowTableFunctionDef def = (WindowTableFunctionDef) funcDef;
         prunedCols = Utilities.mergeUniqElems(getWindowFunctionColumns(def), prunedCols);
+      } else if (conf.forNoop()) {
+        prunedCols = new ArrayList(cppCtx.getPrunedColList(op.getChildOperators().get(0)));
+      } else {
+        prunedCols = referencedColumns;
       }
       
       List<ColumnInfo> newRS = prunedColumnsList(prunedCols, op.getSchema(), funcDef);      

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java Tue Feb 17 23:18:55 2015
@@ -342,6 +342,7 @@ public class PTFTranslator {
         outColNames,
         outRR);
     def.setOutputShape(outputShape);
+    def.setReferencedColumns(tFn.getReferencedColumns());
 
     return def;
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDesc.java Tue Feb 17 23:18:55 2015
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.parse.L
 import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef;
 import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
 import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
+import org.apache.hadoop.hive.ql.udf.ptf.Noop;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -97,6 +98,10 @@ public class PTFDesc extends AbstractOpe
     return funcDef instanceof WindowTableFunctionDef;
   }
 
+  public boolean forNoop() {
+    return funcDef.getTFunction() instanceof Noop;
+  }
+
   @Explain(displayName = "Map-side function", displayOnlyOnTrue = true)
   public boolean isMapSide() {
     return isMapSide;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ptf/PartitionedTableFunctionDef.java Tue Feb 17 23:18:55 2015
@@ -37,6 +37,8 @@ public class PartitionedTableFunctionDef
   private OrderDef order;
   private TableFunctionEvaluator tFunction;
   boolean transformsRawInput;
+  
+  private transient List<String> referencedColumns;
 
   @Explain(displayName = "name")
   public String getName() {
@@ -185,4 +187,13 @@ public class PartitionedTableFunctionDef
   public void setResolverClassName(String resolverClassName) {
     this.resolverClassName = resolverClassName;
   }
+
+  @Explain(displayName = "referenced columns")
+  public List<String> getReferencedColumns() {
+    return referencedColumns;
+  }
+
+  public void setReferencedColumns(List<String> referencedColumns) {
+    this.referencedColumns = referencedColumns;
+  }
 }
\ No newline at end of file

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/MatchPath.java Tue Feb 17 23:18:55 2015
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.Ex
 import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
 import org.apache.hadoop.hive.ql.exec.PTFPartition;
 import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.PTFTranslator;
@@ -195,6 +196,20 @@ public class MatchPath extends TableFunc
 
       setOutputOI(OI);
     }
+
+    @Override
+    public List<String> getReferencedColumns() throws SemanticException {
+      MatchPath matchPath = (MatchPath) evaluator;
+      List<String> columns = new ArrayList<>();
+      for (ExprNodeDesc exprNode : matchPath.resultExprInfo.resultExprNodes) {
+        Utilities.mergeUniqElems(columns, exprNode.getCols());
+      }
+      for (ExprNodeDesc exprNode : matchPath.symInfo.symbolExprsDecs) {
+        Utilities.mergeUniqElems(columns, exprNode.getCols());
+      }
+      return columns;
+    }
+    
     /*
      * validate and setup patternStr
      */
@@ -356,6 +371,7 @@ public class MatchPath extends TableFunc
 
   static class SymbolsInfo {
     int sz;
+    ArrayList<ExprNodeDesc> symbolExprsDecs;
     ArrayList<ExprNodeEvaluator> symbolExprsEvaluators;
     ArrayList<ObjectInspector> symbolExprsOIs;
     ArrayList<String> symbolExprsNames;
@@ -366,6 +382,7 @@ public class MatchPath extends TableFunc
       symbolExprsEvaluators = new ArrayList<ExprNodeEvaluator>(sz);
       symbolExprsOIs = new ArrayList<ObjectInspector>(sz);
       symbolExprsNames = new ArrayList<String>(sz);
+      symbolExprsDecs = new ArrayList<>(sz);
     }
 
     void add(String name, PTFExpressionDef arg)
@@ -373,6 +390,7 @@ public class MatchPath extends TableFunc
       symbolExprsNames.add(name);
       symbolExprsEvaluators.add(arg.getExprEvaluator());
       symbolExprsOIs.add(arg.getOI());
+      symbolExprsDecs.add(arg.getExprNode());
     }
   }
 
@@ -749,8 +767,7 @@ public class MatchPath extends TableFunc
       /*
        * create SelectListOI
        */
-      selectListInputOI = (StructObjectInspector)
-          PTFTranslator.getStandardStructOI(selectListInputRowResolver);
+      selectListInputOI = PTFTranslator.getStandardStructOI(selectListInputRowResolver);
     }
 
     private void fixResultExprString()

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/NoopWithMap.java Tue Feb 17 23:18:55 2015
@@ -29,11 +29,6 @@ import org.apache.hadoop.hive.serde2.obj
 
 public class NoopWithMap extends Noop
 {
-  @Override
-  public PTFPartition execute(PTFPartition iPart) throws HiveException
-  {
-    return iPart;
-  }
 
   @Override
   protected PTFPartition _transformRawInput(PTFPartition iPart) throws HiveException

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionEvaluator.java Tue Feb 17 23:18:55 2015
@@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.plan.PTFDesc;
 import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 
 /*
@@ -60,7 +59,7 @@ import org.apache.hadoop.hive.serde2.obj
  */
 
 /**
- * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AsbtractTableFunction
+ * Based on Hive {@link GenericUDAFEvaluator}. Break up the responsibility of the old AbstractTableFunction
  * class into a Resolver and Evaluator.
  * <p>
  * The Evaluator also holds onto the {@link TableFunctionDef}. This provides information
@@ -79,7 +78,7 @@ import org.apache.hadoop.hive.serde2.obj
  */
 public abstract class TableFunctionEvaluator {
   /*
-   * how is this different from the OutpuShape set on the TableDef.
+   * how is this different from the OutputShape set on the TableDef.
    * This is the OI of the object coming out of the PTF.
    * It is put in an output Partition whose Serde is usually LazyBinarySerde.
    * So the next PTF (or Operator) in the chain gets a LazyBinaryStruct.

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/TableFunctionResolver.java Tue Feb 17 23:18:55 2015
@@ -60,7 +60,7 @@ public abstract class TableFunctionResol
   /*
    * - called during translation.
    * - invokes createEvaluator which must be implemented by a subclass
-   * - sets up the evaluator with references to the TableDef, PartitionClass, PartitonMemsize and
+   * - sets up the evaluator with references to the TableDef, PartitionClass, PartitionMemsize and
    *   the transformsRawInput boolean.
    */
   public void initialize(HiveConf cfg, PTFDesc ptfDesc, PartitionedTableFunctionDef tDef)
@@ -193,4 +193,14 @@ public abstract class TableFunctionResol
    * a subclass must provide the {@link TableFunctionEvaluator} instance.
    */
   protected abstract TableFunctionEvaluator createEvaluator(PTFDesc ptfDesc, PartitionedTableFunctionDef tDef);
+
+  /**
+   * Provide the names of the referenced columns to be used in the partition function.
+   *
+   * @return null if unknown (all columns from the table, including virtual columns, will be fetched)
+   * @throws SemanticException
+   */
+  public List<String> getReferencedColumns() throws SemanticException {
+    return null;
+  }
 }

Modified: hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ptf_matchpath.q.out Tue Feb 17 23:18:55 2015
@@ -72,40 +72,41 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: flights_tiny
-            Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
             Reduce Output Operator
               key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int)
               sort order: ++++
               Map-reduce partition columns: fl_num (type: string)
-              Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
-              value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+              Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+              value expressions: origin_city_name (type: string), arr_delay (type: float)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-          Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string)
+          outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+          Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
           PTF Operator
             Function definitions:
                 Input definition
                   input alias: flights_tiny
-                  output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct<transactionid:bigint,bucketid:int,rowid:bigint>
+                  output shape: 
                   type: TABLE
                 Partition table definition
                   input alias: ptf_1
                   arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                   name: matchpath
                   order by: _col2, _col3, _col4
-                  output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                  output shape: tpath: int
                   partition by: _col6
                   raw input shape:
-            Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+            Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-              Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -191,43 +192,44 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: flights_tiny
-            Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
             Reduce Output Operator
               key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int)
               sort order: +++++
               Map-reduce partition columns: 0 (type: int)
-              Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
-              value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+              Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+              value expressions: origin_city_name (type: string), arr_delay (type: float)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-          Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string)
+          outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+          Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
           PTF Operator
             Function definitions:
                 Input definition
                   input alias: flights_tiny
-                  output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct<transactionid:bigint,bucketid:int,rowid:bigint>
+                  output shape: 
                   type: TABLE
                 Partition table definition
                   input alias: ptf_1
                   arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                   name: matchpath
                   order by: _col6, _col2, _col3, _col4
-                  output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                  output shape: tpath: int
                   partition by: 0
                   raw input shape:
-            Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+            Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (fl_num = 1142) (type: boolean)
-              Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -300,47 +302,48 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: flights_tiny
-            Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
               predicate: (fl_num = -1142) (type: boolean)
-              Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                   sort order: +++++
                   Map-reduce partition columns: 0 (type: int)
-                  Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float)
+                  Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: string), _col5 (type: float)
       Reduce Operator Tree:
         Select Operator
-          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string)
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-          Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+          expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string)
+          outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+          Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
           PTF Operator
             Function definitions:
                 Input definition
                   input alias: flights_tiny
-                  output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string
+                  output shape: 
                   type: SUBQUERY
                 Partition table definition
                   input alias: ptf_1
                   arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                   name: matchpath
                   order by: _col6, _col2, _col3, _col4
-                  output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                  output shape: tpath: int
                   partition by: 0
                   raw input shape:
-            Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                  referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+            Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
             Select Operator
               expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-              Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/spark/ptf_matchpath.q.out Tue Feb 17 23:18:55 2015
@@ -77,41 +77,42 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: flights_tiny
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int)
                     sort order: ++++
                     Map-reduce partition columns: fl_num (type: string)
-                    Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                    Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: origin_city_name (type: string), arr_delay (type: float)
         Reducer 2 
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
                         input alias: flights_tiny
-                        output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct<transactionid:bigint,bucketid:int,rowid:bigint>
+                        output shape: 
                         type: TABLE
                       Partition table definition
                         input alias: ptf_1
                         arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                         name: matchpath
                         order by: _col2, _col3, _col4
-                        output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                        output shape: tpath: int
                         partition by: _col6
                         raw input shape:
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                        referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -202,44 +203,45 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: flights_tiny
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int)
                     sort order: +++++
                     Map-reduce partition columns: 0 (type: int)
-                    Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                    Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: origin_city_name (type: string), arr_delay (type: float)
         Reducer 2 
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
                         input alias: flights_tiny
-                        output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct<transactionid:bigint,bucketid:int,rowid:bigint>
+                        output shape: 
                         type: TABLE
                       Partition table definition
                         input alias: ptf_1
                         arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                         name: matchpath
                         order by: _col6, _col2, _col3, _col4
-                        output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                        output shape: tpath: int
                         partition by: 0
                         raw input shape:
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                        referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (fl_num = 1142) (type: boolean)
-                    Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                      Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.TextInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -317,48 +319,49 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: flights_tiny
-                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (fl_num = -1142) (type: boolean)
-                    Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                      expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float)
+                      outputColumnNames: _col0, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                         sort order: +++++
                         Map-reduce partition columns: 0 (type: int)
-                        Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float)
+                        Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string), _col5 (type: float)
         Reducer 2 
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
                         input alias: flights_tiny
-                        output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string
+                        output shape: 
                         type: SUBQUERY
                       Partition table definition
                         input alias: ptf_1
                         arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                         name: matchpath
                         order by: _col6, _col2, _col3, _col4
-                        output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                        output shape: tpath: int
                         partition by: 0
                         raw input shape:
-                  Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                        referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+                  Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

Modified: hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out?rev=1660535&r1=1660534&r2=1660535&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/tez/ptf_matchpath.q.out Tue Feb 17 23:18:55 2015
@@ -77,41 +77,42 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: flights_tiny
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int)
                     sort order: ++++
                     Map-reduce partition columns: fl_num (type: string)
-                    Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                    Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: origin_city_name (type: string), arr_delay (type: float)
         Reducer 2 
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey1 (type: int), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey0 (type: string)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
                         input alias: flights_tiny
-                        output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct<transactionid:bigint,bucketid:int,rowid:bigint>
+                        output shape: 
                         type: TABLE
                       Partition table definition
                         input alias: ptf_1
                         arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                         name: matchpath
                         order by: _col2, _col3, _col4
-                        output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                        output shape: tpath: int
                         partition by: _col6
                         raw input shape:
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                        referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: origin_city_name (type: string), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -202,44 +203,45 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: flights_tiny
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: 0 (type: int), fl_num (type: string), year (type: int), month (type: int), day_of_month (type: int)
                     sort order: +++++
                     Map-reduce partition columns: 0 (type: int)
-                    Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: origin_city_name (type: string), dest_city_name (type: string), arr_delay (type: float), BLOCK__OFFSET__INSIDE__FILE (type: bigint), INPUT__FILE__NAME (type: string), ROW__ID (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
+                    Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: origin_city_name (type: string), arr_delay (type: float)
         Reducer 2 
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string), VALUE._col3 (type: bigint), VALUE._col4 (type: string), VALUE._col5 (type: struct<transactionid:bigint,bucketid:int,rowid:bigint>)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-                Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), KEY.reducesinkkey1 (type: string)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
                         input alias: flights_tiny
-                        output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string, _col7: bigint, _col8: string, _col9: struct<transactionid:bigint,bucketid:int,rowid:bigint>
+                        output shape: 
                         type: TABLE
                       Partition table definition
                         input alias: ptf_1
                         arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                         name: matchpath
                         order by: _col6, _col2, _col3, _col4
-                        output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                        output shape: tpath: int
                         partition by: 0
                         raw input shape:
-                  Statistics: Num rows: 17 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                        referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (fl_num = 1142) (type: boolean)
-                    Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: origin_city_name (type: string), '1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                      Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 8 Data size: 2531 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.TextInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -317,48 +319,49 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: flights_tiny
-                  Statistics: Num rows: 24 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 44 Data size: 5379 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (fl_num = -1142) (type: boolean)
-                    Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: origin_city_name (type: string), dest_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float)
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                      expressions: origin_city_name (type: string), year (type: int), month (type: int), day_of_month (type: int), arr_delay (type: float)
+                      outputColumnNames: _col0, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: 0 (type: int), '-1142' (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int)
                         sort order: +++++
                         Map-reduce partition columns: 0 (type: int)
-                        Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: string), _col1 (type: string), _col5 (type: float)
+                        Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: string), _col5 (type: float)
         Reducer 2 
             Reduce Operator Tree:
               Select Operator
-                expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string)
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                expressions: VALUE._col0 (type: string), KEY.reducesinkkey2 (type: int), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey4 (type: int), VALUE._col2 (type: float), '-1142' (type: string)
+                outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                 PTF Operator
                   Function definitions:
                       Input definition
                         input alias: flights_tiny
-                        output shape: _col0: string, _col1: string, _col2: int, _col3: int, _col4: int, _col5: float, _col6: string
+                        output shape: 
                         type: SUBQUERY
                       Partition table definition
                         input alias: ptf_1
                         arguments: 'LATE.LATE+', 'LATE', (_col5 > 15.0), 'origin_city_name, fl_num, year, month, day_of_month, size(tpath) as sz, tpath[0].day_of_month as tpath'
                         name: matchpath
                         order by: _col6, _col2, _col3, _col4
-                        output shape: origin_city_name: string, fl_num: string, year: int, month: int, day_of_month: int, sz: int, tpath: int
+                        output shape: tpath: int
                         partition by: 0
                         raw input shape:
-                  Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                        referenced columns: _col0, _col6, _col2, _col3, _col4, tpath, _col5
+                  Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: origin_city_name (type: string), '-1142' (type: string), year (type: int), month (type: int), day_of_month (type: int), sz (type: int), tpath (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                    Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 12 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 22 Data size: 2689 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat