hive-commits mailing list archives

From: hashut...@apache.org
Subject: svn commit: r1656393 [1/7] - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/exec/ java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/optimizer/correlation/ java/org/apache/hadoop/...
Date: Mon, 02 Feb 2015 06:41:04 GMT
Author: hashutosh
Date: Mon Feb  2 06:41:02 2015
New Revision: 1656393

URL: http://svn.apache.org/r1656393
Log:
HIVE-9416 : Get rid of Extract Operator (Ashutosh Chauhan via Navis)
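
The ExtractOperator did little more than forward the ReduceSink VALUE struct to its reduce-side child; this change drops the dedicated operator and expresses the same projection with an ordinary SelectOperator over the VALUE columns, so plans go from RS -> EX -> FS to RS -> SEL -> FS. A minimal sketch of the replacement pattern, mirroring the new code in SortedDynPartitionOptimizer below (rsConf, outRS and rsOp stand for the ReduceSink descriptor, row schema and operator the caller has already built):

    // Project each ReduceSink value column as VALUE.<name>; this is the
    // reduce-side projection the ExtractOperator used to perform implicitly.
    List<ExprNodeDesc> valCols = rsConf.getValueCols();
    List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(valCols.size());
    List<String> colNames = new ArrayList<String>();
    for (ExprNodeDesc valCol : valCols) {
      String colName = PlanUtils.stripQuotes(valCol.getExprString());
      colNames.add(colName);
      descs.add(new ExprNodeColumnDesc(valCol.getTypeInfo(),
          ReduceField.VALUE.toString() + "." + colName, null, false));
    }
    // Hang a SelectOperator off the ReduceSink where the Extract operator used to sit.
    SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(
        new SelectDesc(descs, colNames), new RowSchema(outRS), rsOp);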

Removed:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExtractOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ExtractDesc.java
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
    hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_2.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
    hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_6.q.out
    hive/trunk/ql/src/test/results/clientpositive/disable_merge_for_bucketing.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
    hive/trunk/ql/src/test/results/clientpositive/dynpart_sort_optimization.q.out
    hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_dynamic.q.out
    hive/trunk/ql/src/test/results/clientpositive/encrypted/encryption_insert_partition_static.q.out
    hive/trunk/ql/src/test/results/clientpositive/load_dyn_part2.q.out
    hive/trunk/ql/src/test/results/clientpositive/ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/ptf_streaming.q.out
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out
    hive/trunk/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucket2.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucket3.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucket4.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/ptf_streaming.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/stats10.q.out
    hive/trunk/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/stats10.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/bucket2.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/bucket3.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/bucket4.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/disable_merge_for_bucketing.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/dynpart_sort_optimization.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/load_dyn_part2.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/ptf_streaming.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vector_bucket.q.out
    hive/trunk/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
    hive/trunk/ql/src/test/results/clientpositive/vector_bucket.q.out
    hive/trunk/ql/src/test/results/clientpositive/vectorized_ptf.q.out

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Mon Feb  2 06:41:02 2015
@@ -19,9 +19,6 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorAppMasterEventOperator;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -39,7 +36,6 @@ import org.apache.hadoop.hive.ql.plan.De
 import org.apache.hadoop.hive.ql.plan.DummyStoreDesc;
 import org.apache.hadoop.hive.ql.plan.DynamicPruningEventDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.ForwardDesc;
@@ -89,7 +85,6 @@ public final class OperatorFactory {
     opvec.add(new OpTuple<ScriptDesc>(ScriptDesc.class, ScriptOperator.class));
     opvec.add(new OpTuple<PTFDesc>(PTFDesc.class, PTFOperator.class));
     opvec.add(new OpTuple<ReduceSinkDesc>(ReduceSinkDesc.class, ReduceSinkOperator.class));
-    opvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, ExtractOperator.class));
     opvec.add(new OpTuple<GroupByDesc>(GroupByDesc.class, GroupByOperator.class));
     opvec.add(new OpTuple<JoinDesc>(JoinDesc.class, JoinOperator.class));
     opvec.add(new OpTuple<MapJoinDesc>(MapJoinDesc.class, MapJoinOperator.class));
@@ -143,7 +138,6 @@ public final class OperatorFactory {
     vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
     vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
     vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
-    vectorOpvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, VectorExtractOperator.class));
   }
 
   private static final class OpTuple<T extends OperatorDesc> {
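
With the ExtractDesc registrations removed from both opvec and vectorOpvec, the factory can no longer materialize an Extract operator; the reduce-side projection is instead obtained through the SelectDesc mapping that stays registered above. A rough sketch (colList, outputColumnNames, rowSchema and parentOp are placeholders for whatever the plan builder already has at hand):

    // Resolve the reduce-side projection through the remaining
    // SelectDesc -> SelectOperator registration instead of ExtractDesc.
    SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
        new SelectDesc(colList, outputColumnNames),
        new RowSchema(rowSchema.getSignature()), parentOp);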

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/PTFOperator.java Mon Feb  2 06:41:02 2015
@@ -68,7 +68,6 @@ public class PTFOperator extends Operato
   @Override
   protected void initializeOp(Configuration jobConf) throws HiveException {
     hiveConf = jobConf;
-    // if the parent is ExtractOperator, this invocation is from reduce-side
     isMapOperator = conf.isMapSide();
 
     reconstructQueryDef(hiveConf);
@@ -157,7 +156,7 @@ public class PTFOperator extends Operato
       /*
        * Why cannot we just use the ExprNodeEvaluator on the column?
        * - because on the reduce-side it is initialized based on the rowOI of the HiveTable
-       *   and not the OI of the ExtractOp ( the parent of this Operator on the reduce-side)
+       *   and not the OI of the parent of this Operator on the reduce-side
        */
       keyFields[i] = ExprNodeEvaluatorFactory.get(exprDef.getExprNode());
       keyOIs[i] = keyFields[i].initialize(inputOI);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketingSortingReduceSinkOptimizer.java Mon Feb  2 06:41:02 2015
@@ -29,7 +29,6 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -84,7 +83,7 @@ public class BucketingSortingReduceSinkO
     // process reduce sink added by hive.enforce.bucketing or hive.enforce.sorting
     opRules.put(new RuleRegExp("R1",
         ReduceSinkOperator.getOperatorName() + "%" +
-            ExtractOperator.getOperatorName() + "%" +
+            SelectOperator.getOperatorName() + "%" +
             FileSinkOperator.getOperatorName() + "%"),
         getBucketSortReduceSinkProc(pctx));
 
@@ -362,8 +361,7 @@ public class BucketingSortingReduceSinkO
 
       // If the reduce sink has not been introduced due to bucketing/sorting, ignore it
       FileSinkOperator fsOp = (FileSinkOperator) nd;
-      ExtractOperator exOp = (ExtractOperator) fsOp.getParentOperators().get(0);
-      ReduceSinkOperator rsOp = (ReduceSinkOperator) exOp.getParentOperators().get(0);
+      ReduceSinkOperator rsOp = (ReduceSinkOperator) fsOp.getParentOperators().get(0).getParentOperators().get(0);
 
       List<ReduceSinkOperator> rsOps = pGraphContext
           .getReduceSinkOperatorsAddedByEnforceBucketingSorting();
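
Since rule R1 now matches RS% SEL% FS% rather than RS% EX% FS%, the ReduceSink added by hive.enforce.bucketing/sorting sits two parents above the FileSink. A small sketch of the walk the processor performs, equivalent to the one-liner in the hunk above:

    // FS's parent is the new SelectOperator, whose parent is the ReduceSink
    // introduced by the enforce bucketing/sorting config.
    Operator<? extends OperatorDesc> selParent = fsOp.getParentOperators().get(0);
    ReduceSinkOperator rsOp = (ReduceSinkOperator) selParent.getParentOperators().get(0);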

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConstantPropagateProcFactory.java Mon Feb  2 06:41:02 2015
@@ -372,7 +372,7 @@ public final class ConstantPropagateProc
             // if false return false
             return childExpr;
           }
-        } else // Try to fold (key = 86) and (key is not null) to (key = 86) 
+        } else // Try to fold (key = 86) and (key is not null) to (key = 86)
         if (childExpr instanceof ExprNodeGenericFuncDesc &&
             ((ExprNodeGenericFuncDesc)childExpr).getGenericUDF() instanceof GenericUDFOPNotNull &&
             childExpr.getChildren().get(0) instanceof ExprNodeColumnDesc && other instanceof ExprNodeGenericFuncDesc
@@ -429,7 +429,7 @@ public final class ConstantPropagateProc
     }
     if (ci == null) {
       LOG.error("Can't resolve " + desc.getTabAlias() + "." + desc.getColumn());
-      throw new RuntimeException("Can't resolve " + desc.getTabAlias() + "." + desc.getColumn());
+      return null;
     }
     ExprNodeDesc constant = null;
     // Additional work for union operator, see union27.q

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java Mon Feb  2 06:41:02 2015
@@ -34,14 +34,15 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.OperatorUtils;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -59,12 +60,12 @@ import org.apache.hadoop.hive.ql.plan.Dy
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
@@ -224,19 +225,28 @@ public class SortedDynPartitionOptimizer
               rsConf, new RowSchema(outRS.getSignature()), fsParent);
       rsOp.setColumnExprMap(colExprMap);
 
-      // Create ExtractDesc
-      RowSchema exRR = new RowSchema(outRS);
-      ExtractDesc exConf = new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
-          Utilities.ReduceField.VALUE.toString(), "", false));
-
-      // Create Extract Operator
-      ExtractOperator exOp = (ExtractOperator) OperatorFactory.getAndMakeChild(
-              exConf, exRR, rsOp);
+      List<ExprNodeDesc> valCols = rsConf.getValueCols();
+      List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>(valCols.size());
+      List<String> colNames = new ArrayList<String>();
+      String colName;
+      for (ExprNodeDesc valCol : valCols) {
+        colName = PlanUtils.stripQuotes(valCol.getExprString());
+        colNames.add(colName);
+        descs.add(new ExprNodeColumnDesc(valCol.getTypeInfo(), ReduceField.VALUE.toString()+"."+colName, null, false));
+      }
+
+      // Create SelectDesc
+      SelectDesc selConf = new SelectDesc(descs, colNames);
+      RowSchema selRS = new RowSchema(outRS);
+
+      // Create Select Operator
+      SelectOperator selOp = (SelectOperator) OperatorFactory.getAndMakeChild(
+              selConf, selRS, rsOp);
 
-      // link EX to FS
+      // link SEL to FS
       fsOp.getParentOperators().clear();
-      fsOp.getParentOperators().add(exOp);
-      exOp.getChildOperators().add(fsOp);
+      fsOp.getParentOperators().add(selOp);
+      selOp.getChildOperators().add(fsOp);
 
       // Set if partition sorted or partition bucket sorted
       fsOp.getConf().setDpSortState(FileSinkDesc.DPSortState.PARTITION_SORTED);
@@ -249,13 +259,13 @@ public class SortedDynPartitionOptimizer
           .getSchema().getSignature());
       fsOp.getConf().setPartitionCols(partitionColumns);
 
-      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + exOp.getOperatorId()
+      LOG.info("Inserted " + rsOp.getOperatorId() + " and " + selOp.getOperatorId()
           + " as parent of " + fsOp.getOperatorId() + " and child of " + fsParent.getOperatorId());
       return null;
     }
 
-    // Remove RS and EX introduced by enforce bucketing/sorting config
-    // Convert PARENT -> RS -> EX -> FS to PARENT -> FS
+    // Remove RS and SEL introduced by enforce bucketing/sorting config
+    // Convert PARENT -> RS -> SEL -> FS to PARENT -> FS
     private boolean removeRSInsertedByEnforceBucketing(FileSinkOperator fsOp) {
       HiveConf hconf = parseCtx.getConf();
       boolean enforceBucketing = HiveConf.getBoolVar(hconf, ConfVars.HIVEENFORCEBUCKETING);
@@ -290,7 +300,7 @@ public class SortedDynPartitionOptimizer
           Operator<? extends OperatorDesc> rsChild = rsToRemove.getChildOperators().get(0);
           Operator<? extends OperatorDesc> rsGrandChild = rsChild.getChildOperators().get(0);
 
-          if (rsChild instanceof ExtractOperator) {
+          if (rsChild instanceof SelectOperator) {
             // if schema size cannot be matched, then it could be because of constant folding
             // converting partition column expression to constant expression. The constant
             // expression will then get pruned by column pruner since it will not reference to

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/CorrelationUtilities.java Mon Feb  2 06:41:02 2015
@@ -30,7 +30,6 @@ import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -43,10 +42,12 @@ import org.apache.hadoop.hive.ql.exec.Sc
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.Utilities.ReduceField;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.GroupByDesc;
@@ -237,7 +238,6 @@ public final class CorrelationUtilities
       }
       if (!(cursor instanceof SelectOperator
           || cursor instanceof FilterOperator
-          || cursor instanceof ExtractOperator
           || cursor instanceof ForwardOperator
           || cursor instanceof ScriptOperator
           || cursor instanceof ReduceSinkOperator)) {
@@ -332,46 +332,64 @@ public final class CorrelationUtilities
     return child.getParentOperators();
   }
 
+  // replace the cRS to SEL operator
   protected static SelectOperator replaceReduceSinkWithSelectOperator(ReduceSinkOperator childRS,
       ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
-    SelectOperator select = replaceOperatorWithSelect(childRS, context, procCtx);
-    select.getConf().setOutputColumnNames(childRS.getConf().getOutputValueColumnNames());
-    select.getConf().setColList(childRS.getConf().getValueCols());
-    return select;
-  }
-
-  // replace the cRS to SEL operator
-  // If child if cRS is EXT, EXT also should be removed
-  protected static SelectOperator replaceOperatorWithSelect(Operator<?> operator,
-      ParseContext context, AbstractCorrelationProcCtx procCtx)
-      throws SemanticException {
-    RowSchema inputRS = operator.getSchema();
-    SelectDesc select = new SelectDesc(null, null);
-
-    Operator<?> parent = getSingleParent(operator);
-    Operator<?> child = getSingleChild(operator);
+    RowSchema inputRS = childRS.getSchema();
+    SelectDesc select = new SelectDesc(childRS.getConf().getValueCols(), childRS.getConf().getOutputValueColumnNames());
 
+    Operator<?> parent = getSingleParent(childRS);
     parent.getChildOperators().clear();
 
     SelectOperator sel = (SelectOperator) OperatorFactory.getAndMakeChild(
             select, new RowSchema(inputRS.getSignature()), parent);
 
-    sel.setColumnExprMap(operator.getColumnExprMap());
+    sel.setColumnExprMap(childRS.getColumnExprMap());
 
-    sel.setChildOperators(operator.getChildOperators());
-    for (Operator<? extends Serializable> ch : operator.getChildOperators()) {
-      ch.replaceParent(operator, sel);
-    }
-    if (child instanceof ExtractOperator) {
-      removeOperator(child, getSingleChild(child), sel, context);
-      procCtx.addRemovedOperator(child);
-    }
-    operator.setChildOperators(null);
-    operator.setParentOperators(null);
-    procCtx.addRemovedOperator(operator);
+    sel.setChildOperators(childRS.getChildOperators());
+    for (Operator<? extends Serializable> ch : childRS.getChildOperators()) {
+      ch.replaceParent(childRS, sel);
+    }
+
+    removeChildSelIfApplicable(getSingleChild(childRS), sel, context, procCtx);
+    childRS.setChildOperators(null);
+    childRS.setParentOperators(null);
+    procCtx.addRemovedOperator(childRS);
     return sel;
   }
 
+  //TODO: ideally this method should be removed in future, as in we need not to rely on removing
+  // this select operator which likely is introduced by SortedDynPartitionOptimizer.
+  // NonblockingdedupOptimizer should be able to merge this select Operator with its
+  // parent. But, that is not working at the moment. See: dynpart_sort_optimization2.q
+
+  private static void removeChildSelIfApplicable(Operator<?> child, SelectOperator sel,
+      ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
+
+    if (!(child instanceof SelectOperator)) {
+     return;
+   }
+   if (child.getColumnExprMap() != null) {
+     return;
+   }
+
+   SelectOperator selOp = (SelectOperator) child;
+
+   for (ExprNodeDesc desc : selOp.getConf().getColList()) {
+     if (!(desc instanceof ExprNodeColumnDesc)) {
+       return;
+     }
+     ExprNodeColumnDesc col = (ExprNodeColumnDesc) desc;
+     if(!col.getColumn().startsWith(ReduceField.VALUE.toString()+".") ||
+         col.getTabAlias() != null || col.getIsPartitionColOrVirtualCol()){
+       return;
+     }
+   }
+
+   removeOperator(child, getSingleChild(child), sel, context);
+   procCtx.addRemovedOperator(child);
+  }
+
   protected static void removeReduceSinkForGroupBy(ReduceSinkOperator cRS, GroupByOperator cGBYr,
       ParseContext context, AbstractCorrelationProcCtx procCtx) throws SemanticException {
 
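The condition under which the deduplication path drops the trailing Select can be read as a single predicate; a sketch of the same check that removeChildSelIfApplicable performs above (child and ReduceField as in that method):

    // The trailing Select is removed only when it is an identity projection of
    // the ReduceSink VALUE struct: no column expr map, and every expression a
    // bare VALUE.<col> reference with no table alias or partition/virtual column.
    boolean removable = child instanceof SelectOperator
        && child.getColumnExprMap() == null;
    if (removable) {
      for (ExprNodeDesc desc : ((SelectOperator) child).getConf().getColList()) {
        removable &= desc instanceof ExprNodeColumnDesc
            && ((ExprNodeColumnDesc) desc).getColumn()
                .startsWith(ReduceField.VALUE.toString() + ".")
            && ((ExprNodeColumnDesc) desc).getTabAlias() == null
            && !((ExprNodeColumnDesc) desc).getIsPartitionColOrVirtualCol();
      }
    }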

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java Mon Feb  2 06:41:02 2015
@@ -29,7 +29,6 @@ import java.util.Stack;
 
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
@@ -146,6 +145,7 @@ public class ReduceSinkDeDuplication imp
 
   public abstract static class AbsctractReducerReducerProc implements NodeProcessor {
 
+    @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       ReduceSinkDeduplicateProcCtx dedupCtx = (ReduceSinkDeduplicateProcCtx) procCtx;
@@ -164,7 +164,7 @@ public class ReduceSinkDeDuplication imp
         }
         return false;
       }
-      if (child instanceof ExtractOperator || child instanceof SelectOperator) {
+      if (child instanceof SelectOperator) {
         return process(cRS, dedupCtx);
       }
       return false;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/OpProcFactory.java Mon Feb  2 06:41:02 2015
@@ -32,7 +32,6 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
@@ -459,12 +458,6 @@ public class OpProcFactory {
         for(ExprNodeDesc expr : rop.getConf().getValueCols()) {
           lCtx.getIndex().putDependency(rop, col_infos.get(cnt++),
               ExprProcFactory.getExprDependency(lCtx, inpOp, expr));
-        }
-      } else if (op instanceof ExtractOperator) {
-        ArrayList<ColumnInfo> col_infos = rop.getSchema().getSignature();
-        for(ExprNodeDesc expr : rop.getConf().getValueCols()) {
-          lCtx.getIndex().putDependency(rop, col_infos.get(cnt++),
-              ExprProcFactory.getExprDependency(lCtx, inpOp, expr));
         }
       } else {
         RowSchema schema = rop.getSchema();

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingInferenceOptimizer.java Mon Feb  2 06:41:02 2015
@@ -23,7 +23,6 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
@@ -112,9 +111,6 @@ public class BucketingSortingInferenceOp
           BucketingSortingOpProcFactory.getJoinProc());
       opRules.put(new RuleRegExp("R5", FileSinkOperator.getOperatorName() + "%"),
           BucketingSortingOpProcFactory.getFileSinkProc());
-      // Matches only ExtractOperators which are reducers
-      opRules.put(new RuleExactMatch("R6", ExtractOperator.getOperatorName() + "%"),
-          BucketingSortingOpProcFactory.getExtractProc());
       opRules.put(new RuleRegExp("R7", FilterOperator.getOperatorName() + "%"),
           BucketingSortingOpProcFactory.getFilterProc());
       opRules.put(new RuleRegExp("R8", LimitOperator.getOperatorName() + "%"),

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/BucketingSortingOpProcFactory.java Mon Feb  2 06:41:02 2015
@@ -26,7 +26,6 @@ import java.util.Map;
 import java.util.Stack;
 
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.ForwardOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
@@ -487,49 +486,13 @@ public class BucketingSortingOpProcFacto
 
   }
 
-  /**
-   * Processor for Extract operator.
-   *
-   * Only handles the case where the tree looks like
-   *
-   * ReduceSinkOperator --- ExtractOperator
-   *
-   * This is the case for distribute by, sort by, order by, cluster by operators.
-   */
-  public static class ExtractInferrer extends DefaultInferrer implements NodeProcessor {
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-
-      BucketingSortingCtx bctx = (BucketingSortingCtx)procCtx;
-      ExtractOperator exop = (ExtractOperator)nd;
-
-      // As of writing this, there is no case where this could be false, this is just protection
-      // from possible future changes
-      if (exop.getParentOperators().size() != 1) {
-        return null;
-      }
-
-      Operator<? extends OperatorDesc> parent = exop.getParentOperators().get(0);
-
-      // The caller of this method should guarantee this
-      if (parent instanceof ReduceSinkOperator) {
-        extractTraits(bctx, (ReduceSinkOperator)parent, exop);
-      }
-
-      return null;
-    }
-  }
-
-  static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> exop)
+  static void extractTraits(BucketingSortingCtx bctx, ReduceSinkOperator rop, Operator<?> childop)
       throws SemanticException {
 
     List<ExprNodeDesc> outputValues = Collections.emptyList();
-    if (exop instanceof ExtractOperator) {
-      outputValues = rop.getConf().getValueCols();
-    } else if (exop instanceof SelectOperator) {
-      SelectDesc select = ((SelectOperator)exop).getConf();
-      outputValues = ExprNodeDescUtils.backtrack(select.getColList(), exop, rop);
+    if (childop instanceof SelectOperator) {
+      SelectDesc select = ((SelectOperator)childop).getConf();
+      outputValues = ExprNodeDescUtils.backtrack(select.getColList(), childop, rop);
     }
     if (outputValues.isEmpty()) {
       return;
@@ -543,16 +506,16 @@ public class BucketingSortingOpProcFacto
     // These represent the sorted columns
     List<SortCol> sortCols = extractSortCols(rop, outputValues);
 
-    List<ColumnInfo> colInfos = exop.getSchema().getSignature();
+    List<ColumnInfo> colInfos = childop.getSchema().getSignature();
 
     if (!bucketCols.isEmpty()) {
       List<BucketCol> newBucketCols = getNewBucketCols(bucketCols, colInfos);
-      bctx.setBucketedCols(exop, newBucketCols);
+      bctx.setBucketedCols(childop, newBucketCols);
     }
 
     if (!sortCols.isEmpty()) {
       List<SortCol> newSortCols = getNewSortCols(sortCols, colInfos);
-      bctx.setSortedCols(exop, newSortCols);
+      bctx.setSortedCols(childop, newSortCols);
     }
   }
 
@@ -778,10 +741,6 @@ public class BucketingSortingOpProcFacto
     return new FileSinkInferrer();
   }
 
-  public static NodeProcessor getExtractProc() {
-    return new ExtractInferrer();
-  }
-
   public static NodeProcessor getFilterProc() {
     return new ForwardingInferrer();
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java Mon Feb  2 06:41:02 2015
@@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.exec.*;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
-import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
 import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext;
@@ -76,7 +75,6 @@ import org.apache.hadoop.hive.ql.plan.Pa
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.SMBJoinDesc;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
 import org.apache.hadoop.hive.ql.plan.TezWork;
 import org.apache.hadoop.hive.ql.plan.VectorGroupByDesc;
@@ -283,7 +281,7 @@ public class Vectorizer implements Physi
 
   class VectorizationDispatcher implements Dispatcher {
 
-    private PhysicalContext pctx;
+    private final PhysicalContext pctx;
 
     private List<String> reduceColumnNames;
     private List<TypeInfo> reduceTypeInfos;
@@ -449,9 +447,8 @@ public class Vectorizer implements Physi
     }
 
     private void addReduceWorkRules(Map<Rule, NodeProcessor> opRules, NodeProcessor np) {
-      opRules.put(new RuleRegExp("R1", ExtractOperator.getOperatorName() + ".*"), np);
-      opRules.put(new RuleRegExp("R2", GroupByOperator.getOperatorName() + ".*"), np);
-      opRules.put(new RuleRegExp("R3", SelectOperator.getOperatorName() + ".*"), np);
+      opRules.put(new RuleRegExp("R1", GroupByOperator.getOperatorName() + ".*"), np);
+      opRules.put(new RuleRegExp("R2", SelectOperator.getOperatorName() + ".*"), np);
     }
 
     private boolean validateReduceWork(ReduceWork reduceWork) throws SemanticException {
@@ -485,7 +482,7 @@ public class Vectorizer implements Physi
     private void vectorizeReduceWork(ReduceWork reduceWork) throws SemanticException {
       LOG.info("Vectorizing ReduceWork...");
       reduceWork.setVectorMode(true);
- 
+
       // For some reason, the DefaultGraphWalker does not descend down from the reducer Operator as
       // expected.  We need to descend down, otherwise it breaks our algorithm that determines
       // VectorizationContext...  Do we use PreOrderWalker instead of DefaultGraphWalker.
@@ -506,11 +503,6 @@ public class Vectorizer implements Physi
       // Necessary since we are vectorizing the root operator in reduce.
       reduceWork.setReducer(vnp.getRootVectorOp());
 
-      Operator<? extends OperatorDesc> reducer = reduceWork.getReducer();
-      if (reducer.getType().equals(OperatorType.EXTRACT)) {
-        ((VectorExtractOperator)reducer).setReduceTypeInfos(reduceTypeInfos);
-      }
-
       Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps = vnp.getAllScratchColumnVectorTypeMaps();
       reduceWork.setAllScratchColumnVectorTypeMaps(allScratchColumnVectorTypeMaps);
       Map<String, Map<String, Integer>> allColumnVectorMaps = vnp.getAllColumnVectorMaps();
@@ -525,8 +517,8 @@ public class Vectorizer implements Physi
 
   class MapWorkValidationNodeProcessor implements NodeProcessor {
 
-    private MapWork mapWork;
-    private boolean isTez;
+    private final MapWork mapWork;
+    private final boolean isTez;
 
     public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) {
       this.mapWork = mapWork;
@@ -658,7 +650,7 @@ public class Vectorizer implements Physi
       throw new SemanticException("Must be overridden");
     }
   }
-  
+
   class MapWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
 
     private final MapWork mWork;
@@ -723,8 +715,6 @@ public class Vectorizer implements Physi
       if (LOG.isDebugEnabled()) {
         LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString());
         if (vectorOp instanceof VectorizationContextRegion) {
-          VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
-          VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
           LOG.debug("Vectorized MapWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString());
         }
       }
@@ -735,8 +725,8 @@ public class Vectorizer implements Physi
 
   class ReduceWorkVectorizationNodeProcessor extends VectorizationNodeProcessor {
 
-    private List<String> reduceColumnNames;
-    
+    private final List<String> reduceColumnNames;
+
     private VectorizationContext reduceShuffleVectorizationContext;
 
     private Operator<? extends OperatorDesc> rootVectorOp;
@@ -801,8 +791,6 @@ public class Vectorizer implements Physi
       if (LOG.isDebugEnabled()) {
         LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " vectorization context " + vContext.toString());
         if (vectorOp instanceof VectorizationContextRegion) {
-          VectorizationContextRegion vcRegion = (VectorizationContextRegion) vectorOp;
-          VectorizationContext vOutContext = vcRegion.getOuputVectorizationContext();
           LOG.debug("Vectorized ReduceWork operator " + vectorOp.getName() + " added vectorization context " + vContext.toString());
         }
       }
@@ -897,9 +885,6 @@ public class Vectorizer implements Physi
   boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
     boolean ret = false;
     switch (op.getType()) {
-      case EXTRACT:
-        ret = validateExtractOperator((ExtractOperator) op);
-        break;
       case MAPJOIN:
         // Does MAPJOIN actually get planned in Reduce?
         if (op instanceof MapJoinOperator) {
@@ -1034,7 +1019,7 @@ public class Vectorizer implements Physi
     MapJoinDesc desc = op.getConf();
     return validateMapJoinDesc(desc);
   }
-  
+
   private boolean validateMapJoinDesc(MapJoinDesc desc) {
     byte posBigTable = (byte) desc.getPosBigTable();
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
@@ -1123,15 +1108,6 @@ public class Vectorizer implements Physi
     return true;
   }
 
-  private boolean validateExtractOperator(ExtractOperator op) {
-    ExprNodeDesc expr = op.getConf().getCol();
-    boolean ret = validateExprNodeDesc(expr);
-    if (!ret) {
-      return false;
-    }
-    return true;
-  }
-
   private boolean validateFileSinkOperator(FileSinkOperator op) {
    return true;
   }
@@ -1300,7 +1276,7 @@ public class Vectorizer implements Physi
     return vContext;
   }
 
-  private void fixupParentChildOperators(Operator<? extends OperatorDesc> op, 
+  private void fixupParentChildOperators(Operator<? extends OperatorDesc> op,
           Operator<? extends OperatorDesc> vectorOp) {
     if (op.getParentOperators() != null) {
       vectorOp.setParentOperators(op.getParentOperators());
@@ -1354,7 +1330,7 @@ public class Vectorizer implements Physi
     return false;
   }
 
-  public void debugDisplayAllMaps(Map<String, Map<String, Integer>> allColumnVectorMaps, 
+  public void debugDisplayAllMaps(Map<String, Map<String, Integer>> allColumnVectorMaps,
           Map<String, Map<Integer, String>> allScratchColumnVectorTypeMaps) {
 
     // Context keys grow in length since they are a path...

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Mon Feb  2 06:41:02 2015
@@ -94,6 +94,7 @@ import org.apache.hadoop.hive.ql.hooks.R
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.AcidOutputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.io.AcidUtils.Operation;
 import org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -149,7 +150,6 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc.sampleDesc;
@@ -229,12 +229,12 @@ public class SemanticAnalyzer extends Ba
   private HashMap<TableScanOperator, ExprNodeDesc> opToPartPruner;
   private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
   private HashMap<String, Operator<? extends OperatorDesc>> topOps;
-  private HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
+  private final HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
   private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
   private List<LoadTableDesc> loadTableWork;
   private List<LoadFileDesc> loadFileWork;
-  private Map<JoinOperator, QBJoinTree> joinContext;
-  private Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
+  private final Map<JoinOperator, QBJoinTree> joinContext;
+  private final Map<SMBMapJoinOperator, QBJoinTree> smbMapJoinContext;
   private final HashMap<TableScanOperator, Table> topToTable;
   private final Map<FileSinkOperator, Table> fsopToTable;
   private final List<ReduceSinkOperator> reduceSinkOperatorsAddedByEnforceBucketingSorting;
@@ -5975,8 +5975,13 @@ public class SemanticAnalyzer extends Ba
         maxReducers = numBuckets;
       }
 
-      input = genReduceSinkPlanForSortingBucketing(dest_tab, input,
-          sortCols, sortOrders, partnCols, maxReducers);
+      StringBuilder order = new StringBuilder();
+      for (int sortOrder : sortOrders) {
+        order.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? '+' : '-');
+      }
+      input = genReduceSinkPlan(input, partnCols, sortCols, order.toString(),  maxReducers,
+        (isAcidTable(dest_tab) ? getAcidType() : AcidUtils.Operation.NOT_ACID));
+      reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator)input.getParentOperators().get(0));
       ctx.setMultiFileSpray(multiFileSpray);
       ctx.setNumFiles(numFiles);
       ctx.setPartnCols(partnColsNoConvert);
@@ -6448,7 +6453,7 @@ public class SemanticAnalyzer extends Ba
       fileSinkDesc.setWriteType(wt);
       acidFileSinks.add(fileSinkDesc);
     }
-    
+
     fileSinkDesc.setTemporary(destTableIsTemporary);
 
     /* Set List Bucketing context. */
@@ -6930,7 +6935,6 @@ public class SemanticAnalyzer extends Ba
   private ArrayList<ExprNodeDesc> getSortCols(String dest, QB qb, Table tab, TableDesc table_desc,
       Operator input, boolean convert)
       throws SemanticException {
-    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
     List<Order> tabSortCols = tab.getSortCols();
     List<FieldSchema> tabCols = tab.getCols();
 
@@ -6940,7 +6944,6 @@ public class SemanticAnalyzer extends Ba
       int pos = 0;
       for (FieldSchema tabCol : tabCols) {
         if (sortCol.getCol().equals(tabCol.getName())) {
-          ColumnInfo colInfo = inputRR.getColumnInfos().get(pos);
           posns.add(pos);
           break;
         }
@@ -6953,7 +6956,6 @@ public class SemanticAnalyzer extends Ba
 
   private ArrayList<Integer> getSortOrders(String dest, QB qb, Table tab, Operator input)
       throws SemanticException {
-    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
     List<Order> tabSortCols = tab.getSortCols();
     List<FieldSchema> tabCols = tab.getCols();
 
@@ -6969,74 +6971,11 @@ public class SemanticAnalyzer extends Ba
     return orders;
   }
 
-  @SuppressWarnings("nls")
-  private Operator genReduceSinkPlanForSortingBucketing(Table tab, Operator input,
-      ArrayList<ExprNodeDesc> sortCols,
-      List<Integer> sortOrders,
-      ArrayList<ExprNodeDesc> partitionCols,
-      int numReducers)
-      throws SemanticException {
-    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-
-    // For the generation of the values expression just get the inputs
-    // signature and generate field expressions for those
-    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
-    ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
-    ArrayList<String> outputColumns = new ArrayList<String>();
-    int i = 0;
-    for (ColumnInfo colInfo : inputRR.getColumnInfos()) {
-      String internalName = getColumnInternalName(i++);
-      outputColumns.add(internalName);
-      valueCols.add(new ExprNodeColumnDesc(colInfo));
-      colExprMap.put(internalName, valueCols
-          .get(valueCols.size() - 1));
-    }
-
-    StringBuilder order = new StringBuilder();
-    for (int sortOrder : sortOrders) {
-      order.append(sortOrder == BaseSemanticAnalyzer.HIVE_COLUMN_ORDER_ASC ? '+' : '-');
-    }
-
-    AcidUtils.Operation acidOp = (isAcidTable(tab) ? getAcidType() : AcidUtils.Operation.NOT_ACID);
-
-    Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
-        .getReduceSinkDesc(sortCols, valueCols, outputColumns, false, -1,
-            partitionCols, order.toString(), numReducers, acidOp),
-        new RowSchema(inputRR.getColumnInfos()), input), inputRR);
-    interim.setColumnExprMap(colExprMap);
-    reduceSinkOperatorsAddedByEnforceBucketingSorting.add((ReduceSinkOperator) interim);
-
-    // Add the extract operator to get the value fields
-    RowResolver out_rwsch = new RowResolver();
-    RowResolver interim_rwsch = inputRR;
-    Integer pos = Integer.valueOf(0);
-    for (ColumnInfo colInfo : interim_rwsch.getColumnInfos()) {
-      String[] info = interim_rwsch.reverseLookup(colInfo.getInternalName());
-      out_rwsch.put(info[0], info[1], new ColumnInfo(
-          getColumnInternalName(pos), colInfo.getType(), info[0],
-          colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol()));
-      pos = Integer.valueOf(pos.intValue() + 1);
-    }
-
-    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
-        new ExtractDesc(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
-            Utilities.ReduceField.VALUE.toString(), "", false)), new RowSchema(
-            out_rwsch.getColumnInfos()), interim), out_rwsch);
-
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("Created ReduceSink Plan for table: " + tab.getTableName() +
-          " row schema: " + out_rwsch.toString());
-    }
-
-    return output;
-
-  }
-
   private Operator genReduceSinkPlan(String dest, QB qb, Operator<?> input,
       int numReducers) throws SemanticException {
-    
+
     RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-    
+
     // First generate the expression for the partition and sort keys
     // The cluster by clause / distribute by clause has the aliases for
     // partition function
@@ -7094,16 +7033,16 @@ public class SemanticAnalyzer extends Ba
         sortCols.add(exprNode);
       }
     }
-    return genReduceSinkPlan(input, partCols, sortCols, order.toString(), numReducers);
+    return genReduceSinkPlan(input, partCols, sortCols, order.toString(), numReducers, Operation.NOT_ACID);
   }
-  
+
   @SuppressWarnings("nls")
   private Operator genReduceSinkPlan(Operator<?> input,
-      ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols, 
-      String sortOrder, int numReducers) throws SemanticException {
+      ArrayList<ExprNodeDesc> partitionCols, ArrayList<ExprNodeDesc> sortCols,
+      String sortOrder, int numReducers, AcidUtils.Operation acidOp) throws SemanticException {
 
     RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-    
+
     Operator dummy = Operator.createDummy();
     dummy.setParentOperators(Arrays.asList(input));
 
@@ -7166,9 +7105,8 @@ public class SemanticAnalyzer extends Ba
 
     dummy.setParentOperators(null);
 
-    // TODO Not 100% sure NOT_ACID is always right here.
     ReduceSinkDesc rsdesc = PlanUtils.getReduceSinkDesc(sortCols, valueCols, outputColumns,
-        false, -1, partitionCols, sortOrder, numReducers, AcidUtils.Operation.NOT_ACID);
+        false, -1, partitionCols, sortOrder, numReducers, acidOp);
     Operator interim = putOpInsertMap(OperatorFactory.getAndMakeChild(rsdesc,
         new RowSchema(rsRR.getColumnInfos()), input), rsRR);
 
@@ -11894,13 +11832,8 @@ public class SemanticAnalyzer extends Ba
   void buildPTFReduceSinkDetails(PartitionedTableFunctionDef tabDef,
       RowResolver inputRR,
       ArrayList<ExprNodeDesc> partCols,
-      ArrayList<ExprNodeDesc> valueCols,
       ArrayList<ExprNodeDesc> orderCols,
-      Map<String, ExprNodeDesc> colExprMap,
-      List<String> outputColumnNames,
-      StringBuilder orderString,
-      RowResolver rsOpRR,
-      RowResolver extractRR) throws SemanticException {
+      StringBuilder orderString) throws SemanticException {
 
     List<PTFExpressionDef> partColList = tabDef.getPartition().getExpressions();
 
@@ -11928,68 +11861,6 @@ public class SemanticAnalyzer extends Ba
       }
       orderCols.add(colDef.getExprNode());
     }
-
-    ArrayList<ColumnInfo> colInfoList = inputRR.getColumnInfos();
-    /*
-     * construct the ReduceSinkRR
-     */
-    int pos = 0;
-    for (ColumnInfo colInfo : colInfoList) {
-        ExprNodeDesc valueColExpr = new ExprNodeColumnDesc(colInfo);
-        valueCols.add(valueColExpr);
-        String internalName = SemanticAnalyzer.getColumnInternalName(pos++);
-        outputColumnNames.add(internalName);
-        colExprMap.put(internalName, valueColExpr);
-
-        String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
-        ColumnInfo newColInfo = new ColumnInfo(
-            internalName, colInfo.getType(), alias[0],
-            colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
-        rsOpRR.put(alias[0], alias[1], newColInfo);
-    }
-
-    /*
-     * construct the ExtractRR
-     */
-    LinkedHashMap<String[], ColumnInfo> colsAddedByHaving =
-        new LinkedHashMap<String[], ColumnInfo>();
-    pos = 0;
-    for (ColumnInfo colInfo : colInfoList) {
-      String[] alias = inputRR.reverseLookup(colInfo.getInternalName());
-      /*
-       * if we have already encountered this colInfo internalName.
-       * We encounter it again because it must be put for the Having clause.
-       * We will add these entries in the end; in a loop on colsAddedByHaving. See below.
-       */
-      if ( colsAddedByHaving.containsKey(alias)) {
-        continue;
-      }
-      ASTNode astNode = PTFTranslator.getASTNode(colInfo, inputRR);
-      ColumnInfo eColInfo = new ColumnInfo(
-          SemanticAnalyzer.getColumnInternalName(pos++), colInfo.getType(), alias[0],
-          colInfo.getIsVirtualCol(), colInfo.isHiddenVirtualCol());
-
-      if ( astNode == null ) {
-        extractRR.put(alias[0], alias[1], eColInfo);
-      }
-      else {
-        /*
-         * in case having clause refers to this column may have been added twice;
-         * once with the ASTNode.toStringTree as the alias
-         * and then with the real alias.
-         */
-        extractRR.putExpression(astNode, eColInfo);
-        if ( !astNode.toStringTree().toLowerCase().equals(alias[1]) ) {
-          colsAddedByHaving.put(alias, eColInfo);
-        }
-      }
-    }
-
-    for(Map.Entry<String[], ColumnInfo> columnAddedByHaving : colsAddedByHaving.entrySet() ) {
-      String[] alias = columnAddedByHaving.getKey();
-      ColumnInfo eColInfo = columnAddedByHaving.getValue();
-      extractRR.put(alias[0], alias[1], eColInfo);
-    }
   }
 
   private Operator genPTFPlanForComponentQuery(PTFInvocationSpec ptfQSpec, Operator input)
@@ -12000,27 +11871,6 @@ public class SemanticAnalyzer extends Ba
     RowResolver rr = opParseCtx.get(input).getRowResolver();
     PTFDesc ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
 
-    RowResolver rsOpRR = new RowResolver();
-    /*
-     * Build an RR for the Extract Op from the ReduceSink Op's RR.
-     * Why?
-     * We need to remove the Virtual Columns present in the RS's RR. The OI
-     * that gets passed to Extract at runtime doesn't contain the Virtual Columns.
-     * So internal names get changed. Consider testCase testJoinWithLeadLag,
-     * which is a self join on part and also has a Windowing expression.
-     * The RR of the RS op at translation time looks something like this:
-     * (_co1,_col2,..,_col7, _col8(vc=true),_col9(vc=true),
-     * _col10,_col11,.._col15(vc=true),_col16(vc=true),..)
-     * At runtime the Virtual columns are removed and all the columns after _col7
-     * are shifted 1 or 2 positions.
-     * So in child Operators ColumnExprNodeDesc's are no longer referring to the right columns.
-     *
-     * So we build a new RR for the Extract Op, with the Virtual Columns removed.
-     * We hand this to the PTFTranslator as the
-     * starting RR to use to translate a PTF Chain.
-     */
-    RowResolver extractOpRR = new RowResolver();
-
     /*
      * 2. build Map-side Op Graph. Graph template is either:
      * Input -> PTF_map -> ReduceSink
@@ -12051,10 +11901,7 @@ public class SemanticAnalyzer extends Ba
        */
 
       ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
-      ArrayList<ExprNodeDesc> valueCols = new ArrayList<ExprNodeDesc>();
       ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
-      Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
-      List<String> outputColumnNames = new ArrayList<String>();
       StringBuilder orderString = new StringBuilder();
 
       /*
@@ -12063,45 +11910,20 @@ public class SemanticAnalyzer extends Ba
        * If the parent of ReduceSinkOperator is PTFOperator, use it's
        * output RR.
        */
-      buildPTFReduceSinkDetails(tabDef,
-          rr,
-          partCols,
-          valueCols,
-          orderCols,
-          colExprMap,
-          outputColumnNames,
-          orderString,
-          rsOpRR,
-          extractOpRR);
-
-      input = putOpInsertMap(OperatorFactory.getAndMakeChild(PlanUtils
-          .getReduceSinkDesc(orderCols,
-              valueCols, outputColumnNames, false,
-              -1, partCols, orderString.toString(), -1, AcidUtils.Operation.NOT_ACID),
-          new RowSchema(rsOpRR.getColumnInfos()), input), rsOpRR);
-      input.setColumnExprMap(colExprMap);
+      buildPTFReduceSinkDetails(tabDef, rr, partCols, orderCols, orderString);
+      input = genReduceSinkPlan(input, partCols, orderCols, orderString.toString(), -1, Operation.NOT_ACID);
     }
 
     /*
      * 3. build Reduce-side Op Graph
      */
     {
-      /*
-       * b. Construct Extract Operator.
-       */
-      input = putOpInsertMap(OperatorFactory.getAndMakeChild(
-          new ExtractDesc(
-              new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
-                  Utilities.ReduceField.VALUE
-                  .toString(), "", false)),
-          new RowSchema(extractOpRR.getColumnInfos()),
-          input), extractOpRR);
 
       /*
        * c. Rebuilt the QueryDef.
        * Why?
        * - so that the ExprNodeDescriptors in the QueryDef are based on the
-       *   Extract Operator's RowResolver
+       *   Select Operator's RowResolver
        */
       rr = opParseCtx.get(input).getRowResolver();
       ptfDesc = translatePTFInvocationSpec(ptfQSpec, rr);
@@ -12115,9 +11937,7 @@ public class SemanticAnalyzer extends Ba
           input), ptfOpRR);
 
     }
-
     return input;
-
   }
 
 //--------------------------- Windowing handling: PTFInvocationSpec to PTFDesc --------------------
@@ -12145,7 +11965,7 @@ public class SemanticAnalyzer extends Ba
 
   private Operator genReduceSinkPlanForWindowing(WindowingSpec spec,
       RowResolver inputRR, Operator input) throws SemanticException{
-    
+
     ArrayList<ExprNodeDesc> partCols = new ArrayList<ExprNodeDesc>();
     ArrayList<ExprNodeDesc> orderCols = new ArrayList<ExprNodeDesc>();
     StringBuilder order = new StringBuilder();
@@ -12169,7 +11989,7 @@ public class SemanticAnalyzer extends Ba
       }
     }
 
-    return genReduceSinkPlan(input, partCols, orderCols, order.toString(), -1);
+    return genReduceSinkPlan(input, partCols, orderCols, order.toString(), -1, Operation.NOT_ACID);
   }
 
   public static ArrayList<WindowExpressionSpec> parseSelect(String selectExprStr)

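For readers tracing the SemanticAnalyzer change above: with ExtractOperator gone, the reduce side of these plans now starts with an ordinary Select operator that projects the ReduceSink's KEY.* and VALUE.* columns back into _col0, _col1, and so on (genReduceSinkPlan now emits that pair directly). Below is a minimal sketch of that construction, built only from calls this patch itself uses (the SelectDesc(colList, outputColumnNames) constructor, the four-argument ExprNodeColumnDesc, OperatorFactory.get); the class name, column types, and child operator are illustrative and not taken from the patch.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReduceSideSelectSketch {
  /**
   * Builds the reducer's first operator: a Select that projects the sort key
   * (KEY.reducesinkkey0) and one value column (VALUE._col1) into _col0/_col1,
   * then feeds the result to the given child (e.g. a FileSinkOperator).
   */
  static Operator<SelectDesc> reduceSideSelect(Operator<? extends OperatorDesc> child) {
    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
    cols.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
        Utilities.ReduceField.KEY + ".reducesinkkey0", "", false));
    cols.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo,
        Utilities.ReduceField.VALUE + "._col1", "", false));

    List<String> outputColumnNames = new ArrayList<String>();
    outputColumnNames.add("_col0");
    outputColumnNames.add("_col1");

    // SelectDesc(colList, outputColumnNames) is the constructor this patch keeps;
    // the (colList, selectStar, selStarNoCompute) variant is removed below.
    return OperatorFactory.get(new SelectDesc(cols, outputColumnNames), child);
  }
}

The new TestExecDriver reducers further down are assembled in exactly this shape.
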
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/SelectDesc.java Mon Feb  2 06:41:02 2015
@@ -56,14 +56,6 @@ public class SelectDesc extends Abstract
     this.outputColumnNames = outputColumnNames;
   }
 
-  public SelectDesc(
-    final List<org.apache.hadoop.hive.ql.plan.ExprNodeDesc> colList,
-    final boolean selectStar, final boolean selStarNoCompute) {
-    this.colList = colList;
-    this.selectStar = selectStar;
-    this.selStarNoCompute = selStarNoCompute;
-  }
-
   @Override
   public Object clone() {
     SelectDesc ret = new SelectDesc();

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java Mon Feb  2 06:41:02 2015
@@ -38,7 +38,7 @@ import org.apache.hadoop.hive.ql.Windows
 import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
-import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -47,7 +47,6 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
@@ -94,7 +93,7 @@ public class TestExecDriver extends Test
       tmppath = new Path(tmpdir);
 
       fs = FileSystem.get(conf);
-      if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDir()) {
+      if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDirectory()) {
         throw new RuntimeException(tmpdir + " exists but is not a directory");
       }
 
@@ -137,7 +136,7 @@ public class TestExecDriver extends Test
       for (String src : srctables) {
         db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true);
         db.createTable(src, cols, null, TextInputFormat.class,
-            IgnoreKeyTextOutputFormat.class);
+            HiveIgnoreKeyTextOutputFormat.class);
         db.loadTable(hadoopDataFile[i], src, false, false, true, false, false);
         i++;
       }
@@ -161,20 +160,19 @@ public class TestExecDriver extends Test
 
   private static void fileDiff(String datafile, String testdir) throws Exception {
     String testFileDir = conf.get("test.data.files");
-    FileInputStream fi_gold = new FileInputStream(new File(testFileDir,
-        datafile));
 
     // inbuilt assumption that the testdir has only one output file.
     Path di_test = new Path(tmppath, testdir);
     if (!fs.exists(di_test)) {
       throw new RuntimeException(tmpdir + File.separator + testdir + " does not exist");
     }
-    if (!fs.getFileStatus(di_test).isDir()) {
+    if (!fs.getFileStatus(di_test).isDirectory()) {
       throw new RuntimeException(tmpdir + File.separator + testdir + " is not a directory");
     }
     FSDataInputStream fi_test = fs.open((fs.listStatus(di_test))[0].getPath());
 
     boolean ignoreWhitespace = Shell.WINDOWS;
+    FileInputStream fi_gold = new FileInputStream(new File(testFileDir,datafile));
     if (!Utilities.contentsEqual(fi_gold, fi_test, ignoreWhitespace)) {
       LOG.error(di_test.toString() + " does not match " + datafile);
       assertEquals(false, true);
@@ -260,8 +258,11 @@ public class TestExecDriver extends Test
     Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
         + "mapredplan1.out"), Utilities.defaultTd, false));
 
-    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
-        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
+    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(HiveConf.getColumnInternalName(2));
+    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, colNames), op3);
 
     rWork.setReducer(op2);
   }
@@ -292,8 +293,10 @@ public class TestExecDriver extends Test
 
     Operator<FilterDesc> op3 = OperatorFactory.get(getTestFilterDesc("0"), op4);
 
-    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
-        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
+    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
 
     rWork.setReducer(op2);
   }
@@ -376,10 +379,10 @@ public class TestExecDriver extends Test
     // reduce side work
     Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
         + "mapredplan4.out"), Utilities.defaultTd, false));
-
-    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
-        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
-
+    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
     rWork.setReducer(op2);
   }
 
@@ -416,9 +419,10 @@ public class TestExecDriver extends Test
     Operator<FileSinkDesc> op3 = OperatorFactory.get(new FileSinkDesc(new Path(tmpdir + File.separator
         + "mapredplan5.out"), Utilities.defaultTd, false));
 
-    Operator<ExtractDesc> op2 = OperatorFactory.get(new ExtractDesc(
-        getStringColumn(Utilities.ReduceField.VALUE.toString())), op3);
-
+    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+    Operator<SelectDesc> op2 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op3);
     rWork.setReducer(op2);
   }
 
@@ -459,8 +463,10 @@ public class TestExecDriver extends Test
 
     Operator<FilterDesc> op2 = OperatorFactory.get(getTestFilterDesc("0"), op3);
 
-    Operator<ExtractDesc> op5 = OperatorFactory.get(new ExtractDesc(
-        getStringColumn(Utilities.ReduceField.VALUE.toString())), op2);
+    List<ExprNodeDesc> cols = new ArrayList<ExprNodeDesc>();
+    cols.add(getStringColumn(Utilities.ReduceField.KEY + ".reducesinkkey" + 0));
+    cols.add(getStringColumn(Utilities.ReduceField.VALUE.toString()+"."+outputColumns.get(1)));
+    Operator<SelectDesc> op5 = OperatorFactory.get(new SelectDesc(cols, outputColumns), op2);
 
     rWork.setReducer(op5);
   }

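The q.out updates that follow all reflect one reduce-side naming rule: a column carried as a ReduceSink key is read back by the reducer as KEY.reducesinkkeyN, while columns carried in the value are read back as VALUE._colN. That is why, for example, the bucket6 plan below drops _col0 from its value expressions; the sort key is now read from KEY.reducesinkkey0 instead of being duplicated into the value. A small helper spelling out the rule (assumed here for illustration, not part of this patch):

import org.apache.hadoop.hive.ql.exec.Utilities;

public class ReduceFieldNaming {
  /** Name under which the N-th ReduceSink key column is visible to the reducer. */
  static String keyColumn(int n) {
    return Utilities.ReduceField.KEY + ".reducesinkkey" + n;    // e.g. KEY.reducesinkkey0
  }

  /** Name under which a value column (internal name _colN) is visible to the reducer. */
  static String valueColumn(String internalName) {
    return Utilities.ReduceField.VALUE + "." + internalName;    // e.g. VALUE._col1
  }
}
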
Modified: hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket1.q.out Mon Feb  2 06:41:02 2015
@@ -110,39 +110,37 @@ STAGE PLANS:
         /src [src]
       Needs Tagging: false
       Reduce Operator Tree:
-        Extract
+        Select Operator
+          expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 1
 #### A masked pattern was here ####
-              NumFilesPerFileSink: 1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count 100
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types int:string
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count 100
+                  bucket_field_name key
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
 #### A masked pattern was here ####
-                    name default.bucket1_1
-                    serialization.ddl struct bucket1_1 { i32 key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name default.bucket1_1
+                  serialization.ddl struct bucket1_1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 #### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.bucket1_1
-              TotalFiles: 1
-              GatherStats: true
-              MultiFileSpray: false
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.bucket1_1
+            TotalFiles: 1
+            GatherStats: true
+            MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket2.q.out Mon Feb  2 06:41:02 2015
@@ -110,39 +110,37 @@ STAGE PLANS:
         /src [src]
       Needs Tagging: false
       Reduce Operator Tree:
-        Extract
+        Select Operator
+          expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 1
 #### A masked pattern was here ####
-              NumFilesPerFileSink: 2
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            NumFilesPerFileSink: 2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types int:string
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count 2
+                  bucket_field_name key
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
 #### A masked pattern was here ####
-                    name default.bucket2_1
-                    serialization.ddl struct bucket2_1 { i32 key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name default.bucket2_1
+                  serialization.ddl struct bucket2_1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 #### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.bucket2_1
-              TotalFiles: 2
-              GatherStats: true
-              MultiFileSpray: true
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.bucket2_1
+            TotalFiles: 2
+            GatherStats: true
+            MultiFileSpray: true
 
   Stage: Stage-0
     Move Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket3.q.out Mon Feb  2 06:41:02 2015
@@ -114,42 +114,40 @@ STAGE PLANS:
         /src [src]
       Needs Tagging: false
       Reduce Operator Tree:
-        Extract
+        Select Operator
+          expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 1
 #### A masked pattern was here ####
-              NumFilesPerFileSink: 2
-              Static Partition Specification: ds=1/
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            NumFilesPerFileSink: 2
+            Static Partition Specification: ds=1/
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types int:string
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count 2
+                  bucket_field_name key
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
 #### A masked pattern was here ####
-                    name default.bucket3_1
-                    partition_columns ds
-                    partition_columns.types string
-                    serialization.ddl struct bucket3_1 { i32 key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name default.bucket3_1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct bucket3_1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 #### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.bucket3_1
-              TotalFiles: 2
-              GatherStats: true
-              MultiFileSpray: true
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.bucket3_1
+            TotalFiles: 2
+            GatherStats: true
+            MultiFileSpray: true
 
   Stage: Stage-0
     Move Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket4.q.out Mon Feb  2 06:41:02 2015
@@ -107,40 +107,38 @@ STAGE PLANS:
         /src [src]
       Needs Tagging: false
       Reduce Operator Tree:
-        Extract
+        Select Operator
+          expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 1
 #### A masked pattern was here ####
-              NumFilesPerFileSink: 2
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            NumFilesPerFileSink: 2
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    SORTBUCKETCOLSPREFIX TRUE
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types int:string
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  SORTBUCKETCOLSPREFIX TRUE
+                  bucket_count 2
+                  bucket_field_name key
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
 #### A masked pattern was here ####
-                    name default.bucket4_1
-                    serialization.ddl struct bucket4_1 { i32 key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name default.bucket4_1
+                  serialization.ddl struct bucket4_1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 #### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.bucket4_1
-              TotalFiles: 2
-              GatherStats: true
-              MultiFileSpray: true
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.bucket4_1
+            TotalFiles: 2
+            GatherStats: true
+            MultiFileSpray: true
 
   Stage: Stage-0
     Move Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket5.q.out Mon Feb  2 06:41:02 2015
@@ -171,40 +171,38 @@ STAGE PLANS:
         /src [src]
       Needs Tagging: false
       Reduce Operator Tree:
-        Extract
+        Select Operator
+          expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-          Select Operator
-            expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
-            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              GlobalTableId: 1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 1
 #### A masked pattern was here ####
-              NumFilesPerFileSink: 1
-              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    SORTBUCKETCOLSPREFIX TRUE
-                    bucket_count 2
-                    bucket_field_name key
-                    columns key,value
-                    columns.comments 
-                    columns.types int:string
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  SORTBUCKETCOLSPREFIX TRUE
+                  bucket_count 2
+                  bucket_field_name key
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
 #### A masked pattern was here ####
-                    name default.bucketed_table
-                    serialization.ddl struct bucketed_table { i32 key, string value}
-                    serialization.format 1
-                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name default.bucketed_table
+                  serialization.ddl struct bucketed_table { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 #### A masked pattern was here ####
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.bucketed_table
-              TotalFiles: 1
-              GatherStats: true
-              MultiFileSpray: false
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.bucketed_table
+            TotalFiles: 1
+            GatherStats: true
+            MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator

Modified: hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucket6.q.out Mon Feb  2 06:41:02 2015
@@ -33,9 +33,11 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col0 (type: string)
                 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: string), _col1 (type: string)
+                value expressions: _col1 (type: string)
       Reduce Operator Tree:
-        Extract
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false

Modified: hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out?rev=1656393&r1=1656392&r2=1656393&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out Mon Feb  2 06:41:02 2015
@@ -343,9 +343,11 @@ STAGE PLANS:
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: int), _col1 (type: string)
+                value expressions: _col1 (type: string)
       Reduce Operator Tree:
-        Extract
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
+          outputColumnNames: _col0, _col1
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false


