hive-commits mailing list archives

From: br...@apache.org
Subject: svn commit: r1659014 [11/25] - in /hive/branches/parquet: ./ beeline/src/java/org/apache/hive/beeline/ beeline/src/main/resources/ beeline/src/test/org/apache/hive/beeline/ bin/ common/src/java/org/apache/hadoop/hive/conf/ data/conf/ data/conf/spark/ d...
Date: Wed, 11 Feb 2015 17:48:41 GMT
Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/optimizer/spark/SparkSortMergeJoinFactory.java Wed Feb 11 17:48:36 2015
@@ -1,20 +1,20 @@
 /**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.hadoop.hive.ql.optimizer.spark;
 
 import java.util.List;
@@ -36,8 +36,8 @@ import org.apache.hadoop.hive.ql.plan.Ma
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 
 /**
-* Operator factory for Spark SMBJoin processing.
-*/
+ * Operator factory for Spark SMBJoin processing.
+ */
 public final class SparkSortMergeJoinFactory {
 
   private SparkSortMergeJoinFactory() {
@@ -45,131 +45,79 @@ public final class SparkSortMergeJoinFac
   }
 
   /**
-   * Get the branch on which we are invoked (walking) from.  See diagram below.
-   * We are at the SMBJoinOp and could have come from TS of any of the input tables.
-   */
-  public static int getPositionParent(SMBMapJoinOperator op,
-      Stack<Node> stack) {
-    int size = stack.size();
-    assert size >= 2 && stack.get(size - 1) == op;
-    @SuppressWarnings("unchecked")
-    Operator<? extends OperatorDesc> parent =
-        (Operator<? extends OperatorDesc>) stack.get(size - 2);
-    List<Operator<? extends OperatorDesc>> parOp = op.getParentOperators();
-    int pos = parOp.indexOf(parent);
-    return pos;
-  }
-
-  /**
-   * SortMergeMapJoin processor, input is a SMBJoinOp that is part of a MapWork:
-   *
-   *  MapWork:
-   *
-   *   (Big)   (Small)  (Small)
-   *    TS       TS       TS
-   *     \       |       /
-   *       \     DS     DS
-   *         \   |    /
-   *          SMBJoinOP
+   * Annotate MapWork, input is a SMBJoinOp that is part of a MapWork, and its root TS operator.
    *
    * 1. Initializes the MapWork's aliasToWork, pointing to big-table's TS.
    * 2. Adds the bucketing information to the MapWork.
    * 3. Adds localwork to the MapWork, with localWork's aliasToWork pointing to small-table's TS.
+   * @param context proc walker context
+   * @param mapWork mapwork to annotate
+   * @param smbMapJoinOp SMB Map Join operator to get data
+   * @param ts Table Scan operator to get data
+   * @param local Whether ts is from a 'local' source (small-table that will be loaded by SMBJoin 'local' task)
    */
-  private static class SortMergeJoinProcessor implements NodeProcessor {
+  public static void annotateMapWork(GenSparkProcContext context, MapWork mapWork,
+    SMBMapJoinOperator smbMapJoinOp, TableScanOperator ts, boolean local)
+    throws SemanticException {
+    initSMBJoinPlan(context, mapWork, ts, local);
+    setupBucketMapJoinInfo(mapWork, smbMapJoinOp);
+  }
 
-    public static void setupBucketMapJoinInfo(MapWork plan, SMBMapJoinOperator currMapJoinOp) {
-      if (currMapJoinOp != null) {
-        Map<String, Map<String, List<String>>> aliasBucketFileNameMapping =
-            currMapJoinOp.getConf().getAliasBucketFileNameMapping();
-        if (aliasBucketFileNameMapping != null) {
-          MapredLocalWork localPlan = plan.getMapRedLocalWork();
-          if (localPlan == null) {
-            localPlan = currMapJoinOp.getConf().getLocalWork();
-          } else {
-            // local plan is not null, we want to merge it into SMBMapJoinOperator's local work
-            MapredLocalWork smbLocalWork = currMapJoinOp.getConf().getLocalWork();
-            if (smbLocalWork != null) {
-              localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
-              localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
-            }
+  private static void setupBucketMapJoinInfo(MapWork plan, SMBMapJoinOperator currMapJoinOp) {
+    if (currMapJoinOp != null) {
+      Map<String, Map<String, List<String>>> aliasBucketFileNameMapping =
+        currMapJoinOp.getConf().getAliasBucketFileNameMapping();
+      if (aliasBucketFileNameMapping != null) {
+        MapredLocalWork localPlan = plan.getMapRedLocalWork();
+        if (localPlan == null) {
+          localPlan = currMapJoinOp.getConf().getLocalWork();
+        } else {
+          // local plan is not null, we want to merge it into SMBMapJoinOperator's local work
+          MapredLocalWork smbLocalWork = currMapJoinOp.getConf().getLocalWork();
+          if (smbLocalWork != null) {
+            localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
+            localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
           }
+        }
 
-          if (localPlan == null) {
-            return;
-          }
-          plan.setMapRedLocalWork(null);
-          currMapJoinOp.getConf().setLocalWork(localPlan);
+        if (localPlan == null) {
+          return;
+        }
+        plan.setMapRedLocalWork(null);
+        currMapJoinOp.getConf().setLocalWork(localPlan);
 
-          BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
-          localPlan.setBucketMapjoinContext(bucketMJCxt);
-          bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
-          bucketMJCxt.setBucketFileNameMapping(
-              currMapJoinOp.getConf().getBigTableBucketNumMapping());
-          localPlan.setInputFileChangeSensitive(true);
-          bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
-          bucketMJCxt
-              .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
-          bucketMJCxt.setBigTablePartSpecToFileMapping(
-              currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
+        BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
+        localPlan.setBucketMapjoinContext(bucketMJCxt);
+        bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
+        bucketMJCxt.setBucketFileNameMapping(
+          currMapJoinOp.getConf().getBigTableBucketNumMapping());
+        localPlan.setInputFileChangeSensitive(true);
+        bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
+        bucketMJCxt
+          .setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
+        bucketMJCxt.setBigTablePartSpecToFileMapping(
+          currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
 
-          plan.setUseBucketizedHiveInputFormat(true);
+        plan.setUseBucketizedHiveInputFormat(true);
 
-        }
       }
     }
+  }
 
-    /**
-     * Initialize the mapWork.
-     *
-     * @param opProcCtx
-     *          processing context
-     */
-    private static void initSMBJoinPlan(MapWork mapWork,
-                                        GenSparkProcContext opProcCtx, boolean local)
-            throws SemanticException {
-      TableScanOperator ts = (TableScanOperator) opProcCtx.currentRootOperator;
-      String currAliasId = findAliasId(opProcCtx, ts);
-      GenMapRedUtils.setMapWork(mapWork, opProcCtx.parseContext,
-         opProcCtx.inputs, null, ts, currAliasId, opProcCtx.conf, local);
-    }
+  private static void initSMBJoinPlan(GenSparkProcContext opProcCtx,
+    MapWork mapWork, TableScanOperator currentRootOperator, boolean local)
+    throws SemanticException {
+    String currAliasId = findAliasId(opProcCtx, currentRootOperator);
+    GenMapRedUtils.setMapWork(mapWork, opProcCtx.parseContext,
+      opProcCtx.inputs, null, currentRootOperator, currAliasId, opProcCtx.conf, local);
+  }
 
-    private static String findAliasId(GenSparkProcContext opProcCtx, TableScanOperator ts) {
-      for (String alias : opProcCtx.topOps.keySet()) {
-        if (opProcCtx.topOps.get(alias) == ts) {
-          return alias;
-        }
+  private static String findAliasId(GenSparkProcContext opProcCtx, TableScanOperator ts) {
+    for (String alias : opProcCtx.topOps.keySet()) {
+      if (opProcCtx.topOps.get(alias) == ts) {
+        return alias;
       }
-      return null;
     }
-
-    /**
-     * 1. Initializes the MapWork's aliasToWork, pointing to big-table's TS.
-     * 2. Adds the bucketing information to the MapWork.
-     * 3. Adds localwork to the MapWork, with localWork's aliasToWork pointing to small-table's TS.
-     */
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
-        Object... nodeOutputs) throws SemanticException {
-      SMBMapJoinOperator mapJoin = (SMBMapJoinOperator) nd;
-      GenSparkProcContext ctx = (GenSparkProcContext) procCtx;
-
-      // find the branch on which this processor was invoked
-      int pos = getPositionParent(mapJoin, stack);
-      boolean local = pos != mapJoin.getConf().getPosBigTable();
-
-      MapWork mapWork = ctx.smbJoinWorkMap.get(mapJoin);
-      initSMBJoinPlan(mapWork, ctx, local);
-
-      // find the associated mapWork that contains this processor.
-      setupBucketMapJoinInfo(mapWork, mapJoin);
-
-      // local aliases need not to hand over context further
-      return false;
-    }
-  }
-
-  public static NodeProcessor getTableScanMapJoin() {
-    return new SortMergeJoinProcessor();
+    return null;
   }
 }
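
The old NodeProcessor entry point (getTableScanMapJoin) is gone; callers are now expected to invoke the new public annotateMapWork directly. A minimal caller-side sketch, assuming the tree walker already has the SMBMapJoinOperator, the TableScanOperator it descended from, the parent operator on the walk path, and a GenSparkProcContext whose smbJoinWorkMap holds the MapWork for this join. The variable names below (smbMapJoinOp, parentOnWalkPath, ts, context) are assumptions for illustration, not part of the commit:

    // Decide whether this TableScan feeds a small (local) table: it is local
    // unless the branch we walked down is the join's big-table position.
    int pos = smbMapJoinOp.getParentOperators().indexOf(parentOnWalkPath);
    boolean local = pos != smbMapJoinOp.getConf().getPosBigTable();

    // Let the factory wire aliasToWork, bucket-map-join info, and local work
    // into the MapWork that was created for this SMB join.
    MapWork mapWork = context.smbJoinWorkMap.get(smbMapJoinOp);
    SparkSortMergeJoinFactory.annotateMapWork(context, mapWork, smbMapJoinOp, ts, local);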

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java Wed Feb 11 17:48:36 2015
@@ -78,12 +78,14 @@ import org.apache.calcite.rel.type.RelDa
 import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexExecutorImpl;
 import org.apache.calcite.rex.RexFieldCollation;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexWindowBound;
 import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.Schemas;
 import org.apache.calcite.sql.SqlAggFunction;
 import org.apache.calcite.sql.SqlCall;
 import org.apache.calcite.sql.SqlExplainLevel;
@@ -716,7 +718,7 @@ public class CalcitePlanner extends Sema
       hepPgmBldr.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE);
       hepPgmBldr.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE);
       hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE);
-      hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE);
+      hepPgmBldr.addRuleInstance(ProjectRemoveRule.NAME_CALC_INSTANCE);
       hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE);
 
       hepPgm = hepPgmBldr.build();
@@ -790,7 +792,7 @@ public class CalcitePlanner extends Sema
       RelFieldTrimmer fieldTrimmer = new RelFieldTrimmer(null, HiveProject.DEFAULT_PROJECT_FACTORY,
           HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY,
           RelFactories.DEFAULT_SEMI_JOIN_FACTORY, HiveSort.HIVE_SORT_REL_FACTORY,
-          HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
+          HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY, true);
       basePlan = fieldTrimmer.trim(basePlan);
 
       // 5. Rerun PPD through Project as column pruning would have introduced DT
@@ -833,6 +835,11 @@ public class CalcitePlanner extends Sema
       basePlan.getCluster().setMetadataProvider(
           new CachingRelMetadataProvider(chainedProvider, planner));
 
+      // Executor is required for constant-reduction rules; see [CALCITE-566]
+      final RexExecutorImpl executor =
+          new RexExecutorImpl(Schemas.createDataContext(null));
+      basePlan.getCluster().getPlanner().setExecutor(executor);
+
       planner.setRoot(basePlan);
       optimizedRelNode = planner.findBestExp();
 

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Wed Feb 11 17:48:36 2015
@@ -3038,7 +3038,8 @@ public class DDLSemanticAnalyzer extends
 
     for (Entry<String, String> e : partSpec.entrySet()) {
       for (String s : reservedPartitionValues) {
-        if (e.getValue().contains(s)) {
+        String value = e.getValue();
+        if (value != null && value.contains(s)) {
           throw new SemanticException(ErrorMsg.RESERVED_PART_VAL.getMsg(
               "(User value: " + e.getValue() + " Reserved substring: " + s + ")"));
         }
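
A small, self-contained illustration of the guard added above. The reserved value and partition spec below are made-up examples; in Hive the reserved list is built from configuration (for example the default partition name), not hard-coded:

    import java.util.*;

    public class ReservedPartValCheck {
      public static void main(String[] args) {
        List<String> reservedPartitionValues = Arrays.asList("__HIVE_DEFAULT_PARTITION__");
        Map<String, String> partSpec = new LinkedHashMap<String, String>();
        partSpec.put("dt", null);                        // partial spec: column named, no value
        partSpec.put("country", "__HIVE_DEFAULT_PARTITION__");

        for (Map.Entry<String, String> e : partSpec.entrySet()) {
          for (String s : reservedPartitionValues) {
            String value = e.getValue();
            // The null check is the fix: e.getValue().contains(s) would throw
            // a NullPointerException for the value-less "dt" entry.
            if (value != null && value.contains(s)) {
              System.out.println("Reserved substring " + s + " in partition column " + e.getKey());
            }
          }
        }
      }
    }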

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Wed Feb 11 17:48:36 2015
@@ -85,7 +85,8 @@ TOK_ORDERBY;
 TOK_CLUSTERBY;
 TOK_DISTRIBUTEBY;
 TOK_SORTBY;
-TOK_UNION;
+TOK_UNIONALL;
+TOK_UNIONDISTINCT;
 TOK_JOIN;
 TOK_LEFTOUTERJOIN;
 TOK_RIGHTOUTERJOIN;
@@ -612,10 +613,12 @@ import java.util.HashMap;
 
   // counter to generate unique union aliases
   private int aliasCounter;
-  
   private String generateUnionAlias() {
     return "_u" + (++aliasCounter);
   }
+  private CommonTree throwSetOpException() throws RecognitionException {
+    throw new FailedPredicateException(input, "orderByClause clusterByClause distributeByClause sortByClause limitClause can only be applied to the whole union.", "");
+  }
 }
 
 @rulecatch {
@@ -2043,7 +2046,8 @@ unionType
 setOperator
 @init { pushMsg("set operator", state); }
 @after { popMsg(state); }
-    : KW_UNION KW_ALL -> ^(TOK_UNION)
+    : KW_UNION KW_ALL -> ^(TOK_UNIONALL)
+    | KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT)
     ;
 
 queryStatementExpression[boolean topLevel]
@@ -2131,41 +2135,98 @@ regularBody[boolean topLevel]
    selectStatement[topLevel]
    ;
 
- selectStatement[boolean topLevel]
- : (singleSelectStatement -> singleSelectStatement)
-   (u=setOperator b=singleSelectStatement
-       -> ^($u {$selectStatement.tree} $b)
-   )*
-   -> {u != null && topLevel}? ^(TOK_QUERY
-         ^(TOK_FROM
-           ^(TOK_SUBQUERY
-             {$selectStatement.tree}
+selectStatement[boolean topLevel]
+   :
+   (
+   s=selectClause
+   f=fromClause?
+   w=whereClause?
+   g=groupByClause?
+   h=havingClause?
+   o=orderByClause?
+   c=clusterByClause?
+   d=distributeByClause?
+   sort=sortByClause?
+   win=window_clause?
+   l=limitClause?
+   -> ^(TOK_QUERY $f? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
+                     $s $w? $g? $h? $o? $c?
+                     $d? $sort? $win? $l?))
+   )
+   (set=setOpSelectStatement[$selectStatement.tree, topLevel])?
+   -> {set == null}?
+      {$selectStatement.tree}
+   -> {o==null && c==null && d==null && sort==null && l==null}?
+      {$set.tree}
+   -> {throwSetOpException()}
+   ;
+
+setOpSelectStatement[CommonTree t, boolean topLevel]
+   :
+   (u=setOperator b=simpleSelectStatement
+   -> {$setOpSelectStatement.tree != null && u.tree.getType()==HiveParser.TOK_UNIONDISTINCT}?
+      ^(TOK_QUERY
+          ^(TOK_FROM
+            ^(TOK_SUBQUERY
+              ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b)
               {adaptor.create(Identifier, generateUnionAlias())}
              )
           )
-         ^(TOK_INSERT 
+          ^(TOK_INSERT
+             ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
+             ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
+          )
+       )
+   -> {$setOpSelectStatement.tree != null && u.tree.getType()!=HiveParser.TOK_UNIONDISTINCT}?
+      ^(TOK_UNIONALL {$setOpSelectStatement.tree} $b)
+   -> {$setOpSelectStatement.tree == null && u.tree.getType()==HiveParser.TOK_UNIONDISTINCT}?
+      ^(TOK_QUERY
+          ^(TOK_FROM
+            ^(TOK_SUBQUERY
+              ^(TOK_UNIONALL {$t} $b)
+              {adaptor.create(Identifier, generateUnionAlias())}
+             )
+           )
+          ^(TOK_INSERT
             ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
-            ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF))
+            ^(TOK_SELECTDI ^(TOK_SELEXPR TOK_ALLCOLREF))
+         )
+       )
+   -> ^(TOK_UNIONALL {$t} $b)
+   )+
+   o=orderByClause?
+   c=clusterByClause?
+   d=distributeByClause?
+   sort=sortByClause?
+   win=window_clause?
+   l=limitClause?
+   -> {o==null && c==null && d==null && sort==null && win==null && l==null && !topLevel}?
+      {$setOpSelectStatement.tree}
+   -> ^(TOK_QUERY
+          ^(TOK_FROM
+            ^(TOK_SUBQUERY
+              {$setOpSelectStatement.tree}
+              {adaptor.create(Identifier, generateUnionAlias())}
+             )
           )
-        )
-    -> {$selectStatement.tree}
- ;
+          ^(TOK_INSERT
+             ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
+             ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF))
+             $o? $c? $d? $sort? $win? $l?
+          )
+       )
+   ;
 
-singleSelectStatement
+simpleSelectStatement
    :
    selectClause
    fromClause?
    whereClause?
    groupByClause?
    havingClause?
-   orderByClause?
-   clusterByClause?
-   distributeByClause?
-   sortByClause?
    window_clause?
-   limitClause? -> ^(TOK_QUERY fromClause? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
-                     selectClause whereClause? groupByClause? havingClause? orderByClause? clusterByClause?
-                     distributeByClause? sortByClause? window_clause? limitClause?))
+   -> ^(TOK_QUERY fromClause? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE))
+                     selectClause whereClause? groupByClause? havingClause? window_clause?))
    ;
 
 selectStatementWithCTE
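
To make the grammar change above concrete: UNION and UNION DISTINCT no longer get their own set-operation node in the AST; the parser keeps a TOK_UNIONALL node and expresses DISTINCT semantics as a SELECT DISTINCT * (TOK_SELECTDI) over a generated subquery alias. A hedged illustration using made-up table and column names; the alias _u1 mirrors what generateUnionAlias() would produce:

    public class UnionRewriteExample {
      public static void main(String[] args) {
        // What the user writes (plain UNION maps to TOK_UNIONDISTINCT):
        String userQuery =
            "SELECT key FROM t1 UNION SELECT key FROM t2";
        // Roughly what the new grammar builds, written back out as HiveQL:
        // a TOK_UNIONALL wrapped in TOK_SUBQUERY under TOK_SELECTDI.
        String parsedAs =
            "SELECT DISTINCT * FROM (SELECT key FROM t1 UNION ALL SELECT key FROM t2) _u1";
        System.out.println(userQuery);
        System.out.println(parsedAs);
      }
    }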

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g Wed Feb 11 17:48:36 2015
@@ -572,5 +572,5 @@ principalIdentifier
 
 nonReserved
     :
-    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION | KW_VALUES | KW_URI | KW_SERVER
+    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION | KW_VALUES | KW_URI | KW_SERVER
     ;

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java Wed Feb 11 17:48:36 2015
@@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.Context
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.ListSinkOperator;
 import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
@@ -83,7 +82,6 @@ public class ParseContext {
   private List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer; // list of map join
   // operators with no
   // reducer
-  private Map<GroupByOperator, Set<String>> groupOpToInputTables;
   private Map<String, PrunedPartitionList> prunedPartitions;
   private Map<String, ReadEntity> viewAliasToInput;
 
@@ -153,7 +151,6 @@ public class ParseContext {
       List<LoadTableDesc> loadTableWork, List<LoadFileDesc> loadFileWork,
       Context ctx, HashMap<String, String> idToTableNameMap, int destTableId,
       UnionProcContext uCtx, List<AbstractMapJoinOperator<? extends MapJoinDesc>> listMapJoinOpsNoReducer,
-      Map<GroupByOperator, Set<String>> groupOpToInputTables,
       Map<String, PrunedPartitionList> prunedPartitions,
       HashMap<TableScanOperator, sampleDesc> opToSamplePruner,
       GlobalLimitCtx globalLimitCtx,
@@ -177,7 +174,6 @@ public class ParseContext {
     this.destTableId = destTableId;
     this.uCtx = uCtx;
     this.listMapJoinOpsNoReducer = listMapJoinOpsNoReducer;
-    this.groupOpToInputTables = groupOpToInputTables;
     this.prunedPartitions = prunedPartitions;
     this.opToSamplePruner = opToSamplePruner;
     this.nameToSplitSample = nameToSplitSample;
@@ -391,21 +387,6 @@ public class ParseContext {
   }
 
   /**
-   * @return the groupOpToInputTables
-   */
-  public Map<GroupByOperator, Set<String>> getGroupOpToInputTables() {
-    return groupOpToInputTables;
-  }
-
-  /**
-   * @param groupOpToInputTables
-   */
-  public void setGroupOpToInputTables(
-      Map<GroupByOperator, Set<String>> groupOpToInputTables) {
-    this.groupOpToInputTables = groupOpToInputTables;
-  }
-
-  /**
    * @return pruned partition map
    */
   public Map<String, PrunedPartitionList> getPrunedPartitions() {

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/ProcessAnalyzeTable.java Wed Feb 11 17:48:36 2015
@@ -95,16 +95,25 @@ public class ProcessAnalyzeTable impleme
       assert alias != null;
 
       TezWork tezWork = context.currentTask.getWork();
-      boolean partialScan = parseInfo.isPartialScanAnalyzeCommand();
-      boolean noScan = parseInfo.isNoScanAnalyzeCommand();
-      if (inputFormat.equals(OrcInputFormat.class) && (noScan || partialScan)) {
-
+      if (inputFormat.equals(OrcInputFormat.class)) {
+        // For ORC, all the following statements are the same
+        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
+
         // There will not be any Tez job above this task
         StatsNoJobWork snjWork = new StatsNoJobWork(parseContext.getQB().getParseInfo().getTableSpec());
         snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
             HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+        // If partition is specified, get pruned partition list
+        Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(parseInfo);
+        if (confirmedParts.size() > 0) {
+          Table source = parseContext.getQB().getMetaData().getTableForAlias(alias);
+          List<String> partCols = GenMapRedUtils.getPartitionColumns(parseInfo);
+          PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts,
+              partCols, false);
+          snjWork.setPrunedPartitionList(partList);
+        }
         Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf());
         snjTask.setParentTasks(null);
         context.rootTasks.remove(context.currentTask);

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java Wed Feb 11 17:48:36 2015
@@ -465,4 +465,15 @@ public class RowResolver implements Seri
     }
     return combinedRR;
   }
+
+  public RowResolver duplicate() {
+    RowResolver resolver = new RowResolver();
+    resolver.rowSchema = new RowSchema(rowSchema);
+    resolver.rslvMap.putAll(rslvMap);
+    resolver.invRslvMap.putAll(invRslvMap);
+    resolver.altInvRslvMap.putAll(altInvRslvMap);
+    resolver.expressionMap.putAll(expressionMap);
+    resolver.isExprResolver = isExprResolver;
+    return resolver;
+  }
 }
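
The new duplicate() is used later in this commit by SemanticAnalyzer.genSelectAllDesc to give the inserted SEL(*) operator its own resolver. Note that the copy is shallow: putAll copies the map entries, but the nested structures and ColumnInfo objects they reference are still shared with the source. A short usage sketch mirroring that call site; inputRR, colList, columnNames, input, and putOpInsertMap are assumed to be in scope:

    // Copy the incoming row resolver so the new SELECT * operator can own its
    // output schema without mutating the parent's resolver.
    RowResolver outputRR = inputRR.duplicate();
    Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
        new SelectDesc(colList, columnNames, true),
        outputRR.getRowSchema(), input), outputRR);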

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Feb 11 17:48:36 2015
@@ -230,7 +230,7 @@ public class SemanticAnalyzer extends Ba
   private HashMap<TableScanOperator, PrunedPartitionList> opToPartList;
   private HashMap<String, Operator<? extends OperatorDesc>> topOps;
   private final HashMap<String, Operator<? extends OperatorDesc>> topSelOps;
-  private LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
+  private final LinkedHashMap<Operator<? extends OperatorDesc>, OpParseContext> opParseCtx;
   private List<LoadTableDesc> loadTableWork;
   private List<LoadFileDesc> loadFileWork;
   private final Map<JoinOperator, QBJoinTree> joinContext;
@@ -389,7 +389,6 @@ public class SemanticAnalyzer extends Ba
     uCtx = pctx.getUCtx();
     listMapJoinOpsNoReducer = pctx.getListMapJoinOpsNoReducer();
     qb = pctx.getQB();
-    groupOpToInputTables = pctx.getGroupOpToInputTables();
     prunedPartitions = pctx.getPrunedPartitions();
     fetchTask = pctx.getFetchTask();
     setLineageInfo(pctx.getLineageInfo());
@@ -400,7 +399,7 @@ public class SemanticAnalyzer extends Ba
         new HashSet<JoinOperator>(joinContext.keySet()),
         new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
         loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
-        listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions,
+        listMapJoinOpsNoReducer, prunedPartitions,
         opToSamplePruner, globalLimitCtx, nameToSplitSample, inputs, rootTasks,
         opToPartToSkewedPruner, viewAliasToInput,
         reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties);
@@ -421,7 +420,7 @@ public class SemanticAnalyzer extends Ba
       qbexpr.setQB(qb);
     }
       break;
-    case HiveParser.TOK_UNION: {
+    case HiveParser.TOK_UNIONALL: {
       qbexpr.setOpcode(QBExpr.Opcode.UNION);
       // query 1
       assert (ast.getChild(0) != null);
@@ -431,7 +430,7 @@ public class SemanticAnalyzer extends Ba
       qbexpr.setQBExpr1(qbexpr1);
 
       // query 2
-      assert (ast.getChild(0) != null);
+      assert (ast.getChild(1) != null);
       QBExpr qbexpr2 = new QBExpr(alias + "-subquery2");
       doPhase1QBExpr((ASTNode) ast.getChild(1), qbexpr2, id + "-subquery2",
           alias + "-subquery2");
@@ -1325,7 +1324,7 @@ public class SemanticAnalyzer extends Ba
 
         break;
 
-      case HiveParser.TOK_UNION:
+      case HiveParser.TOK_UNIONALL:
         if (!qbp.getIsSubQ()) {
           // this shouldn't happen. The parser should have converted the union to be
           // contained in a subquery. Just in case, we keep the error as a fallback.
@@ -4171,8 +4170,6 @@ public class SemanticAnalyzer extends Ba
    * @param genericUDAFEvaluators
    *          The mapping from Aggregation StringTree to the
    *          genericUDAFEvaluator.
-   * @param distPartAggr
-   *          partial aggregation for distincts
    * @param groupingSets
    *          list of grouping sets
    * @param groupingSetsPresent
@@ -4185,7 +4182,6 @@ public class SemanticAnalyzer extends Ba
   private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo,
       String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode,
       Map<String, GenericUDAFEvaluator> genericUDAFEvaluators,
-      boolean distPartAgg,
       List<Integer> groupingSets,
       boolean groupingSetsPresent,
       boolean groupingSetsNeedAdditionalMRJob) throws SemanticException {
@@ -4284,8 +4280,7 @@ public class SemanticAnalyzer extends Ba
       // Otherwise, we look for b+c.
       // For distincts, partial aggregation is never performed on the client
       // side, so always look for the parameters: d+e
-      boolean partialAggDone = !(distPartAgg || isDistinct);
-      if (!partialAggDone) {
+      if (isDistinct) {
         // 0 is the function name
         for (int i = 1; i < value.getChildCount(); i++) {
           ASTNode paraExpr = (ASTNode) value.getChild(i);
@@ -4304,7 +4299,6 @@ public class SemanticAnalyzer extends Ba
             paraExpression = Utilities.ReduceField.KEY.name() + "." +
                 lastKeyColName + ":" + numDistinctUDFs + "."
                 + getColumnInternalName(i - 1);
-
           }
 
           ExprNodeDesc expr = new ExprNodeColumnDesc(paraExprInfo.getType(),
@@ -4317,9 +4311,7 @@ public class SemanticAnalyzer extends Ba
             // this parameter is a constant
             expr = reduceValue;
           }
-
           aggParameters.add(expr);
-
         }
       } else {
         ColumnInfo paraExprInfo = groupByInputRowResolver.getExpression(value);
@@ -4335,19 +4327,11 @@ public class SemanticAnalyzer extends Ba
       if (isDistinct) {
         numDistinctUDFs++;
       }
-      boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR;
+
       Mode amode = groupByDescModeToUDAFMode(mode, isDistinct);
       GenericUDAFEvaluator genericUDAFEvaluator = null;
-      // For distincts, partial aggregations have not been done
-      if (distPartAgg) {
-        genericUDAFEvaluator = getGenericUDAFEvaluator(aggName, aggParameters,
-            value, isDistinct, isAllColumns);
-        assert (genericUDAFEvaluator != null);
-        genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator);
-      } else {
-        genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
-        assert (genericUDAFEvaluator != null);
-      }
+      genericUDAFEvaluator = genericUDAFEvaluators.get(entry.getKey());
+      assert (genericUDAFEvaluator != null);
 
       GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode,
           aggParameters);
@@ -4371,7 +4355,7 @@ public class SemanticAnalyzer extends Ba
     // additional rows corresponding to grouping sets need to be created here.
     Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild(
         new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations,
-            distPartAgg, groupByMemoryUsage, memoryThreshold,
+            groupByMemoryUsage, memoryThreshold,
             groupingSets,
             groupingSetsPresent && groupingSetsNeedAdditionalMRJob,
             groupingSetsPosition, containsDistinctAggr),
@@ -5216,7 +5200,7 @@ public class SemanticAnalyzer extends Ba
 
     // insert a select operator here used by the ColumnPruner to reduce
     // the data to shuffle
-    Operator select = insertSelectAllPlanForGroupBy(selectInput);
+    Operator select = genSelectAllDesc(selectInput);
 
     // Generate ReduceSinkOperator
     ReduceSinkOperator reduceSinkOperatorInfo =
@@ -5260,64 +5244,6 @@ public class SemanticAnalyzer extends Ba
   }
 
   /**
-   * Generate a Multi Group-By plan using a 2 map-reduce jobs.
-   *
-   * @param dest
-   * @param qb
-   * @param input
-   * @return
-   * @throws SemanticException
-   *
-   *           Generate a Group-By plan using a 2 map-reduce jobs. Spray by the
-   *           distinct key in hope of getting a uniform distribution, and
-   *           compute partial aggregates by the grouping key. Evaluate partial
-   *           aggregates first, and spray by the grouping key to compute actual
-   *           aggregates in the second phase. The aggregation evaluation
-   *           functions are as follows: Partitioning Key: distinct key
-   *
-   *           Sorting Key: distinct key
-   *
-   *           Reducer: iterate/terminatePartial (mode = PARTIAL1)
-   *
-   *           STAGE 2
-   *
-   *           Partitioning Key: grouping key
-   *
-   *           Sorting Key: grouping key
-   *
-   *           Reducer: merge/terminate (mode = FINAL)
-   */
-  @SuppressWarnings("nls")
-  private Operator genGroupByPlan2MRMultiGroupBy(String dest, QB qb,
-      Operator input) throws SemanticException {
-
-    // ////// Generate GroupbyOperator for a map-side partial aggregation
-    Map<String, GenericUDAFEvaluator> genericUDAFEvaluators =
-        new LinkedHashMap<String, GenericUDAFEvaluator>();
-
-    QBParseInfo parseInfo = qb.getParseInfo();
-
-    // ////// 2. Generate GroupbyOperator
-    Operator groupByOperatorInfo = genGroupByPlanGroupByOperator1(parseInfo,
-        dest, input, GroupByDesc.Mode.HASH, genericUDAFEvaluators, true,
-        null, false, false);
-
-    int numReducers = -1;
-    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
-
-    // ////// 3. Generate ReduceSinkOperator2
-    Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator2MR(
-        parseInfo, dest, groupByOperatorInfo, grpByExprs.size(), numReducers, false);
-
-    // ////// 4. Generate GroupbyOperator2
-    Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator2MR(parseInfo,
-        dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL,
-        genericUDAFEvaluators, false);
-
-    return groupByOperatorInfo2;
-  }
-
-  /**
    * Generate a Group-By plan using a 2 map-reduce jobs (5 operators will be
    * inserted):
    *
@@ -5641,7 +5567,7 @@ public class SemanticAnalyzer extends Ba
       // on the reducer.
       return genGroupByPlanGroupByOperator1(parseInfo, dest,
           reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL,
-          genericUDAFEvaluators, false,
+          genericUDAFEvaluators,
           groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob);
     }
     else
@@ -5654,7 +5580,7 @@ public class SemanticAnalyzer extends Ba
       Operator groupByOperatorInfo2 =
           genGroupByPlanGroupByOperator1(parseInfo, dest,
               reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS,
-              genericUDAFEvaluators, false,
+              genericUDAFEvaluators,
               groupingSets, groupingSetsPresent, groupingSetsNeedAdditionalMRJob);
 
       // ////// Generate ReduceSinkOperator2
@@ -5785,7 +5711,7 @@ public class SemanticAnalyzer extends Ba
       // ////// Generate GroupbyOperator for a partial aggregation
       Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo,
           dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS,
-          genericUDAFEvaluators, false,
+          genericUDAFEvaluators,
           groupingSets, groupingSetsPresent, false);
 
       int numReducers = -1;
@@ -8398,8 +8324,7 @@ public class SemanticAnalyzer extends Ba
     return type;
   }
 
-  private Operator insertSelectAllPlanForGroupBy(Operator input)
-      throws SemanticException {
+  private Operator genSelectAllDesc(Operator input) throws SemanticException {
     OpParseContext inputCtx = opParseCtx.get(input);
     RowResolver inputRR = inputCtx.getRowResolver();
     ArrayList<ColumnInfo> columns = inputRR.getColumnInfos();
@@ -8413,173 +8338,14 @@ public class SemanticAnalyzer extends Ba
       columnNames.add(col.getInternalName());
       columnExprMap.put(col.getInternalName(), new ExprNodeColumnDesc(col));
     }
+    RowResolver outputRR = inputRR.duplicate();
     Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild(
-        new SelectDesc(colList, columnNames, true), new RowSchema(inputRR
-            .getColumnInfos()), input), inputRR);
+        new SelectDesc(colList, columnNames, true), 
+        outputRR.getRowSchema(), input), outputRR);
     output.setColumnExprMap(columnExprMap);
     return output;
   }
 
-  // Return the common distinct expression
-  // There should be more than 1 destination, with group bys in all of them.
-  private List<ASTNode> getCommonDistinctExprs(QB qb, Operator input) {
-    QBParseInfo qbp = qb.getParseInfo();
-    // If a grouping set aggregation is present, common processing is not possible
-    if (!qbp.getDestCubes().isEmpty() || !qbp.getDestRollups().isEmpty()
-        || !qbp.getDestToLateralView().isEmpty()) {
-      return null;
-    }
-
-    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-    TreeSet<String> ks = new TreeSet<String>();
-    ks.addAll(qbp.getClauseNames());
-
-    // Go over all the destination tables
-    if (ks.size() <= 1) {
-      return null;
-    }
-
-    List<ExprNodeDesc> oldList = null;
-    List<ASTNode> oldASTList = null;
-
-    for (String dest : ks) {
-      // If a filter is present, common processing is not possible
-      if (qbp.getWhrForClause(dest) != null) {
-        return null;
-      }
-
-      if (qbp.getAggregationExprsForClause(dest).size() == 0
-          && getGroupByForClause(qbp, dest).size() == 0) {
-        return null;
-      }
-
-      // All distinct expressions must be the same
-      List<ASTNode> list = qbp.getDistinctFuncExprsForClause(dest);
-      if (list.isEmpty()) {
-        return null;
-      }
-
-      List<ExprNodeDesc> currDestList;
-      try {
-        currDestList = getDistinctExprs(qbp, dest, inputRR);
-      } catch (SemanticException e) {
-        return null;
-      }
-
-      List<ASTNode> currASTList = new ArrayList<ASTNode>();
-      for (ASTNode value : list) {
-        // 0 is function name
-        for (int i = 1; i < value.getChildCount(); i++) {
-          ASTNode parameter = (ASTNode) value.getChild(i);
-          currASTList.add(parameter);
-        }
-        if (oldList == null) {
-          oldList = currDestList;
-          oldASTList = currASTList;
-        } else {
-          if (!matchExprLists(oldList, currDestList)) {
-            return null;
-          }
-        }
-      }
-    }
-
-    return oldASTList;
-  }
-
-  private Operator createCommonReduceSink(QB qb, Operator input)
-      throws SemanticException {
-    // Go over all the tables and extract the common distinct key
-    List<ASTNode> distExprs = getCommonDistinctExprs(qb, input);
-
-    QBParseInfo qbp = qb.getParseInfo();
-    TreeSet<String> ks = new TreeSet<String>();
-    ks.addAll(qbp.getClauseNames());
-
-    // Pass the entire row
-    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
-    RowResolver reduceSinkOutputRowResolver = new RowResolver();
-    reduceSinkOutputRowResolver.setIsExprResolver(true);
-    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
-    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
-    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
-
-    // Pre-compute distinct group-by keys and store in reduceKeys
-
-    List<String> outputColumnNames = new ArrayList<String>();
-    for (ASTNode distn : distExprs) {
-      ExprNodeDesc distExpr = genExprNodeDesc(distn, inputRR);
-      if (reduceSinkOutputRowResolver.getExpression(distn) == null) {
-        reduceKeys.add(distExpr);
-        outputColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
-        String field = Utilities.ReduceField.KEY.toString() + "."
-            + getColumnInternalName(reduceKeys.size() - 1);
-        ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
-            reduceKeys.size() - 1).getTypeInfo(), "", false);
-        reduceSinkOutputRowResolver.putExpression(distn, colInfo);
-        colExprMap.put(colInfo.getInternalName(), distExpr);
-      }
-    }
-
-    // Go over all the grouping keys and aggregations
-    for (String dest : ks) {
-
-      List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
-      for (int i = 0; i < grpByExprs.size(); ++i) {
-        ASTNode grpbyExpr = grpByExprs.get(i);
-
-        if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) {
-          ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, inputRR);
-          reduceValues.add(grpByExprNode);
-          String field = Utilities.ReduceField.VALUE.toString() + "."
-              + getColumnInternalName(reduceValues.size() - 1);
-          ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(
-              reduceValues.size() - 1).getTypeInfo(), "", false);
-          reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo);
-          outputColumnNames.add(getColumnInternalName(reduceValues.size() - 1));
-          colExprMap.put(field, grpByExprNode);
-        }
-      }
-
-      // For each aggregation
-      HashMap<String, ASTNode> aggregationTrees = qbp
-          .getAggregationExprsForClause(dest);
-      assert (aggregationTrees != null);
-
-      for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
-        ASTNode value = entry.getValue();
-
-        // 0 is the function name
-        for (int i = 1; i < value.getChildCount(); i++) {
-          ASTNode paraExpr = (ASTNode) value.getChild(i);
-
-          if (reduceSinkOutputRowResolver.getExpression(paraExpr) == null) {
-            ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, inputRR);
-            reduceValues.add(paraExprNode);
-            String field = Utilities.ReduceField.VALUE.toString() + "."
-                + getColumnInternalName(reduceValues.size() - 1);
-            ColumnInfo colInfo = new ColumnInfo(field, reduceValues.get(
-                reduceValues.size() - 1).getTypeInfo(), "", false);
-            reduceSinkOutputRowResolver.putExpression(paraExpr, colInfo);
-            outputColumnNames
-                .add(getColumnInternalName(reduceValues.size() - 1));
-            colExprMap.put(field, paraExprNode);
-          }
-        }
-      }
-    }
-
-    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
-        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
-            reduceValues, outputColumnNames, true, -1, reduceKeys.size(), -1,
-                AcidUtils.Operation.NOT_ACID),
-            new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), input),
-        reduceSinkOutputRowResolver);
-
-    rsOp.setColumnExprMap(colExprMap);
-    return rsOp;
-  }
-
   // Groups the clause names into lists so that any two clauses in the same list has the same
   // group by and distinct keys and no clause appears in more than one list. Returns a list of the
   // lists of clauses.
@@ -8757,157 +8523,114 @@ public class SemanticAnalyzer extends Ba
 
     TreeSet<String> ks = new TreeSet<String>(qbp.getClauseNames());
     Map<String, Operator<? extends OperatorDesc>> inputs = createInputForDests(qb, input, ks);
-    // For multi-group by with the same distinct, we ignore all user hints
-    // currently. It doesnt matter whether he has asked to do
-    // map-side aggregation or not. Map side aggregation is turned off
-    List<ASTNode> commonDistinctExprs = getCommonDistinctExprs(qb, input);
-
-    // Consider a query like:
-    //
-    //  from src
-    //    insert overwrite table dest1 select col1, count(distinct colx) group by col1
-    //    insert overwrite table dest2 select col2, count(distinct colx) group by col2;
-    //
-    // With HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS set to true, first we spray by the distinct
-    // value (colx), and then perform the 2 groups bys. This makes sense if map-side aggregation is
-    // turned off. However, with maps-side aggregation, it might be useful in some cases to treat
-    // the 2 inserts independently, thereby performing the query above in 2MR jobs instead of 3
-    // (due to spraying by distinct key first).
-    boolean optimizeMultiGroupBy = commonDistinctExprs != null &&
-        conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_MULTI_GROUPBY_COMMON_DISTINCTS);
 
     Operator curr = input;
 
-    // if there is a single distinct, optimize that. Spray initially by the
-    // distinct key,
-    // no computation at the mapper. Have multiple group by operators at the
-    // reducer - and then
-    // proceed
-    if (optimizeMultiGroupBy) {
-      curr = createCommonReduceSink(qb, input);
-
-      RowResolver currRR = opParseCtx.get(curr).getRowResolver();
-      // create a forward operator
-      input = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
-          new RowSchema(currRR.getColumnInfos()), curr), currRR);
-
-      for (String dest : ks) {
-        curr = input;
-        curr = genGroupByPlan2MRMultiGroupBy(dest, qb, curr);
-        curr = genSelectPlan(dest, qb, curr, null); // TODO: we may need to pass "input" here instead of null
-        Integer limit = qbp.getDestLimit(dest);
-        if (limit != null) {
-          curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), true);
-          qb.getParseInfo().setOuterQueryLimit(limit.intValue());
-        }
-        curr = genFileSinkPlan(dest, qb, curr);
-      }
-    } else {
-      List<List<String>> commonGroupByDestGroups = null;
+    List<List<String>> commonGroupByDestGroups = null;
 
-      // If we can put multiple group bys in a single reducer, determine suitable groups of
-      // expressions, otherwise treat all the expressions as a single group
-      if (conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) {
-        try {
-          commonGroupByDestGroups = getCommonGroupByDestGroups(qb, inputs);
-        } catch (SemanticException e) {
-          LOG.error("Failed to group clauses by common spray keys.", e);
-        }
+    // If we can put multiple group bys in a single reducer, determine suitable groups of
+    // expressions, otherwise treat all the expressions as a single group
+    if (conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) {
+      try {
+        commonGroupByDestGroups = getCommonGroupByDestGroups(qb, inputs);
+      } catch (SemanticException e) {
+        LOG.error("Failed to group clauses by common spray keys.", e);
       }
+    }
 
-      if (commonGroupByDestGroups == null) {
-        commonGroupByDestGroups = new ArrayList<List<String>>();
-        commonGroupByDestGroups.add(new ArrayList<String>(ks));
-      }
+    if (commonGroupByDestGroups == null) {
+      commonGroupByDestGroups = new ArrayList<List<String>>();
+      commonGroupByDestGroups.add(new ArrayList<String>(ks));
+    }
 
-      if (!commonGroupByDestGroups.isEmpty()) {
+    if (!commonGroupByDestGroups.isEmpty()) {
 
-        // Iterate over each group of subqueries with the same group by/distinct keys
-        for (List<String> commonGroupByDestGroup : commonGroupByDestGroups) {
-          if (commonGroupByDestGroup.isEmpty()) {
-            continue;
-          }
+      // Iterate over each group of subqueries with the same group by/distinct keys
+      for (List<String> commonGroupByDestGroup : commonGroupByDestGroups) {
+        if (commonGroupByDestGroup.isEmpty()) {
+          continue;
+        }
 
-          String firstDest = commonGroupByDestGroup.get(0);
-          input = inputs.get(firstDest);
+        String firstDest = commonGroupByDestGroup.get(0);
+        input = inputs.get(firstDest);
 
-          // Constructs a standard group by plan if:
-          // There is no other subquery with the same group by/distinct keys or
-          // (There are no aggregations in a representative query for the group and
-          // There is no group by in that representative query) or
-          // The data is skewed or
-          // The conf variable used to control combining group bys into a single reducer is false
-          if (commonGroupByDestGroup.size() == 1 ||
-              (qbp.getAggregationExprsForClause(firstDest).size() == 0 &&
-              getGroupByForClause(qbp, firstDest).size() == 0) ||
-              conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) ||
-              !conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) {
-
-            // Go over all the destination tables
-            for (String dest : commonGroupByDestGroup) {
-              curr = inputs.get(dest);
-
-              if (qbp.getWhrForClause(dest) != null) {
-                ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
-                curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false);
-              }
-              // Preserve operator before the GBY - we'll use it to resolve '*'
-              Operator<?> gbySource = curr;
+        // Constructs a standard group by plan if:
+        // There is no other subquery with the same group by/distinct keys or
+        // (There are no aggregations in a representative query for the group and
+        // There is no group by in that representative query) or
+        // The data is skewed or
+        // The conf variable used to control combining group bys into a single reducer is false
+        if (commonGroupByDestGroup.size() == 1 ||
+            (qbp.getAggregationExprsForClause(firstDest).size() == 0 &&
+            getGroupByForClause(qbp, firstDest).size() == 0) ||
+            conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) ||
+            !conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) {
+
+          // Go over all the destination tables
+          for (String dest : commonGroupByDestGroup) {
+            curr = inputs.get(dest);
+
+            if (qbp.getWhrForClause(dest) != null) {
+              ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest);
+              curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false);
+            }
+            // Preserve operator before the GBY - we'll use it to resolve '*'
+            Operator<?> gbySource = curr;
 
-              if (qbp.getAggregationExprsForClause(dest).size() != 0
-                  || getGroupByForClause(qbp, dest).size() > 0) {
-                // multiple distincts is not supported with skew in data
-                if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
-                    qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
-                  throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.
-                      getMsg());
-                }
-                // insert a select operator here used by the ColumnPruner to reduce
-                // the data to shuffle
-                curr = insertSelectAllPlanForGroupBy(curr);
-                // Check and transform group by *. This will only happen for select distinct *.
-                // Here the "genSelectPlan" is being leveraged.
-                // The main benefits are (1) remove virtual columns that should
-                // not be included in the group by; (2) add the fully qualified column names to unParseTranslator
-                // so that view is supported. The drawback is that an additional SEL op is added. If it is
-                // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match
-                // SEL%SEL% rule.
-                ASTNode selExprList = qbp.getSelForClause(dest);
-                if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
-                    && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
-                  ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
-                  if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
-                    curr = genSelectPlan(dest, qb, curr, curr);
-                    RowResolver rr = opParseCtx.get(curr).getRowResolver();
-                    qbp.setSelExprForClause(dest, SemanticAnalyzer.genSelectDIAST(rr));
-                  }
+            if (qbp.getAggregationExprsForClause(dest).size() != 0
+                || getGroupByForClause(qbp, dest).size() > 0) {
+              // multiple distincts is not supported with skew in data
+              if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
+                  qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
+                throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.
+                    getMsg());
+              }
+              // insert a select operator here used by the ColumnPruner to reduce
+              // the data to shuffle
+              curr = genSelectAllDesc(curr);
+              // Check and transform group by *. This will only happen for select distinct *.
+              // Here the "genSelectPlan" is being leveraged.
+              // The main benefits are (1) remove virtual columns that should
+              // not be included in the group by; (2) add the fully qualified column names to unParseTranslator
+              // so that view is supported. The drawback is that an additional SEL op is added. If it is
+              // not necessary, it will be removed by NonBlockingOpDeDupProc Optimizer because it will match
+              // SEL%SEL% rule.
+              ASTNode selExprList = qbp.getSelForClause(dest);
+              if (selExprList.getToken().getType() == HiveParser.TOK_SELECTDI
+                  && selExprList.getChildCount() == 1 && selExprList.getChild(0).getChildCount() == 1) {
+                ASTNode node = (ASTNode) selExprList.getChild(0).getChild(0);
+                if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+                  curr = genSelectPlan(dest, qb, curr, curr);
+                  RowResolver rr = opParseCtx.get(curr).getRowResolver();
+                  qbp.setSelExprForClause(dest, SemanticAnalyzer.genSelectDIAST(rr));
                 }
-                if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
-                  if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
-                    curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr);
-                  } else {
-                    curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
-                  }
-                } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
-                  curr = genGroupByPlan2MR(dest, qb, curr);
+              }
+              if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
+                if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+                  curr = genGroupByPlanMapAggrNoSkew(dest, qb, curr);
                 } else {
-                  curr = genGroupByPlan1MR(dest, qb, curr);
+                  curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
                 }
+              } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+                curr = genGroupByPlan2MR(dest, qb, curr);
+              } else {
+                curr = genGroupByPlan1MR(dest, qb, curr);
               }
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("RR before GB " + opParseCtx.get(gbySource).getRowResolver()
-                    + " after GB " + opParseCtx.get(curr).getRowResolver());
-              }
-
-              curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo, gbySource);
             }
-          } else {
-            curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input, aliasToOpInfo);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("RR before GB " + opParseCtx.get(gbySource).getRowResolver()
+                  + " after GB " + opParseCtx.get(curr).getRowResolver());
+            }
+
+            curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo, gbySource);
           }
+        } else {
+          curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input, aliasToOpInfo);
         }
       }
     }
 
+
     if (LOG.isDebugEnabled()) {
       LOG.debug("Created Body Plan for Query Block " + qb.getId());
     }
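
The branch above picks one of four group-by plan generators from just two configuration flags, HIVEMAPSIDEAGGREGATE and HIVEGROUPBYSKEW. A stand-alone sketch of that decision table (class and method names here are illustrative only, not Hive source):

// Illustrative only: mirrors the if/else structure in the hunk above.
public final class GroupByStrategyChooser {

  enum Strategy { MAP_AGGR_NO_SKEW, MAP_AGGR_2MR, PLAIN_2MR, PLAIN_1MR }

  static Strategy choose(boolean mapSideAggregate, boolean groupBySkew) {
    if (mapSideAggregate) {
      // HIVEMAPSIDEAGGREGATE: pre-aggregate on the map side; skew decides
      // whether a second reduce stage is still needed.
      return groupBySkew ? Strategy.MAP_AGGR_2MR : Strategy.MAP_AGGR_NO_SKEW;
    }
    // No map-side aggregation: skew alone decides between one and two stages.
    return groupBySkew ? Strategy.PLAIN_2MR : Strategy.PLAIN_1MR;
  }

  public static void main(String[] args) {
    System.out.println(choose(true, false)); // MAP_AGGR_NO_SKEW
  }
}
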
@@ -10101,7 +9824,7 @@ public class SemanticAnalyzer extends Ba
         new HashSet<JoinOperator>(joinContext.keySet()),
         new HashSet<SMBMapJoinOperator>(smbMapJoinContext.keySet()),
         loadTableWork, loadFileWork, ctx, idToTableNameMap, destTableId, uCtx,
-        listMapJoinOpsNoReducer, groupOpToInputTables, prunedPartitions, opToSamplePruner,
+        listMapJoinOpsNoReducer, prunedPartitions, opToSamplePruner,
         globalLimitCtx, nameToSplitSample, inputs, rootTasks, opToPartToSkewedPruner,
         viewAliasToInput, reduceSinkOperatorsAddedByEnforceBucketingSorting, queryProperties);
 
@@ -11957,6 +11680,7 @@ public class SemanticAnalyzer extends Ba
       input = putOpInsertMap(OperatorFactory.getAndMakeChild(ptfDesc,
           new RowSchema(ptfOpRR.getColumnInfos()),
           input), ptfOpRR);
+      input = genSelectAllDesc(input);
       rr = ptfOpRR;
     }
 

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java Wed Feb 11 17:48:36 2015
@@ -41,6 +41,7 @@ import org.apache.hadoop.hive.ql.exec.Ta
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.spark.SparkTask;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.metadata.Hive;
@@ -280,6 +281,10 @@ public abstract class TaskCompiler {
       for (ExecDriver tsk : mrTasks) {
         tsk.setRetryCmdWhenFail(true);
       }
+      List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks);
+      for (SparkTask sparkTask : sparkTasks) {
+        sparkTask.setRetryCmdWhenFail(true);
+      }
     }
 
     Interner<TableDesc> interner = Interners.newStrongInterner();
@@ -390,7 +395,7 @@ public abstract class TaskCompiler {
         pCtx.getJoinOps(), pCtx.getSmbMapJoinOps(),
         pCtx.getLoadTableWork(), pCtx.getLoadFileWork(), pCtx.getContext(),
         pCtx.getIdToTableNameMap(), pCtx.getDestTableId(), pCtx.getUCtx(),
-        pCtx.getListMapJoinOpsNoReducer(), pCtx.getGroupOpToInputTables(),
+        pCtx.getListMapJoinOpsNoReducer(),
         pCtx.getPrunedPartitions(), pCtx.getOpToSamplePruner(), pCtx.getGlobalLimitCtx(),
         pCtx.getNameToSplitSample(), pCtx.getSemanticInputs(), rootTasks,
         pCtx.getOpToPartToSkewedPruner(), pCtx.getViewAliasToInput(),

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java Wed Feb 11 17:48:36 2015
@@ -36,7 +36,6 @@ import org.apache.hadoop.hive.ql.lib.Nod
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.DependencyCollectionWork;
-import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
@@ -44,6 +43,7 @@ import org.apache.hadoop.hive.ql.plan.Sp
 import org.apache.hadoop.hive.ql.plan.SparkWork;
 
 import java.io.Serializable;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.LinkedList;
@@ -103,8 +103,8 @@ public class GenSparkProcContext impleme
   // map that says which mapjoin belongs to which work item
   public final Map<MapJoinOperator, List<BaseWork>> mapJoinWorkMap;
 
-  // a map to keep track of which MapWork item holds which SMBMapJoinOp
-  public final Map<SMBMapJoinOperator, MapWork> smbJoinWorkMap;
+  // Map to keep track of which SMB Join operators and their information to annotate their MapWork with.
+  public final Map<SMBMapJoinOperator, SparkSMBMapJoinInfo> smbMapJoinCtxMap;
 
   // a map to keep track of which root generated which work
   public final Map<Operator<?>, BaseWork> rootToWorkMap;
@@ -160,7 +160,7 @@ public class GenSparkProcContext impleme
         new LinkedHashMap<ReduceSinkOperator, ObjectPair<SparkEdgeProperty, ReduceWork>>();
     this.linkOpWithWorkMap = new LinkedHashMap<Operator<?>, Map<BaseWork, SparkEdgeProperty>>();
     this.linkWorkWithReduceSinkMap = new LinkedHashMap<BaseWork, List<ReduceSinkOperator>>();
-    this.smbJoinWorkMap = new LinkedHashMap<SMBMapJoinOperator, MapWork>();
+    this.smbMapJoinCtxMap = new HashMap<SMBMapJoinOperator, SparkSMBMapJoinInfo>();
     this.mapJoinWorkMap = new LinkedHashMap<MapJoinOperator, List<BaseWork>>();
     this.rootToWorkMap = new LinkedHashMap<Operator<?>, BaseWork>();
     this.childToWorkMap = new LinkedHashMap<Operator<?>, List<BaseWork>>();
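
SparkSMBMapJoinInfo itself is a new class that does not appear in this part of the diff; judging from how its fields are used in GenSparkUtils and SparkCompiler below, it is presumably a plain holder along these lines (an assumed shape, not the committed source):

package org.apache.hadoop.hive.ql.parse.spark;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.plan.MapWork;

// Assumed shape only: the fields are inferred from their uses below
// (smbMapJoinInfo.mapWork, .bigTableRootOp, .smallTableRootOps).
public class SparkSMBMapJoinInfo {
  Operator<?> bigTableRootOp;
  List<Operator<?>> smallTableRootOps = new ArrayList<Operator<?>>();
  MapWork mapWork;
}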

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java Wed Feb 11 17:48:36 2015
@@ -41,10 +41,12 @@ import org.apache.hadoop.hive.ql.exec.Ha
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory;
 import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
@@ -443,6 +445,25 @@ public class GenSparkUtils {
     return null;
   }
 
+  /**
+   * Fill MapWork with 'local' work and bucket information for SMB Join.
+   * @param context context, containing references to MapWorks and their SMB information.
+   * @throws SemanticException
+   */
+  public void annotateMapWork(GenSparkProcContext context) throws SemanticException {
+    for (SMBMapJoinOperator smbMapJoinOp : context.smbMapJoinCtxMap.keySet()) {
+      //initialize mapwork with smbMapJoin information.
+      SparkSMBMapJoinInfo smbMapJoinInfo = context.smbMapJoinCtxMap.get(smbMapJoinOp);
+      MapWork work = smbMapJoinInfo.mapWork;
+      SparkSortMergeJoinFactory.annotateMapWork(context, work, smbMapJoinOp,
+        (TableScanOperator) smbMapJoinInfo.bigTableRootOp, false);
+      for (Operator<?> smallTableRootOp : smbMapJoinInfo.smallTableRootOps) {
+        SparkSortMergeJoinFactory.annotateMapWork(context, work, smbMapJoinOp,
+          (TableScanOperator) smallTableRootOp, true);
+      }
+    }
+  }
+
   public synchronized int getNextSeqNumber() {
     return ++sequenceNumber;
   }

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkWork.java Wed Feb 11 17:48:36 2015
@@ -34,10 +34,12 @@ import org.apache.hadoop.hive.ql.exec.Op
 import org.apache.hadoop.hive.ql.exec.OperatorFactory;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
 import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
 import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.optimizer.spark.SparkSortMergeJoinFactory;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.BaseWork;
 import org.apache.hadoop.hive.ql.plan.MapWork;
@@ -118,18 +120,12 @@ public class GenSparkWork implements Nod
     } else {
       // create a new vertex
       if (context.preceedingWork == null) {
-        if (smbOp != null) {
-          // This logic is for SortMergeBucket MapJoin case.
-          // This MapWork (of big-table, see above..) is later initialized by SparkMapJoinFactory
-          // processor, so don't initialize it here. Just keep track of it in the context,
-          // for later processing.
-          work = utils.createMapWork(context, root, sparkWork, null, true);
-          if (context.smbJoinWorkMap.get(smbOp) != null) {
-            throw new SemanticException("Each SMBMapJoin should be associated only with one Mapwork");
-          }
-          context.smbJoinWorkMap.put(smbOp, (MapWork) work);
-        } else {
+        if (smbOp == null) {
           work = utils.createMapWork(context, root, sparkWork, null);
+        } else {
+          //save work to be initialized later with SMB information.
+          work = utils.createMapWork(context, root, sparkWork, null, true);
+          context.smbMapJoinCtxMap.get(smbOp).mapWork = (MapWork) work;
         }
       } else {
         work = utils.createReduceWork(context, root, sparkWork);
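
One subtlety in the new else branch: context.smbMapJoinCtxMap.get(smbOp) assumes the SMBMapJoinOperator rule in SparkCompiler (further down) has already recorded an entry for this operator, otherwise the chained assignment throws a NullPointerException. A more defensive form of the same assignment, offered only as a suggestion and not part of the patch:

// Suggestion only: fail with a clear message if the SMB rule has not
// recorded this operator yet.
SparkSMBMapJoinInfo smbInfo = context.smbMapJoinCtxMap.get(smbOp);
if (smbInfo == null) {
  throw new SemanticException("No SMB join info recorded for " + smbOp);
}
work = utils.createMapWork(context, root, sparkWork, null, true);
smbInfo.mapWork = (MapWork) work;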

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java Wed Feb 11 17:48:36 2015
@@ -186,17 +186,30 @@ public class SparkCompiler extends TaskC
      *
      * Some of the other processors are expecting only one traversal beyond SMBJoinOp.
      * We need to traverse from the big-table path only, and stop traversing on the small-table path once we reach SMBJoinOp.
+     * Also add some SMB join information to the context, so we can properly annotate the MapWork later on.
      */
     opRules.put(new TypeRule(SMBMapJoinOperator.class),
       new NodeProcessor() {
         @Override
         public Object process(Node currNode, Stack<Node> stack,
                               NodeProcessorCtx procCtx, Object... os) throws SemanticException {
+          GenSparkProcContext context = (GenSparkProcContext) procCtx;
+          SMBMapJoinOperator currSmbNode = (SMBMapJoinOperator) currNode;
+          SparkSMBMapJoinInfo smbMapJoinCtx = context.smbMapJoinCtxMap.get(currSmbNode);
+          if (smbMapJoinCtx == null) {
+            smbMapJoinCtx = new SparkSMBMapJoinInfo();
+            context.smbMapJoinCtxMap.put(currSmbNode, smbMapJoinCtx);
+          }
+
           for (Node stackNode : stack) {
             if (stackNode instanceof DummyStoreOperator) {
+              //If coming from small-table side, do some book-keeping, and skip traversal.
+              smbMapJoinCtx.smallTableRootOps.add(context.currentRootOperator);
               return true;
             }
           }
+          //If coming from big-table side, do some book-keeping, and continue traversal
+          smbMapJoinCtx.bigTableRootOp = context.currentRootOperator;
           return false;
         }
       }
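
The rule above classifies which side of the SMB join the walk arrived from by scanning the traversal stack for a DummyStoreOperator. The same test, pulled out as a stand-alone helper purely for illustration (not part of the patch):

import java.util.List;

import org.apache.hadoop.hive.ql.exec.DummyStoreOperator;
import org.apache.hadoop.hive.ql.lib.Node;

// Illustration only: true means the walk came in via a small-table branch
// (a DummyStoreOperator sits on the path), false means the big-table branch.
final class SmbBranchTest {
  static boolean cameFromSmallTable(List<Node> pathFromRoot) {
    for (Node n : pathFromRoot) {
      if (n instanceof DummyStoreOperator) {
        return true;
      }
    }
    return false;
  }
}
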
@@ -210,24 +223,14 @@ public class SparkCompiler extends TaskC
     GraphWalker ogw = new GenSparkWorkWalker(disp, procCtx);
     ogw.startWalking(topNodes, null);
 
-
-    // ------------------- Second Pass -----------------------
-    // SMB Join optimizations to add the "localWork" and bucketing data structures to MapWork.
-    opRules.clear();
-    opRules.put(new TypeRule(SMBMapJoinOperator.class),
-       SparkSortMergeJoinFactory.getTableScanMapJoin());
-
-    disp = new DefaultRuleDispatcher(null, opRules, procCtx);
-    topNodes = new ArrayList<Node>();
-    topNodes.addAll(pCtx.getTopOps().values());
-    ogw = new GenSparkWorkWalker(disp, procCtx);
-    ogw.startWalking(topNodes, null);
-
     // we need to clone some operator plans and remove union operators still
     for (BaseWork w: procCtx.workWithUnionOperators) {
       GenSparkUtils.getUtils().removeUnionOperators(conf, procCtx, w);
     }
 
+    // we need to fill MapWork with 'local' work and bucket information for SMB Join.
+    GenSparkUtils.getUtils().annotateMapWork(procCtx);
+
     // finally make sure the file sink operators are set up right
     for (FileSinkOperator fileSink: procCtx.fileSinkSet) {
       GenSparkUtils.getUtils().processFileSink(procCtx, fileSink);

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java Wed Feb 11 17:48:36 2015
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.udf.UDFType;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hive.common.util.AnnotationUtils;
@@ -53,7 +54,6 @@ public class GroupByDesc extends Abstrac
   };
 
   private Mode mode;
-  private boolean groupKeyNotReductionKey;
 
   // no hash aggregations for group by
   private boolean bucketGroup;
@@ -81,14 +81,13 @@ public class GroupByDesc extends Abstrac
       final ArrayList<java.lang.String> outputColumnNames,
       final ArrayList<ExprNodeDesc> keys,
       final ArrayList<org.apache.hadoop.hive.ql.plan.AggregationDesc> aggregators,
-      final boolean groupKeyNotReductionKey,
       final float groupByMemoryUsage,
       final float memoryThreshold,
       final List<Integer> listGroupingSets,
       final boolean groupingSetsPresent,
       final int groupingSetsPosition,
       final boolean isDistinct) {
-    this(mode, outputColumnNames, keys, aggregators, groupKeyNotReductionKey,
+    this(mode, outputColumnNames, keys, aggregators,
         false, groupByMemoryUsage, memoryThreshold, listGroupingSets,
         groupingSetsPresent, groupingSetsPosition, isDistinct);
   }
@@ -98,7 +97,6 @@ public class GroupByDesc extends Abstrac
       final ArrayList<java.lang.String> outputColumnNames,
       final ArrayList<ExprNodeDesc> keys,
       final ArrayList<org.apache.hadoop.hive.ql.plan.AggregationDesc> aggregators,
-      final boolean groupKeyNotReductionKey,
       final boolean bucketGroup,
       final float groupByMemoryUsage,
       final float memoryThreshold,
@@ -111,7 +109,6 @@ public class GroupByDesc extends Abstrac
     this.outputColumnNames = outputColumnNames;
     this.keys = keys;
     this.aggregators = aggregators;
-    this.groupKeyNotReductionKey = groupKeyNotReductionKey;
     this.bucketGroup = bucketGroup;
     this.groupByMemoryUsage = groupByMemoryUsage;
     this.memoryThreshold = memoryThreshold;
@@ -179,7 +176,7 @@ public class GroupByDesc extends Abstrac
 
   @Explain(displayName = "pruneGroupingSetId", displayOnlyOnTrue = true)
   public boolean pruneGroupingSetId() {
-    return groupingSetPosition >= 0 && 
+    return groupingSetPosition >= 0 &&
         outputColumnNames.size() != keys.size() + aggregators.size();
   }
 
@@ -222,14 +219,13 @@ public class GroupByDesc extends Abstrac
     this.aggregators = aggregators;
   }
 
-  public boolean getGroupKeyNotReductionKey() {
-    return groupKeyNotReductionKey;
-  }
-
-  public void setGroupKeyNotReductionKey(final boolean groupKeyNotReductionKey) {
-    this.groupKeyNotReductionKey = groupKeyNotReductionKey;
+  public boolean isAggregate() {
+    if (this.aggregators != null && !this.aggregators.isEmpty()) {
+      return true;
+    }
+    return false;
   }
-
+  
   @Explain(displayName = "bucketGroup", displayOnlyOnTrue = true)
   public boolean getBucketGroup() {
     return bucketGroup;

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java Wed Feb 11 17:48:36 2015
@@ -238,6 +238,13 @@ public class ReduceSinkDesc extends Abst
     this.partitionCols = partitionCols;
   }
 
+  public boolean isPartitioning() {
+    if (partitionCols != null && !partitionCols.isEmpty()) {
+      return true;
+    }
+    return false;
+  }
+
   @Explain(displayName = "tag", normalExplain = false)
   public int getTag() {
     return tag;
@@ -338,6 +345,13 @@ public class ReduceSinkDesc extends Abst
         orderStr);
   }
 
+  public boolean isOrdering() {
+    if (this.getOrder() != null && !this.getOrder().isEmpty()) {
+      return true;
+    }
+    return false;
+  }
+
   public List<List<Integer>> getDistinctColumnIndices() {
     return distinctColumnIndices;
   }
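
A style note on the three predicates added in this and the previous file (GroupByDesc.isAggregate, and isPartitioning/isOrdering above): each wraps a single "non-null and non-empty" test in an if/return-true/return-false. The shared idiom, written once as a stand-alone helper (a suggestion only; behaviour is identical to the committed code):

import java.util.Collection;

// "Non-null and non-empty" expressed once; isAggregate(), isPartitioning()
// and isOrdering() then reduce to single-line returns such as
// return nonEmpty(aggregators);
final class NonEmpty {
  static boolean nonEmpty(Collection<?> c) {
    return c != null && !c.isEmpty();
  }

  static boolean nonEmpty(CharSequence s) {
    return s != null && s.length() > 0;
  }
}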

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/plan/StatsNoJobWork.java Wed Feb 11 17:48:36 2015
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
 import java.io.Serializable;
 
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.tableSpec;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
 
 /**
  * Client-side stats aggregator task.
@@ -31,6 +32,7 @@ public class StatsNoJobWork implements S
 
   private tableSpec tableSpecs;
   private boolean statsReliable;
+  private PrunedPartitionList prunedPartitionList;
 
   public StatsNoJobWork() {
   }
@@ -54,4 +56,12 @@ public class StatsNoJobWork implements S
   public void setStatsReliable(boolean statsReliable) {
     this.statsReliable = statsReliable;
   }
+
+  public void setPrunedPartitionList(PrunedPartitionList prunedPartitionList) {
+    this.prunedPartitionList = prunedPartitionList;
+  }
+
+  public PrunedPartitionList getPrunedPartitionList() {
+    return prunedPartitionList;
+  }
 }

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java Wed Feb 11 17:48:36 2015
@@ -87,6 +87,29 @@ public class GenericUDAFSum extends Abst
     }
   }
 
+  public static PrimitiveObjectInspector.PrimitiveCategory getReturnType(TypeInfo type) {
+    if (type.getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      return null;
+    }
+    switch (((PrimitiveTypeInfo) type).getPrimitiveCategory()) {
+      case BYTE:
+      case SHORT:
+      case INT:
+      case LONG:
+        return PrimitiveObjectInspector.PrimitiveCategory.LONG;
+      case TIMESTAMP:
+      case FLOAT:
+      case DOUBLE:
+      case STRING:
+      case VARCHAR:
+      case CHAR:
+        return PrimitiveObjectInspector.PrimitiveCategory.DOUBLE;
+      case DECIMAL:
+        return PrimitiveObjectInspector.PrimitiveCategory.DECIMAL;
+    }
+    return null;
+  }
+
   /**
    * GenericUDAFSumHiveDecimal.
    *
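
A minimal usage sketch for the new static helper, assuming the standard TypeInfoFactory singletons (this demo class is not part of the patch): integral inputs widen to LONG, floating-point and string-like inputs to DOUBLE, and decimal stays DECIMAL.

import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class SumReturnTypeDemo {
  public static void main(String[] args) {
    PrimitiveCategory fromInt = GenericUDAFSum.getReturnType(TypeInfoFactory.intTypeInfo);         // LONG
    PrimitiveCategory fromDouble = GenericUDAFSum.getReturnType(TypeInfoFactory.doubleTypeInfo);   // DOUBLE
    PrimitiveCategory fromDecimal = GenericUDAFSum.getReturnType(TypeInfoFactory.decimalTypeInfo); // DECIMAL
    System.out.println(fromInt + " " + fromDouble + " " + fromDecimal);
  }
}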

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFAddMonths.java Wed Feb 11 17:48:36 2015
@@ -73,11 +73,11 @@ public class GenericUDFAddMonths extends
     }
     if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
       throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
-          + arguments[0].getTypeName() + " is passed. as first arguments");
+          + arguments[0].getTypeName() + " is passed as first arguments");
     }
     if (arguments[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
       throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
-          + arguments[2].getTypeName() + " is passed. as second arguments");
+          + arguments[1].getTypeName() + " is passed as second arguments");
     }
     inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();
     ObjectInspector outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
@@ -100,14 +100,14 @@ public class GenericUDFAddMonths extends
           PrimitiveObjectInspectorFactory.writableDateObjectInspector);
       break;
     default:
-      throw new UDFArgumentException(
-          " ADD_MONTHS() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got "
+      throw new UDFArgumentTypeException(0,
+          "ADD_MONTHS() only takes STRING/TIMESTAMP/DATEWRITABLE types as first argument, got "
               + inputType1);
     }
     inputType2 = ((PrimitiveObjectInspector) arguments[1]).getPrimitiveCategory();
     if (inputType2 != PrimitiveCategory.INT) {
-      throw new UDFArgumentException(" ADD_MONTHS() only takes INT types as second argument, got "
-          + inputType2);
+      throw new UDFArgumentTypeException(1,
+          "ADD_MONTHS() only takes INT types as second argument, got " + inputType2);
     }
     intWritableConverter = ObjectInspectorConverters.getConverter(
         (PrimitiveObjectInspector) arguments[1],
@@ -144,7 +144,7 @@ public class GenericUDFAddMonths extends
       date = dw.get();
       break;
     default:
-      throw new UDFArgumentException(
+      throw new UDFArgumentTypeException(0,
           "ADD_MONTHS() only takes STRING/TIMESTAMP/DATEWRITABLE types, got " + inputType1);
     }
     int numMonth = toBeAdded.get();

Modified: hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java
URL: http://svn.apache.org/viewvc/hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java?rev=1659014&r1=1659013&r2=1659014&view=diff
==============================================================================
--- hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java (original)
+++ hive/branches/parquet/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFDateAdd.java Wed Feb 11 17:48:36 2015
@@ -89,7 +89,7 @@ public class GenericUDFDateAdd extends G
     if (arguments[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
       throw new UDFArgumentTypeException(1,
         "Only primitive type arguments are accepted but "
-        + arguments[2].getTypeName() + " is passed. as second arguments");
+        + arguments[1].getTypeName() + " is passed. as second arguments");
     }
 
     inputType1 = ((PrimitiveObjectInspector) arguments[0]).getPrimitiveCategory();


