hive-commits mailing list archives

From: gunt...@apache.org
Subject: svn commit: r1550684 [4/24] - in /hive/branches/tez: ./ ant/src/org/apache/hadoop/hive/ant/ beeline/ beeline/src/java/org/apache/hive/beeline/ common/src/java/org/apache/hadoop/hive/conf/ contrib/src/test/results/clientnegative/ contrib/src/test/result...
Date: Fri, 13 Dec 2013 10:56:54 GMT
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Fri Dec 13 10:56:38 2013
@@ -23,12 +23,12 @@ import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
-import org.apache.hadoop.hive.ql.exec.DemuxOperator;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
@@ -53,7 +53,6 @@ import org.apache.hadoop.hive.ql.plan.Ex
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
-import org.apache.hadoop.hive.ql.plan.Statistics.State;
 import org.apache.hadoop.hive.ql.stats.StatsUtils;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
@@ -75,13 +74,16 @@ import com.google.common.collect.Maps;
 
 public class StatsRulesProcFactory {
 
+  private static final Log LOG = LogFactory.getLog(StatsRulesProcFactory.class.getName());
+
   /**
-   * Collect basic statistics like number of rows, data size and column level
-   * statistics from the table. Also sets the state of the available statistics.
-   * Basic and column statistics can have one of the following states
-   * COMPLETE, PARTIAL, NONE. In case of partitioned table, the basic and column
-   * stats are aggregated together to table level statistics.
-   *
+   * Collect basic statistics like number of rows, data size and column level statistics from the
+   * table. Also sets the state of the available statistics. Basic and column statistics can have
+   * one of the following states: COMPLETE, PARTIAL, NONE. In the case of a partitioned table, the
+   * basic and column stats are aggregated together into table level statistics. Column statistics
+   * will not be collected if hive.stats.fetch.column.stats is set to false. If basic statistics
+   * are not available then the number of rows will be estimated from the file size and the average
+   * row size (computed from the schema).
    */
   public static class TableScanStatsRule extends DefaultStatsRule implements NodeProcessor {
 
@@ -102,6 +104,10 @@ public class StatsRulesProcFactory {
       Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop);
       try {
         tsop.setStatistics(stats.clone());
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("[0] STATS-" + tsop.toString() + ": " + stats.extendedToString());
+        }
       } catch (CloneNotSupportedException e) {
         throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
       }
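
When a table has no basic statistics at all, the TableScanStatsRule javadoc above says the row count falls back to the file size divided by the schema-derived average row size. A rough standalone sketch of that arithmetic, with invented numbers (it is not part of this patch and does not use the real StatsUtils API):

    // Hypothetical illustration of the file-size based row-count fallback.
    public class RowCountFallbackSketch {
      public static void main(String[] args) {
        long fileSize = 64L * 1024 * 1024;  // 64 MB of raw data on disk (assumed)
        long avgRowSize = 100;              // average row width from the schema (assumed)
        long estimatedRows = fileSize / avgRowSize;
        System.out.println("estimated rows = " + estimatedRows); // 671088
      }
    }
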
@@ -110,23 +116,19 @@ public class StatsRulesProcFactory {
   }
 
   /**
-   * SELECT operator doesn't change the number of rows emitted from the parent
-   * operator. It changes the size of each tuple emitted. In a typical case,
-   * where only subset of columns are selected the average row size will
-   * reduce as some of the columns will be pruned. In order to accurately
-   * compute the average row size, column level statistics is required.
-   * Column level statistics stores average size of values in column which
-   * can be used to more reliably estimate the reduction in size of each
-   * tuple. In the absence of column level statistics, size of columns will be
-   * based on data type. For primitive data types size from
-   * {@link org.apache.hadoop.hive.ql.util.JavaDataModel} will be
-   * used and for variable length data types worst case will be assumed.
-   *
+   * SELECT operator doesn't change the number of rows emitted from the parent operator. It changes
+   * the size of each tuple emitted. In a typical case, where only a subset of columns is selected,
+   * the average row size will shrink as some of the columns are pruned. In order to accurately
+   * compute the average row size, column level statistics are required. Column level statistics
+   * store the average size of values in a column, which can be used to more reliably estimate the
+   * reduction in size of each tuple. In the absence of column level statistics, the size of columns
+   * will be based on the data type. For primitive data types the size from
+   * {@link org.apache.hadoop.hive.ql.util.JavaDataModel} will be used, and for variable length data
+   * types the worst case will be assumed.
    * <p>
    * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in
    * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i>
    * </p>
-   *
    */
   public static class SelectStatsRule extends DefaultStatsRule implements NodeProcessor {
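
Per the SelectStatsRule javadoc above, the SELECT rule keeps the parent's row count and only recomputes the data size from the average sizes of the surviving columns. A minimal sketch of that recomputation, assuming made-up column widths (it is not the actual StatsUtils.getDataSizeFromColumnStats code):

    // Hypothetical example: data size after column pruning.
    public class SelectDataSizeSketch {
      public static void main(String[] args) {
        long numRows = 10000;               // SELECT does not change the row count
        long[] avgColSizes = {4, 8, 20};    // assumed average sizes of the projected columns
        long avgRowSize = 0;
        for (long size : avgColSizes) {
          avgRowSize += size;               // 32 bytes per emitted tuple
        }
        System.out.println("data size = " + (numRows * avgRowSize)); // 320000 bytes
      }
    }
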
 
@@ -155,15 +157,24 @@ public class StatsRulesProcFactory {
       try {
         if (satisfyPrecondition(parentStats)) {
           Statistics stats = parentStats.clone();
-          List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats,
-              sop.getColumnExprMap(), sop.getSchema());
+          List<ColStatistics> colStats =
+              StatsUtils.getColStatisticsFromExprMap(conf, parentStats, sop.getColumnExprMap(),
+                  sop.getSchema());
           long dataSize = StatsUtils.getDataSizeFromColumnStats(stats.getNumRows(), colStats);
           stats.setColumnStats(colStats);
           stats.setDataSize(dataSize);
           sop.setStatistics(stats);
+
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[0] STATS-" + sop.toString() + ": " + stats.extendedToString());
+          }
         } else {
           if (parentStats != null) {
             sop.setStatistics(parentStats.clone());
+
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("[1] STATS-" + sop.toString() + ": " + parentStats.extendedToString());
+            }
           }
         }
       } catch (CloneNotSupportedException e) {
@@ -175,16 +186,13 @@ public class StatsRulesProcFactory {
   }
 
   /**
-   * FILTER operator does not change the average row size but it does change
-   * the number of rows emitted. The reduction in the number of rows emitted
-   * is dependent on the filter expression.
-   *
+   * FILTER operator does not change the average row size but it does change the number of rows
+   * emitted. The reduction in the number of rows emitted is dependent on the filter expression.
    * <ul>
    * <i>Notations:</i>
    * <li>T(S) - Number of tuples in relations S</li>
    * <li>V(S,A) - Number of distinct values of attribute A in relation S</li>
    * </ul>
-   *
    * <ul>
    * <i>Rules:</i> <b>
    * <li>Column equals a constant</li></b> T(S) = T(R) / V(R,A)
@@ -207,20 +215,18 @@ public class StatsRulesProcFactory {
    * <li>Multiple OR conditions</li></b> - Simple formula is to evaluate conditions independently
    * and sum the results T(S) = m1 + m2
    * <p>
-   *
    * - Alternate formula T(S) = T(R) * ( 1 - ( 1 - m1/T(R) ) * ( 1 - m2/T(R) ))
    * <p>
    * where, m1 is the number of tuples that satisfy condition1 and m2 is the number of tuples that
    * satisfy condition2
    * </ul>
    * <p>
-   * <i>Worst case:</i> If no column statistics are available, then T(R) = T(R)/2 will be
-   * used as heuristics.
+   * <i>Worst case:</i> If no column statistics are available, then the evaluation of the predicate
+   * expression will assume the worst case (i.e., half the input rows) for each predicate expression.
    * <p>
    * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in
    * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i>
    * </p>
-   *
    */
   public static class FilterStatsRule extends DefaultStatsRule implements NodeProcessor {
 
@@ -231,26 +237,47 @@ public class StatsRulesProcFactory {
       FilterOperator fop = (FilterOperator) nd;
       Operator<? extends OperatorDesc> parent = fop.getParentOperators().get(0);
       Statistics parentStats = parent.getStatistics();
+      List<String> neededCols = null;
+      if (parent instanceof TableScanOperator) {
+        TableScanOperator tsop = (TableScanOperator) parent;
+        neededCols = tsop.getNeededColumns();
+      }
 
       try {
-        if (satisfyPrecondition(parentStats)) {
+        if (parentStats != null) {
           ExprNodeDesc pred = fop.getConf().getPredicate();
 
           // evaluate filter expression and update statistics
-          long newNumRows = evaluateExpression(parentStats, pred, aspCtx);
+          long newNumRows = evaluateExpression(parentStats, pred, aspCtx, neededCols);
           Statistics st = parentStats.clone();
-          updateStats(st, newNumRows);
-          fop.setStatistics(st);
-        } else {
-          if (parentStats != null) {
 
-            // worst case, in the absence of column statistics assume half the rows are emitted
-            Statistics wcStats = getWorstCaseStats(parentStats.clone());
-            fop.setStatistics(wcStats);
+          if (satisfyPrecondition(parentStats)) {
+
+            // update statistics based on column statistics.
+            // OR conditions keep adding the stats independently; this may
+            // result in the number of rows exceeding the input rows, in
+            // which case stats need not be updated
+            if (newNumRows <= parentStats.getNumRows()) {
+              updateStats(st, newNumRows, true);
+            }
+
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("[0] STATS-" + fop.toString() + ": " + st.extendedToString());
+            }
+          } else {
+
+            // update only the basic statistics in the absence of column statistics
+            if (newNumRows <= parentStats.getNumRows()) {
+              updateStats(st, newNumRows, false);
+            }
+
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("[1] STATS-" + fop.toString() + ": " + st.extendedToString());
+            }
           }
+          fop.setStatistics(st);
+          aspCtx.setAndExprStats(null);
         }
-
-        aspCtx.setAndExprStats(null);
       } catch (CloneNotSupportedException e) {
         throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
       }
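
Worked with concrete numbers, the selectivity rules from the FilterStatsRule javadoc above behave as in the following standalone sketch. The row and distinct-value counts are invented, and the AND case simply applies the equality rule twice, mirroring how the patch updates stats per child expression:

    // Hypothetical walk-through of the filter selectivity heuristics.
    public class FilterSelectivitySketch {
      public static void main(String[] args) {
        long tR = 1000;    // T(R): rows entering the filter (assumed)
        long vRA = 50;     // V(R,A): distinct values of column A (assumed)
        long vRB = 10;     // V(R,B): distinct values of column B (assumed)

        long eqA = tR / vRA;              // A = const                           -> 20
        long eqB = tR / vRB;              // B = const                           -> 100
        long andBoth = tR / (vRA * vRB);  // A = c1 AND B = c2, applied in turn  -> 2
        long orBoth = eqA + eqB;          // simple OR formula m1 + m2           -> 120
        long noStats = tR / 2;            // no column statistics: half the rows -> 500

        System.out.println(eqA + " " + eqB + " " + andBoth + " " + orBoth + " " + noStats);
      }
    }
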
@@ -258,7 +285,7 @@ public class StatsRulesProcFactory {
     }
 
     private long evaluateExpression(Statistics stats, ExprNodeDesc pred,
-        AnnotateStatsProcCtx aspCtx) throws CloneNotSupportedException {
+        AnnotateStatsProcCtx aspCtx, List<String> neededCols) throws CloneNotSupportedException {
       long newNumRows = 0;
       Statistics andStats = null;
       if (pred instanceof ExprNodeGenericFuncDesc) {
@@ -272,28 +299,24 @@ public class StatsRulesProcFactory {
 
           // evaluate children
           for (ExprNodeDesc child : genFunc.getChildren()) {
-            newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx);
-            updateStats(aspCtx.getAndExprStats(), newNumRows);
+            newNumRows = evaluateChildExpr(aspCtx.getAndExprStats(), child, aspCtx, neededCols);
+            if (satisfyPrecondition(aspCtx.getAndExprStats())) {
+              updateStats(aspCtx.getAndExprStats(), newNumRows, true);
+            } else {
+              updateStats(aspCtx.getAndExprStats(), newNumRows, false);
+            }
           }
-        } else {
-
+        } else if (udf instanceof GenericUDFOPOr) {
           // for OR condition independently compute and update stats
-          if (udf instanceof GenericUDFOPOr) {
-            for (ExprNodeDesc child : genFunc.getChildren()) {
-              newNumRows += evaluateChildExpr(stats, child, aspCtx);
-            }
-          } else if (udf instanceof GenericUDFOPNot) {
-            newNumRows = evaluateNotExpr(stats, pred, aspCtx);
-          } else if (udf instanceof GenericUDFOPNotNull) {
-            newNumRows = evaluateColEqualsNullExpr(stats, pred, aspCtx);
-            newNumRows = stats.getNumRows() - newNumRows;
-          } else if (udf instanceof GenericUDFOPNull) {
-            newNumRows = evaluateColEqualsNullExpr(stats, pred, aspCtx);
-          } else {
-
-            // single predicate condition
-            newNumRows = evaluateChildExpr(stats, pred, aspCtx);
+          for (ExprNodeDesc child : genFunc.getChildren()) {
+            newNumRows += evaluateChildExpr(stats, child, aspCtx, neededCols);
           }
+        } else if (udf instanceof GenericUDFOPNot) {
+          newNumRows = evaluateNotExpr(stats, pred, aspCtx, neededCols);
+        } else {
+
+          // single predicate condition
+          newNumRows = evaluateChildExpr(stats, pred, aspCtx, neededCols);
         }
       } else if (pred instanceof ExprNodeColumnDesc) {
 
@@ -304,19 +327,20 @@ public class StatsRulesProcFactory {
         String colType = encd.getTypeString();
         if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
           ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
-          return cs.getNumTrues();
-        } else {
-
-          // if not boolean column return half the number of rows
-          return stats.getNumRows() / 2;
+          if (cs != null) {
+            return cs.getNumTrues();
+          }
         }
+
+        // if not boolean column return half the number of rows
+        return stats.getNumRows() / 2;
       }
 
       return newNumRows;
     }
 
-    private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx)
-        throws CloneNotSupportedException {
+    private long evaluateNotExpr(Statistics stats, ExprNodeDesc pred, AnnotateStatsProcCtx aspCtx,
+        List<String> neededCols) throws CloneNotSupportedException {
 
       long numRows = stats.getNumRows();
 
@@ -329,7 +353,7 @@ public class StatsRulesProcFactory {
             // GenericUDF
             long newNumRows = 0;
             for (ExprNodeDesc child : ((ExprNodeGenericFuncDesc) pred).getChildren()) {
-              newNumRows = evaluateChildExpr(stats, child, aspCtx);
+              newNumRows = evaluateChildExpr(stats, child, aspCtx, neededCols);
             }
             return numRows - newNumRows;
           } else if (leaf instanceof ExprNodeConstantDesc) {
@@ -348,18 +372,18 @@ public class StatsRulesProcFactory {
             String colType = encd.getTypeString();
             if (colType.equalsIgnoreCase(serdeConstants.BOOLEAN_TYPE_NAME)) {
               ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
-              return cs.getNumFalses();
-            } else {
-
-              // if not boolean column return half the number of rows
-              return numRows / 2;
+              if (cs != null) {
+                return cs.getNumFalses();
+              }
             }
+            // if not boolean column return half the number of rows
+            return numRows / 2;
           }
         }
       }
 
       // worst case
-      return numRows;
+      return numRows / 2;
     }
 
     private long evaluateColEqualsNullExpr(Statistics stats, ExprNodeDesc pred,
@@ -380,26 +404,19 @@ public class StatsRulesProcFactory {
             ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
             if (cs != null) {
               long dvs = cs.getCountDistint();
-              // if NULLs exists, add 1 to distinct count
-              if (cs.getNumNulls() > 0) {
-                dvs += 1;
-              }
-              if (dvs != 0) {
-                return numRows / dvs;
-              } else {
-                return numRows;
-              }
+              numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
+              return numRows;
             }
           }
         }
       }
 
       // worst case
-      return numRows;
+      return numRows / 2;
     }
 
-    private long evaluateChildExpr(Statistics stats, ExprNodeDesc child, AnnotateStatsProcCtx aspCtx)
-        throws CloneNotSupportedException {
+    private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
+        AnnotateStatsProcCtx aspCtx, List<String> neededCols) throws CloneNotSupportedException {
 
       long numRows = stats.getNumRows();
 
@@ -421,19 +438,19 @@ public class StatsRulesProcFactory {
                 isConst = true;
                 continue;
               }
+
+              // if the column name is not contained in the needed column list then it
+              // is a partition column. We do not need to evaluate partition columns
+              // in the filter expression since they will be taken care of by the partition pruner
+              if (neededCols != null && !neededCols.contains(colName)) {
+                return numRows;
+              }
+
               ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
               if (cs != null) {
                 long dvs = cs.getCountDistint();
-                // if NULLs exists, add 1 to distinct count
-                if (cs.getNumNulls() > 0) {
-                  dvs += 1;
-                }
-
-                if (dvs != 0) {
-                  return numRows / dvs;
-                } else {
-                  return numRows;
-                }
+                numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
+                return numRows;
               }
             } else if (leaf instanceof ExprNodeColumnDesc) {
               ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
@@ -442,53 +459,56 @@ public class StatsRulesProcFactory {
 
               // if const is first argument then evaluate the result
               if (isConst) {
+
+                // if the column name is not contained in the needed column list then it
+                // is a partition column. We do not need to evaluate partition columns
+                // in the filter expression since they will be taken care of by the partition pruner
+                if (neededCols != null && neededCols.indexOf(colName) == -1) {
+                  return numRows;
+                }
+
                 ColStatistics cs = stats.getColumnStatisticsForColumn(tabAlias, colName);
                 if (cs != null) {
                   long dvs = cs.getCountDistint();
-                  // if NULLs exists, add 1 to distinct count
-                  if (cs.getNumNulls() > 0) {
-                    dvs += 1;
-                  }
-
-                  if (dvs != 0) {
-                    return numRows / dvs;
-                  } else {
-                    return numRows;
-                  }
+                  numRows = dvs == 0 ? numRows / 2 : numRows / dvs;
+                  return numRows;
                 }
               }
             }
           }
         } else if (udf instanceof GenericUDFOPNotEqual) {
           return numRows;
-        } else if (udf instanceof GenericUDFOPEqualOrGreaterThan ||
-            udf instanceof GenericUDFOPEqualOrLessThan ||
-            udf instanceof GenericUDFOPGreaterThan ||
-            udf instanceof GenericUDFOPLessThan) {
+        } else if (udf instanceof GenericUDFOPEqualOrGreaterThan
+            || udf instanceof GenericUDFOPEqualOrLessThan || udf instanceof GenericUDFOPGreaterThan
+            || udf instanceof GenericUDFOPLessThan) {
           return numRows / 3;
-        } else {
-          return evaluateExpression(stats, genFunc, aspCtx);
+        } else if (udf instanceof GenericUDFOPNotNull) {
+          long newNumRows = evaluateColEqualsNullExpr(stats, genFunc, aspCtx);
+          return stats.getNumRows() - newNumRows;
+        } else if (udf instanceof GenericUDFOPNull) {
+          return evaluateColEqualsNullExpr(stats, genFunc, aspCtx);
+        } else if (udf instanceof GenericUDFOPAnd || udf instanceof GenericUDFOPOr
+            || udf instanceof GenericUDFOPNot) {
+          return evaluateExpression(stats, genFunc, aspCtx, neededCols);
         }
       }
 
       // worst case
-      return numRows;
+      return numRows / 2;
     }
 
   }
 
   /**
-   * GROUPBY operator changes the number of rows. The number of rows emitted
-   * by GBY operator will be atleast 1 or utmost T(R) (number of rows in relation T)
-   * based on the aggregation. A better estimate can be found if we have column statistics
-   * on the columns that we are grouping on.
+   * GROUPBY operator changes the number of rows. The number of rows emitted by GBY operator will be
+   * at least 1 and at most T(R) (the number of rows in relation R) based on the aggregation. A
+   * better estimate can be found if we have column statistics on the columns that we are grouping on.
    * <p>
    * Suppose if we are grouping by attributes A,B,C and if statistics for columns A,B,C are
    * available then a better estimate can be found by taking the smaller of product of V(R,[A,B,C])
    * (product of distinct cardinalities of A,B,C) and T(R)/2.
    * <p>
    * T(R) = min (T(R)/2 , V(R,[A,B,C]) ---> [1]
-   *
    * <p>
    * In the presence of grouping sets, map-side GBY will emit more rows depending on the size of
    * grouping set (input rows * size of grouping set). These rows will get reduced because of
@@ -503,27 +523,23 @@ public class StatsRulesProcFactory {
    * T(R) = min(T(R)/2, T(R, GBY(A,B)) + T(R, GBY(A)) + T(R, GBY(B)) + 1))
    * <p>
    * where, GBY(A,B), GBY(B), GBY(B) are the GBY rules mentioned above [1]
-   *
    * <p>
    * If <b>hash-aggregation is disabled</b>, apply the GBY rule [1] and then multiply the result by
    * number of elements in grouping set T(R) = T(R) * length_of_grouping_set. Since we do not know
    * if hash-aggregation is enabled or disabled during compile time, we will assume worst-case i.e,
    * hash-aggregation is disabled
-   *
    * <p>
    * NOTE: The number of rows from map-side GBY operator is dependent on map-side parallelism i.e,
    * number of mappers. The map-side parallelism is expected from hive config
    * "hive.stats.map.parallelism". If the config is not set then default parallelism of 1 will be
    * assumed.
-   *
    * <p>
-   * <i>Worst case:</i> If no column statistics are available, then T(R) = T(R)/2 will be
-   * used as heuristics.
+   * <i>Worst case:</i> If no column statistics are available, then T(R) = T(R)/2 will be used as
+   * heuristics.
    * <p>
    * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in
    * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i>
    * </p>
-   *
    */
   public static class GroupByStatsRule extends DefaultStatsRule implements NodeProcessor {
 
@@ -535,8 +551,8 @@ public class StatsRulesProcFactory {
       Statistics parentStats = parent.getStatistics();
       AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
       HiveConf conf = aspCtx.getConf();
-      int mapSideParallelism = HiveConf.getIntVar(conf,
-          HiveConf.ConfVars.HIVE_STATS_MAP_SIDE_PARALLELISM);
+      int mapSideParallelism =
+          HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_MAP_SIDE_PARALLELISM);
       List<AggregationDesc> aggDesc = gop.getConf().getAggregators();
       Map<String, ExprNodeDesc> colExprMap = gop.getColumnExprMap();
       RowSchema rs = gop.getSchema();
@@ -546,8 +562,8 @@ public class StatsRulesProcFactory {
         if (satisfyPrecondition(parentStats)) {
           stats = parentStats.clone();
 
-          List<ColStatistics> colStats = StatsUtils.getColStatisticsFromExprMap(conf, parentStats,
-              colExprMap, rs);
+          List<ColStatistics> colStats =
+              StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
           stats.setColumnStats(colStats);
           long dvProd = 1;
           long newNumRows = 0;
@@ -563,7 +579,8 @@ public class StatsRulesProcFactory {
             } else {
 
               // partial column statistics on grouping attributes case.
-              // if column statistics on grouping attribute is missing, then assume worst case.
+              // if column statistics on grouping attribute is missing, then
+              // assume worst case.
               // GBY rule will emit half the number of rows if dvProd is 0
               dvProd = 0;
               break;
@@ -574,8 +591,8 @@ public class StatsRulesProcFactory {
           if (gop.getChildOperators().get(0) instanceof ReduceSinkOperator) {
 
             // since we do not know if hash-aggregation will be enabled or disabled
-            // at runtime we will assume that map-side group by does not do any reduction.
-            // hence no group by rule will be applied
+            // at runtime we will assume that map-side group by does not do any
+            // reduction. Hence no group by rule will be applied
 
             // map-side grouping set present. if grouping set is present then
             // multiply the number of rows by number of elements in grouping set
@@ -599,13 +616,13 @@ public class StatsRulesProcFactory {
 
               // map side no grouping set
               newNumRows = stats.getNumRows() * mapSideParallelism;
-              updateStats(stats, newNumRows);
+              updateStats(stats, newNumRows, true);
             }
           } else {
 
             // reduce side
             newNumRows = applyGBYRule(stats.getNumRows(), dvProd);
-            updateStats(stats, newNumRows);
+            updateStats(stats, newNumRows, true);
           }
         } else {
           if (parentStats != null) {
@@ -618,7 +635,9 @@ public class StatsRulesProcFactory {
             } else {
 
               // reduce side
-              stats = getWorstCaseStats(parentStats);
+              stats = parentStats.clone();
+              long newNumRows = parentStats.getNumRows() / 2;
+              updateStats(stats, newNumRows, false);
             }
           }
         }
@@ -647,14 +666,19 @@ public class StatsRulesProcFactory {
           stats.addToColumnStats(aggColStats);
 
           // if UDAF present and if column expression map is empty then it must
-          // be full aggregation query like count(*) in which case number of rows will be 1
+          // be full aggregation query like count(*) in which case number of
+          // rows will be 1
           if (colExprMap.isEmpty()) {
             stats.setNumRows(1);
-            updateStats(stats, 1);
+            updateStats(stats, 1, true);
           }
         }
 
         gop.setStatistics(stats);
+
+        if (LOG.isDebugEnabled() && stats != null) {
+          LOG.debug("[0] STATS-" + gop.toString() + ": " + stats.extendedToString());
+        }
       } catch (CloneNotSupportedException e) {
         throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
       }
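
The reduce-side GBY rule [1] above, T(R) = min(T(R)/2, V(R,[A,B,C])), can be checked by hand. A small sketch with invented cardinalities (applyGBYRule in the patch is the real implementation; this is only an illustration):

    // Hypothetical example of the group-by cardinality estimate.
    public class GroupBySketch {
      public static void main(String[] args) {
        long tR = 10000;                      // rows entering the reduce-side GBY (assumed)
        long[] groupingColDVs = {10, 20, 5};  // V(R,A), V(R,B), V(R,C) (assumed)
        long dvProd = 1;
        for (long dv : groupingColDVs) {
          dvProd *= dv;                       // V(R,[A,B,C]) = 1000
        }
        long estimate = Math.min(tR / 2, dvProd);
        System.out.println("estimated groups = " + estimate); // 1000
      }
    }
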
@@ -683,7 +707,6 @@ public class StatsRulesProcFactory {
    * will have a tuple in R T(RXS) = T(S) (we need histograms for this)</li> <li>Both R & S relation
    * have same value for join-key. Ex: bool column with all true values T(RXS) = T(R) * T(S) (we
    * need histograms for this. counDistinct = 1 and same value)</li>
-   *
    * <p>
    * In the absence of histograms, we can use the following general case
    * <p>
@@ -695,10 +718,11 @@ public class StatsRulesProcFactory {
    * <p>
    * T(RXS) = T(R)*T(S)/max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)), where y1 and y2 are the join
    * attributes
-   *
    * <p>
-   * <i>Worst case:</i> If no column statistics are available, then T(RXS) = T(R)*T(S)/2 will be
-   * used as heuristics.
+   * <i>Worst case:</i> If no column statistics are available, then T(RXS) = joinFactor * max(T(R),
+   * T(S)) * (numParents - 1) will be used as the heuristic. joinFactor comes from the
+   * hive.stats.join.factor config. In the worst case, since we do not know anything about the join
+   * keys (and hence which of the 3 cases to use), we leave it to the user to provide the join factor.
    * <p>
    * <i>For more information, refer 'Estimating The Cost Of Operations' chapter in
    * "Database Systems: The Complete Book" by Garcia-Molina et. al.</i>
@@ -730,16 +754,15 @@ public class StatsRulesProcFactory {
           }
         }
 
-        try {
         if (allSatisfyPreCondition) {
 
-          // statistics object that is combination of statistics from all relations involved in JOIN
+          // statistics object that is combination of statistics from all
+          // relations involved in JOIN
           Statistics stats = new Statistics();
           long prodRows = 1;
           List<Long> distinctVals = Lists.newArrayList();
           boolean multiAttr = false;
 
-
           Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
           Map<Integer, List<String>> joinKeys = Maps.newHashMap();
 
@@ -756,33 +779,34 @@ public class StatsRulesProcFactory {
               multiAttr = true;
             }
 
-            // compute fully qualified join key column names. this name will be used to
-            // quickly look-up for column statistics of join key.
-            // TODO: expressions in join condition will be ignored. assign internal name
-            // for expressions and estimate column statistics for expression.
-            List<String> fqCols = StatsUtils.getFullQualifedColNameFromExprs(keyExprs,
-                parent.getColumnExprMap());
+            // compute fully qualified join key column names. this name will be
+            // used to quickly look-up for column statistics of join key.
+            // TODO: expressions in join condition will be ignored. assign
+            // internal name for expressions and estimate column statistics for expression.
+            List<String> fqCols =
+                StatsUtils.getFullQualifedColNameFromExprs(keyExprs, parent.getColumnExprMap());
             joinKeys.put(pos, fqCols);
 
             Map<String, ExprNodeDesc> colExprMap = parent.getColumnExprMap();
             RowSchema rs = parent.getSchema();
 
             // get column statistics for all output columns
-            List<ColStatistics> cs = StatsUtils.getColStatisticsFromExprMap(conf, parentStats,
-                colExprMap, rs);
+            List<ColStatistics> cs =
+                StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
             for (ColStatistics c : cs) {
               if (c != null) {
                 joinedColStats.put(c.getFullyQualifiedColName(), c);
               }
             }
 
-            // since new statistics is derived from all relations involved in JOIN,
-            // we need to update the state information accordingly
+            // since new statistics is derived from all relations involved in
+            // JOIN, we need to update the state information accordingly
             stats.updateColumnStatsState(parentStats.getColumnStatsState());
           }
 
-          // compute denominator i.e, max(V(R,Y), V(S,Y)) in case of single attribute join.
-          // else max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)) in case of multi-attribute join
+          // compute denominator i.e, max(V(R,Y), V(S,Y)) in case of single
+          // attribute join, else max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2))
+          // in case of multi-attribute join
           long denom = 1;
           if (multiAttr) {
             List<Long> perAttrDVs = Lists.newArrayList();
@@ -845,28 +869,45 @@ public class StatsRulesProcFactory {
           stats.setNumRows(newRowCount);
           stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats));
           jop.setStatistics(stats);
-          } else {
 
-            // worst case, when no column statistics are available
-            if (parents.size() > 1) {
-              Statistics wcStats = new Statistics();
-              Statistics stp1 = parents.get(0).getStatistics();
-              long numRows = stp1.getNumRows();
-              long avgRowSize = stp1.getAvgRowSize();
-              for (int i = 1; i < parents.size(); i++) {
-                stp1 = parents.get(i).getStatistics();
-                numRows = (numRows * stp1.getNumRows()) / 2;
-                avgRowSize += stp1.getAvgRowSize();
-              }
-              wcStats.setNumRows(numRows);
-              wcStats.setDataSize(numRows * avgRowSize);
-              jop.setStatistics(wcStats);
-            } else {
-              jop.setStatistics(parents.get(0).getStatistics().clone());
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString());
+          }
+        } else {
+
+          // worst case when there are no column statistics
+          float joinFactor = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_JOIN_FACTOR);
+          int numParents = parents.size();
+          List<Long> parentRows = Lists.newArrayList();
+          List<Long> parentSizes = Lists.newArrayList();
+          int maxRowIdx = 0;
+          long maxRowCount = 0;
+          int idx = 0;
+
+          for (Operator<? extends OperatorDesc> op : parents) {
+            Statistics ps = op.getStatistics();
+            long rowCount = ps.getNumRows();
+            if (rowCount > maxRowCount) {
+              maxRowCount = rowCount;
+              maxRowIdx = idx;
             }
+            parentRows.add(rowCount);
+            parentSizes.add(ps.getDataSize());
+            idx++;
+          }
+
+          long maxDataSize = parentSizes.get(maxRowIdx);
+          long newNumRows = (long) (joinFactor * maxRowCount * (numParents - 1));
+          long newDataSize = (long) (joinFactor * maxDataSize * (numParents - 1));
+
+          Statistics wcStats = new Statistics();
+          wcStats.setNumRows(newNumRows);
+          wcStats.setDataSize(newDataSize);
+          jop.setStatistics(wcStats);
+
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
           }
-        } catch (CloneNotSupportedException e) {
-          throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
         }
       }
       return null;
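
The two join estimates above, the general case T(RXS) = T(R) * T(S) / max(V(R,y), V(S,y)) from the javadoc and the new worst case joinFactor * max(T(R), T(S)) * (numParents - 1), can be compared in a small sketch. The inputs and the joinFactor value are invented; hive.stats.join.factor is named only because the patch reads it:

    // Hypothetical join cardinality estimates.
    public class JoinEstimateSketch {
      public static void main(String[] args) {
        long tR = 1000, tS = 5000;   // row counts of the two join inputs (assumed)
        long vRy = 100, vSy = 500;   // distinct join-key values in each input (assumed)

        // with column statistics, single join attribute
        long withStats = tR * tS / Math.max(vRy, vSy);                               // 10000

        // without column statistics: the worst case introduced by this patch
        float joinFactor = 1.1f;     // stand-in value for hive.stats.join.factor
        int numParents = 2;
        long worstCase = (long) (joinFactor * Math.max(tR, tS) * (numParents - 1));  // 5500

        System.out.println(withStats + " " + worstCase);
      }
    }
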
@@ -874,22 +915,22 @@ public class StatsRulesProcFactory {
 
     private long getDenominator(List<Long> distinctVals) {
 
-      if(distinctVals.isEmpty()) {
+      if (distinctVals.isEmpty()) {
 
-        // TODO: in union20.q the tab alias is not properly propagated down the operator
-        // tree. This happens when UNION ALL is used as sub query. Hence, even if column
-        // statistics are available, the tab alias will be null which will fail to get
-        // proper column statistics. For now assume, worst case in which denominator is 2.
+        // TODO: in union20.q the tab alias is not properly propagated down the
+        // operator tree. This happens when UNION ALL is used as sub query. Hence, even
+        // if column statistics are available, the tab alias will be null which will fail
+        // to get proper column statistics. For now assume, worst case in which
+        // denominator is 2.
         return 2;
       }
 
-      // simple join from 2 relations
-      // denom = max(v1, v2)
+      // simple join from 2 relations: denom = max(v1, v2)
       if (distinctVals.size() <= 2) {
         return Collections.max(distinctVals);
       } else {
 
-        // join from multiple relations
+        // join from multiple relations:
         // denom = max(v1, v2) * max(v2, v3) * max(v3, v4)
         long denom = 1;
         for (int i = 0; i < distinctVals.size() - 1; i++) {
@@ -909,7 +950,6 @@ public class StatsRulesProcFactory {
 
   /**
    * LIMIT operator changes the number of rows and thereby the data size.
-   *
    */
   public static class LimitStatsRule extends DefaultStatsRule implements NodeProcessor {
 
@@ -919,6 +959,8 @@ public class StatsRulesProcFactory {
       LimitOperator lop = (LimitOperator) nd;
       Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
       Statistics parentStats = parent.getStatistics();
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      HiveConf conf = aspCtx.getConf();
 
       try {
         long limit = -1;
@@ -927,25 +969,37 @@ public class StatsRulesProcFactory {
         if (satisfyPrecondition(parentStats)) {
           Statistics stats = parentStats.clone();
 
-          // if limit is greater than available rows then do not update statistics
+          // if limit is greater than available rows then do not update
+          // statistics
           if (limit <= parentStats.getNumRows()) {
-            updateStats(stats, limit);
+            updateStats(stats, limit, true);
           }
           lop.setStatistics(stats);
+
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("[0] STATS-" + lop.toString() + ": " + stats.extendedToString());
+          }
         } else {
           if (parentStats != null) {
 
-            // in the absence of column statistics, compute data size based on based
-            // on average row size
+            // in the absence of column statistics, compute data size based on
+            // the average row size
             Statistics wcStats = parentStats.clone();
             if (limit <= parentStats.getNumRows()) {
               long numRows = limit;
               long avgRowSize = parentStats.getAvgRowSize();
+              if (avgRowSize <= 0) {
+                avgRowSize = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_STATS_AVG_ROW_SIZE);
+              }
               long dataSize = avgRowSize * limit;
               wcStats.setNumRows(numRows);
               wcStats.setDataSize(dataSize);
             }
             lop.setStatistics(wcStats);
+
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("[1] STATS-" + lop.toString() + ": " + wcStats.extendedToString());
+            }
           }
         }
       } catch (CloneNotSupportedException e) {
@@ -958,7 +1012,6 @@ public class StatsRulesProcFactory {
 
   /**
    * Default rule is to aggregate the statistics from all its parent operators.
-   *
    */
   public static class DefaultStatsRule implements NodeProcessor {
 
@@ -973,8 +1026,8 @@ public class StatsRulesProcFactory {
         if (stats == null) {
           if (op.getParentOperators() != null) {
 
-            // if parent statistics is null then that branch of the tree is not walked yet.
-            // don't update the stats until all branches are walked
+            // if parent statistics is null then that branch of the tree is not
+            // walked yet. don't update the stats until all branches are walked
             if (isAllParentsContainStatistics(op)) {
               stats = new Statistics();
               for (Operator<? extends OperatorDesc> parent : op.getParentOperators()) {
@@ -985,6 +1038,10 @@ public class StatsRulesProcFactory {
                   stats.updateColumnStatsState(parentStats.getColumnStatsState());
                   stats.addToColumnStats(parentStats.getColumnStats());
                   op.getConf().setStatistics(stats);
+
+                  if (LOG.isDebugEnabled()) {
+                    LOG.debug("[0] STATS-" + op.toString() + ": " + stats.extendedToString());
+                  }
                 }
               }
             }
@@ -1036,37 +1093,43 @@ public class StatsRulesProcFactory {
 
   /**
    * Update the basic statistics of the statistics object based on the row number
-   *
    * @param stats
    *          - statistics to be updated
    * @param newNumRows
    *          - new number of rows
+   * @param useColStats
+   *          - use column statistics to compute data size
    */
-  static void updateStats(Statistics stats, long newNumRows) {
+  static void updateStats(Statistics stats, long newNumRows, boolean useColStats) {
     long oldRowCount = stats.getNumRows();
     double ratio = (double) newNumRows / (double) oldRowCount;
     stats.setNumRows(newNumRows);
 
-    List<ColStatistics> colStats = stats.getColumnStats();
-    for (ColStatistics cs : colStats) {
-      long oldNumNulls = cs.getNumNulls();
-      long oldDV = cs.getCountDistint();
-      long newNumNulls = Math.round(ratio * oldNumNulls);
-      long newDV = oldDV;
-
-      // if ratio is greater than 1, then number of rows increases. This can happen
-      // when some operators like GROUPBY duplicates the input rows in which case
-      // number of distincts should not change. Update the distinct count only when
-      // the output number of rows is less than input number of rows.
-      if (ratio <= 1.0) {
-        newDV = Math.round(ratio * oldDV);
+    if (useColStats) {
+      List<ColStatistics> colStats = stats.getColumnStats();
+      for (ColStatistics cs : colStats) {
+        long oldNumNulls = cs.getNumNulls();
+        long oldDV = cs.getCountDistint();
+        long newNumNulls = Math.round(ratio * oldNumNulls);
+        long newDV = oldDV;
+
+        // if ratio is greater than 1, then number of rows increases. This can happen
+        // when some operators like GROUPBY duplicates the input rows in which case
+        // number of distincts should not change. Update the distinct count only when
+        // the output number of rows is less than input number of rows.
+        if (ratio <= 1.0) {
+          newDV = Math.round(ratio * oldDV);
+        }
+        cs.setNumNulls(newNumNulls);
+        cs.setCountDistint(newDV);
       }
-      cs.setNumNulls(newNumNulls);
-      cs.setCountDistint(newDV);
+      stats.setColumnStats(colStats);
+      long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats);
+      stats.setDataSize(newDataSize);
+    } else {
+      long newDataSize = (long) (ratio * stats.getDataSize());
+      stats.setDataSize(newDataSize);
     }
-    stats.setColumnStats(colStats);
-    long newDataSize = StatsUtils.getDataSizeFromColumnStats(newNumRows, colStats);
-    stats.setDataSize(newDataSize);
   }
 
   static boolean satisfyPrecondition(Statistics stats) {
@@ -1074,16 +1137,4 @@ public class StatsRulesProcFactory {
         && !stats.getColumnStatsState().equals(Statistics.State.NONE);
   }
 
-  static Statistics getWorstCaseStats(Statistics stats) throws CloneNotSupportedException {
-    Statistics wcClone = stats.clone();
-    long numRows = wcClone.getNumRows() / 2;
-    long dataSize = wcClone.getDataSize() / 2;
-    long avgRowSize = wcClone.getAvgRowSize();
-    if (numRows > 0) {
-      dataSize = avgRowSize * numRows;
-    }
-    wcClone.setNumRows(numRows);
-    wcClone.setDataSize(dataSize);
-    return wcClone;
-  }
 }
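
The reworked updateStats(stats, newNumRows, useColStats) above scales each column's null and distinct counts by the row ratio only when column statistics are trusted, and otherwise just scales the data size by the same ratio. A compact sketch of that arithmetic with invented numbers (it does not touch the real ColStatistics API):

    // Hypothetical illustration of the ratio-based scaling in updateStats.
    public class UpdateStatsSketch {
      public static void main(String[] args) {
        long oldNumRows = 1000, newNumRows = 200;
        double ratio = (double) newNumRows / oldNumRows;    // 0.2

        long oldNumNulls = 50, oldDistinct = 400, oldDataSize = 100000;
        long newNumNulls = Math.round(ratio * oldNumNulls); // 10
        // distinct counts are only scaled down; a row-duplicating operator
        // (ratio > 1) leaves them untouched
        long newDistinct = ratio <= 1.0 ? Math.round(ratio * oldDistinct) : oldDistinct; // 80
        // useColStats == false path: scale the data size by the same ratio
        long newDataSize = (long) (ratio * oldDataSize);    // 20000

        System.out.println(newNumNulls + " " + newDistinct + " " + newDataSize);
      }
    }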

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java Fri Dec 13 10:56:38 2013
@@ -39,6 +39,7 @@ import org.antlr.runtime.tree.Tree;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.ql.Context;
@@ -67,6 +68,7 @@ import org.apache.hadoop.hive.ql.optimiz
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -77,7 +79,6 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
-import org.apache.hadoop.util.StringUtils;
 
 import com.google.common.annotations.VisibleForTesting;
 
@@ -1134,48 +1135,53 @@ public abstract class BaseSemanticAnalyz
     return storedAsDirs;
   }
 
-  private static void getPartExprNodeDesc(ASTNode astNode,
-      Map<ASTNode, ExprNodeDesc> astExprNodeMap)
-          throws SemanticException, HiveException {
+  private static boolean getPartExprNodeDesc(ASTNode astNode,
+      Map<ASTNode, ExprNodeDesc> astExprNodeMap) throws SemanticException {
 
-    if ((astNode == null) || (astNode.getChildren() == null) || 
-        (astNode.getChildren().size() == 0)) {
-      return;
+    if (astNode == null) {
+      return true;
+    } else if ((astNode.getChildren() == null) || (astNode.getChildren().size() == 0)) {
+      return astNode.getType() != HiveParser.TOK_PARTVAL;
     }
 
     TypeCheckCtx typeCheckCtx = new TypeCheckCtx(null);
+    boolean result = true;
     for (Node childNode : astNode.getChildren()) {
       ASTNode childASTNode = (ASTNode)childNode;
 
       if (childASTNode.getType() != HiveParser.TOK_PARTVAL) {
-        getPartExprNodeDesc(childASTNode, astExprNodeMap);
+        result = getPartExprNodeDesc(childASTNode, astExprNodeMap) && result;
       } else {
-        if (childASTNode.getChildren().size() <= 1) {
-          throw new HiveException("This is dynamic partitioning");
+        boolean isDynamicPart = childASTNode.getChildren().size() <= 1;
+        result = !isDynamicPart && result;
+        if (!isDynamicPart) {
+          ASTNode partVal = (ASTNode)childASTNode.getChildren().get(1);
+          astExprNodeMap.put((ASTNode)childASTNode.getChildren().get(0),
+            TypeCheckProcFactory.genExprNode(partVal, typeCheckCtx).get(partVal));
         }
-
-        ASTNode partValASTChild = (ASTNode)childASTNode.getChildren().get(1);
-        astExprNodeMap.put((ASTNode)childASTNode.getChildren().get(0),
-            TypeCheckProcFactory.genExprNode(partValASTChild, typeCheckCtx).get(partValASTChild));
       }
     }
+    return result;
   }
 
   public static void validatePartSpec(Table tbl, Map<String, String> partSpec,
       ASTNode astNode, HiveConf conf) throws SemanticException {
-    Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
-
-    Utilities.validatePartSpec(tbl, partSpec);
+    Utilities.validatePartSpecColumnNames(tbl, partSpec);
 
     if (!HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_TYPE_CHECK_ON_INSERT)) {
       return;
     }
 
-    try {
-      getPartExprNodeDesc(astNode, astExprNodeMap);
-    } catch (HiveException e) {
-      return;
+    Map<ASTNode, ExprNodeDesc> astExprNodeMap = new HashMap<ASTNode, ExprNodeDesc>();
+    if (!getPartExprNodeDesc(astNode, astExprNodeMap)) {
+      STATIC_LOG.warn("Dynamic partitioning is used; only validating "
+          + astExprNodeMap.size() + " columns");
     }
+
+    if (astExprNodeMap.isEmpty()) {
+      return; // All columns are dynamic, nothing to do.
+    }
+
     List<FieldSchema> parts = tbl.getPartitionKeys();
     Map<String, String> partCols = new HashMap<String, String>(parts.size());
     for (FieldSchema col : parts) {
@@ -1243,4 +1249,84 @@ public abstract class BaseSemanticAnalyz
     }
     return partitionDateFormat.format(value);
   }
+
+  protected Database getDatabase(String dbName) throws SemanticException {
+    return getDatabase(dbName, true);
+  }
+
+  protected Database getDatabase(String dbName, boolean throwException) throws SemanticException {
+    try {
+      Database database = db.getDatabase(dbName);
+      if (database == null && throwException) {
+        throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(dbName));
+      }
+      return database;
+    } catch (HiveException e) {
+      throw new SemanticException(ErrorMsg.DATABASE_NOT_EXISTS.getMsg(dbName), e);
+    }
+  }
+
+  protected Table getTable(String tblName) throws SemanticException {
+    return getTable(null, tblName, true);
+  }
+
+  protected Table getTable(String tblName, boolean throwException) throws SemanticException {
+    String currentDb = SessionState.get().getCurrentDatabase();
+    return getTable(currentDb, tblName, throwException);
+  }
+
+  // qnName : possibly contains database name (dot separated)
+  protected Table getTableWithQN(String qnName, boolean throwException) throws SemanticException {
+    int dot = qnName.indexOf('.');
+    if (dot < 0) {
+      String currentDb = SessionState.get().getCurrentDatabase();
+      return getTable(currentDb, qnName, throwException);
+    }
+    return getTable(qnName.substring(0, dot), qnName.substring(dot + 1), throwException);
+  }
+
+  protected Table getTable(String database, String tblName, boolean throwException)
+      throws SemanticException {
+    try {
+      Table tab = database == null ? db.getTable(tblName, false)
+          : db.getTable(database, tblName, false);
+      if (tab == null && throwException) {
+        throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName));
+      }
+      return tab;
+    } catch (HiveException e) {
+      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName), e);
+    }
+  }
+
+  protected Partition getPartition(Table table, Map<String, String> partSpec,
+      boolean throwException) throws SemanticException {
+    try {
+      Partition partition = db.getPartition(table, partSpec, false);
+      if (partition == null && throwException) {
+        throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec));
+      }
+      return partition;
+    } catch (HiveException e) {
+      throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec), e);
+    }
+  }
+
+  protected List<Partition> getPartitions(Table table, Map<String, String> partSpec,
+      boolean throwException) throws SemanticException {
+    try {
+      List<Partition> partitions = partSpec == null ? db.getPartitions(table) :
+          db.getPartitions(table, partSpec);
+      if (partitions.isEmpty() && throwException) {
+        throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec));
+      }
+      return partitions;
+    } catch (HiveException e) {
+      throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec), e);
+    }
+  }
+
+  protected String toMessage(ErrorMsg message, Object detail) {
+    return detail == null ? message.getMsg() : message.getMsg(detail.toString());
+  }
 }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Fri Dec 13 10:56:38 2013
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Index;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -95,6 +96,7 @@ import org.apache.hadoop.hive.ql.plan.Gr
 import org.apache.hadoop.hive.ql.plan.GrantRevokeRoleDDL;
 import org.apache.hadoop.hive.ql.plan.ListBucketingCtx;
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
+import org.apache.hadoop.hive.ql.plan.LockDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.LockTableDesc;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.MsckDesc;
@@ -121,6 +123,7 @@ import org.apache.hadoop.hive.ql.plan.St
 import org.apache.hadoop.hive.ql.plan.SwitchDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.TruncateTableDesc;
+import org.apache.hadoop.hive.ql.plan.UnlockDatabaseDesc;
 import org.apache.hadoop.hive.ql.plan.UnlockTableDesc;
 import org.apache.hadoop.hive.ql.security.authorization.Privilege;
 import org.apache.hadoop.hive.ql.security.authorization.PrivilegeRegistry;
@@ -298,6 +301,10 @@ public class DDLSemanticAnalyzer extends
       ctx.setResFile(new Path(ctx.getLocalTmpFileURI()));
       analyzeShowLocks(ast);
       break;
+    case HiveParser.TOK_SHOWDBLOCKS:
+      ctx.setResFile(new Path(ctx.getLocalTmpFileURI()));
+      analyzeShowDbLocks(ast);
+      break;
     case HiveParser.TOK_DESCFUNCTION:
       ctx.setResFile(new Path(ctx.getLocalTmpFileURI()));
       analyzeDescFunction(ast);
@@ -394,6 +401,12 @@ public class DDLSemanticAnalyzer extends
     case HiveParser.TOK_UNLOCKTABLE:
       analyzeUnlockTable(ast);
       break;
+    case HiveParser.TOK_LOCKDB:
+      analyzeLockDatabase(ast);
+      break;
+    case HiveParser.TOK_UNLOCKDB:
+      analyzeUnlockDatabase(ast);
+      break;
     case HiveParser.TOK_CREATEDATABASE:
       analyzeCreateDatabase(ast);
       break;
@@ -809,6 +822,14 @@ public class DDLSemanticAnalyzer extends
       ifCascade = true;
     }
 
+    Database database = getDatabase(dbName, !ifExists);
+    if (database == null) {
+      return;
+    }
+
+    inputs.add(new ReadEntity(database));
+    outputs.add(new WriteEntity(database));
+
     DropDatabaseDesc dropDatabaseDesc = new DropDatabaseDesc(dbName, ifExists, ifCascade);
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), dropDatabaseDesc), conf));
   }
@@ -2274,6 +2295,29 @@ public class DDLSemanticAnalyzer extends
     ctx.setNeedLockMgr(true);
   }
 
+  /**
+   * Add the task according to the parsed command tree. This is used for the CLI
+   * command "SHOW LOCKS DATABASE database [extended];".
+   *
+   * @param ast
+   *          The parsed command tree.
+   * @throws SemanticException
+   *           Parsing failed
+   */
+  private void analyzeShowDbLocks(ASTNode ast) throws SemanticException {
+    boolean isExtended = (ast.getChildCount() > 1);
+    String dbName = stripQuotes(ast.getChild(0).getText());
+
+    ShowLocksDesc showLocksDesc = new ShowLocksDesc(ctx.getResFile(), dbName,
+                                                    isExtended);
+    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+        showLocksDesc), conf));
+    setFetchTask(createFetchTask(showLocksDesc.getSchema()));
+
+    // Need to initialize the lock manager
+    ctx.setNeedLockMgr(true);
+  }
+
   /**
    * Add the task according to the parsed command tree. This is used for the CLI
    * command "LOCK TABLE ..;".
@@ -2335,6 +2379,30 @@ public class DDLSemanticAnalyzer extends
     ctx.setNeedLockMgr(true);
   }
 
+  private void analyzeLockDatabase(ASTNode ast) throws SemanticException {
+    String dbName = unescapeIdentifier(ast.getChild(0).getText());
+    String mode  = unescapeIdentifier(ast.getChild(1).getText().toUpperCase());
+
+    //inputs.add(new ReadEntity(dbName));
+    //outputs.add(new WriteEntity(dbName));
+    LockDatabaseDesc lockDatabaseDesc = new LockDatabaseDesc(dbName, mode,
+                        HiveConf.getVar(conf, ConfVars.HIVEQUERYID));
+    lockDatabaseDesc.setQueryStr(ctx.getCmd());
+    DDLWork work = new DDLWork(getInputs(), getOutputs(), lockDatabaseDesc);
+    rootTasks.add(TaskFactory.get(work, conf));
+    ctx.setNeedLockMgr(true);
+  }
+
+  private void analyzeUnlockDatabase(ASTNode ast) throws SemanticException {
+    String dbName = unescapeIdentifier(ast.getChild(0).getText());
+
+    UnlockDatabaseDesc unlockDatabaseDesc = new UnlockDatabaseDesc(dbName);
+    DDLWork work = new DDLWork(getInputs(), getOutputs(), unlockDatabaseDesc);
+    rootTasks.add(TaskFactory.get(work, conf));
+    // Need to initialize the lock manager
+    ctx.setNeedLockMgr(true);
+  }
+
   /**
    * Add the task according to the parsed command tree. This is used for the CLI
    * command "DESCRIBE FUNCTION;".
@@ -2531,7 +2599,7 @@ public class DDLSemanticAnalyzer extends
 
     // check if table exists.
     try {
-      tab = db.getTable(SessionState.get().getCurrentDatabase(), tblName, true);
+      tab = getTable(tblName, true);
       inputs.add(new ReadEntity(tab));
     } catch (HiveException e) {
       throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName));
@@ -3280,57 +3348,4 @@ public class DDLSemanticAnalyzer extends
       throw new SemanticException(e);
     }
   }
-
-  private Table getTable(String tblName) throws SemanticException {
-    return getTable(null, tblName, true);
-  }
-
-  private Table getTable(String tblName, boolean throwException) throws SemanticException {
-    return getTable(SessionState.get().getCurrentDatabase(), tblName, throwException);
-  }
-
-  private Table getTable(String database, String tblName, boolean throwException)
-      throws SemanticException {
-    try {
-      Table tab = database == null ? db.getTable(tblName, false)
-          : db.getTable(database, tblName, false);
-      if (tab == null && throwException) {
-        throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName));
-      }
-      return tab;
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tblName));
-    }
-  }
-
-  private Partition getPartition(Table table, Map<String, String> partSpec, boolean throwException)
-      throws SemanticException {
-    try {
-      Partition partition = db.getPartition(table, partSpec, false);
-      if (partition == null && throwException) {
-        throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec));
-      }
-      return partition;
-    } catch (HiveException e) {
-      throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec), e);
-    }
-  }
-
-  private List<Partition> getPartitions(Table table, Map<String, String> partSpec,
-      boolean throwException) throws SemanticException {
-    try {
-      List<Partition> partitions = partSpec == null ? db.getPartitions(table) :
-          db.getPartitions(table, partSpec);
-      if (partitions.isEmpty() && throwException) {
-        throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec));
-      }
-      return partitions;
-    } catch (HiveException e) {
-      throw new SemanticException(toMessage(ErrorMsg.INVALID_PARTITION, partSpec), e);
-    }
-  }
-
-  private String toMessage(ErrorMsg message, Object detail) {
-    return detail == null ? message.getMsg() : message.getMsg(detail.toString());
-  }
 }

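The DDLSemanticAnalyzer changes above wire three new CLI commands into the usual desc, DDLWork and TaskFactory pattern: LOCK DATABASE, UNLOCK DATABASE, and SHOW LOCKS DATABASE. A rough end-to-end sketch of issuing them over JDBC follows; it assumes a HiveServer2 built with this patch on localhost:10000, a configured lock manager (hive.support.concurrency=true), and an invented database name "db1", none of which are part of the commit itself.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class DbLockCommandsDemo {
      public static void main(String[] args) throws Exception {
        // Explicit driver registration for older hive-jdbc jars.
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        Connection conn = DriverManager.getConnection(
            "jdbc:hive2://localhost:10000/default", "", "");
        Statement stmt = conn.createStatement();

        // Handled by analyzeLockDatabase -> LockDatabaseDesc.
        stmt.execute("LOCK DATABASE db1 SHARED");

        // Handled by analyzeShowDbLocks -> ShowLocksDesc; SHOW statements return a result set.
        ResultSet rs = stmt.executeQuery("SHOW LOCKS DATABASE db1 EXTENDED");
        while (rs.next()) {
          System.out.println(rs.getString(1));
        }
        rs.close();

        // Handled by analyzeUnlockDatabase -> UnlockDatabaseDesc.
        stmt.execute("UNLOCK DATABASE db1");

        stmt.close();
        conn.close();
      }
    }
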
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/ExportSemanticAnalyzer.java Fri Dec 13 10:56:38 2013
@@ -101,10 +101,12 @@ public class ExportSemanticAnalyzer exte
               .getMsg("Exception while writing out the local file"), e);
     }
 
+    Path parentPath = new Path(toURI);
+
     if (ts.tableHandle.isPartitioned()) {
       for (Partition partition : partitions) {
         URI fromURI = partition.getDataLocation();
-        Path toPartPath = new Path(toURI.toString(), partition.getName());
+        Path toPartPath = new Path(parentPath, partition.getName());
         Task<? extends Serializable> rTask = TaskFactory.get(
             new CopyWork(fromURI.toString(), toPartPath.toString(), false),
             conf);
@@ -113,13 +115,12 @@ public class ExportSemanticAnalyzer exte
       }
     } else {
       URI fromURI = ts.tableHandle.getDataLocation();
-      Path toDataPath = new Path(toURI.toString(), "data");
+      Path toDataPath = new Path(parentPath, "data");
       Task<? extends Serializable> rTask = TaskFactory.get(new CopyWork(
           fromURI.toString(), toDataPath.toString(), false), conf);
       rootTasks.add(rTask);
       inputs.add(new ReadEntity(ts.tableHandle));
     }
-    outputs.add(new WriteEntity(toURI.toString(),
-        toURI.getScheme().equals("hdfs") ? true : false));
+    outputs.add(new WriteEntity(parentPath, toURI.getScheme().equals("hdfs")));
   }
 }

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Fri Dec 13 10:56:38 2013
@@ -164,6 +164,8 @@ TOK_SHOW_TBLPROPERTIES;
 TOK_SHOWLOCKS;
 TOK_LOCKTABLE;
 TOK_UNLOCKTABLE;
+TOK_LOCKDB;
+TOK_UNLOCKDB;
 TOK_SWITCHDATABASE;
 TOK_DROPDATABASE;
 TOK_DROPTABLE;
@@ -273,6 +275,7 @@ TOK_GRANT_ROLE;
 TOK_REVOKE_ROLE;
 TOK_SHOW_ROLE_GRANT;
 TOK_SHOWINDEXES;
+TOK_SHOWDBLOCKS;
 TOK_INDEXCOMMENT;
 TOK_DESCDATABASE;
 TOK_DATABASEPROPERTIES;
@@ -624,6 +627,8 @@ ddlStatement
     | analyzeStatement
     | lockStatement
     | unlockStatement
+    | lockDatabase
+    | unlockDatabase
     | createRoleStatement
     | dropRoleStatement
     | grantPrivileges
@@ -1237,6 +1242,7 @@ showStatement
     -> ^(TOK_SHOW_TABLESTATUS showStmtIdentifier $db_name? partitionSpec?)
     | KW_SHOW KW_TBLPROPERTIES tblName=identifier (LPAREN prptyName=StringLiteral RPAREN)? -> ^(TOK_SHOW_TBLPROPERTIES $tblName $prptyName?)
     | KW_SHOW KW_LOCKS (parttype=partTypeExpr)? (isExtended=KW_EXTENDED)? -> ^(TOK_SHOWLOCKS $parttype? $isExtended?)
+    | KW_SHOW KW_LOCKS KW_DATABASE (dbName=Identifier) (isExtended=KW_EXTENDED)? -> ^(TOK_SHOWDBLOCKS $dbName $isExtended?)
     | KW_SHOW (showOptions=KW_FORMATTED)? (KW_INDEX|KW_INDEXES) KW_ON showStmtIdentifier ((KW_FROM|KW_IN) db_name=identifier)?
     -> ^(TOK_SHOWINDEXES showStmtIdentifier $showOptions? $db_name?)
     ;
@@ -1247,6 +1253,12 @@ lockStatement
     : KW_LOCK KW_TABLE tableName partitionSpec? lockMode -> ^(TOK_LOCKTABLE tableName lockMode partitionSpec?)
     ;
 
+lockDatabase
+@init { msgs.push("lock database statement"); }
+@after { msgs.pop(); }
+    : KW_LOCK KW_DATABASE (dbName=Identifier) lockMode -> ^(TOK_LOCKDB $dbName lockMode)
+    ;
+
 lockMode
 @init { msgs.push("lock mode"); }
 @after { msgs.pop(); }
@@ -1259,6 +1271,12 @@ unlockStatement
     : KW_UNLOCK KW_TABLE tableName partitionSpec?  -> ^(TOK_UNLOCKTABLE tableName partitionSpec?)
     ;
 
+unlockDatabase
+@init { msgs.push("unlock database statement"); }
+@after { msgs.pop(); }
+    : KW_UNLOCK KW_DATABASE (dbName=Identifier) -> ^(TOK_UNLOCKDB $dbName)
+    ;
+
 createRoleStatement
 @init { msgs.push("create role"); }
 @after { msgs.pop(); }

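The grammar additions above take a bare Identifier for the database name, reuse the existing lockMode rule (SHARED or EXCLUSIVE), and rewrite into the new TOK_LOCKDB, TOK_UNLOCKDB and TOK_SHOWDBLOCKS tokens. A throwaway sketch that simply parses the new statement forms and dumps the resulting trees; the database name db1 is invented, and this assumes a build that contains this patch:

    import org.apache.hadoop.hive.ql.parse.ASTNode;
    import org.apache.hadoop.hive.ql.parse.ParseDriver;

    public class DbLockGrammarDemo {
      public static void main(String[] args) throws Exception {
        ParseDriver pd = new ParseDriver();
        String[] stmts = {
            "LOCK DATABASE db1 SHARED",         // rewritten to ^(TOK_LOCKDB db1 SHARED)
            "UNLOCK DATABASE db1",              // rewritten to ^(TOK_UNLOCKDB db1)
            "SHOW LOCKS DATABASE db1 EXTENDED"  // rewritten to ^(TOK_SHOWDBLOCKS db1 EXTENDED)
        };
        for (String s : stmts) {
          ASTNode ast = pd.parse(s);
          // toStringTree() is the underlying ANTLR dump of the rewritten token tree.
          System.out.println(s + " => " + ast.toStringTree());
        }
      }
    }

Note that the new analyzeShowDbLocks only has to check ast.getChildCount() > 1 to detect EXTENDED, because the db name is always child 0 of TOK_SHOWDBLOCKS.
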
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java Fri Dec 13 10:56:38 2013
@@ -10,14 +10,15 @@ import org.apache.hadoop.hive.ql.ErrorMs
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
 import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
-public class QBSubQuery {
-
+public class QBSubQuery implements ISubQueryJoinInfo {
+  
   public static enum SubQueryType {
     EXISTS,
     NOT_EXISTS,
@@ -332,6 +333,83 @@ public class QBSubQuery {
 
   }
 
+  /*
+   * When transforming a Not In SubQuery we need to check for nulls in the 
+   * joining expressions of the SubQuery. If there are nulls then the SubQuery always
+   * returns false. For more details see
+   * https://issues.apache.org/jira/secure/attachment/12614003/SubQuerySpec.pdf
+   * 
+   * Basically, SQL semantics say that:
+   * - R1.A not in (null, 1, 2, ...)
+   *   is always false. 
+   *   A 'not in' operator is equivalent to a '<> all'. Since a not equal check with null 
+   *   returns false, a not in predicate against a set with a 'null' value always returns false.
+   *   
+   * So for not in SubQuery predicates:
+   * - we join in a null count predicate.
+   * - And the joining condition is that the 'Null Count' query has a count of 0.
+   *   
+   */
+  class NotInCheck implements ISubQueryJoinInfo {
+    
+    private static final String CNT_ALIAS = "c1";
+    
+    /*
+     * expressions in SubQ that are joined to the Outer Query.
+     */
+    List<ASTNode> subQryCorrExprs;
+    
+    /*
+     * row resolver of the SubQuery.
+     * Set by the SemanticAnalyzer after the Plan for the SubQuery is generated.
+     * This is needed in case the SubQuery select list contains a TOK_ALLCOLREF
+     */
+    RowResolver sqRR;
+    
+    NotInCheck() {
+      subQryCorrExprs = new ArrayList<ASTNode>();
+    }
+    
+    void addCorrExpr(ASTNode corrExpr) {
+      subQryCorrExprs.add(corrExpr);
+    }
+    
+    public ASTNode getSubQueryAST() {
+      return SubQueryUtils.buildNotInNullCheckQuery(
+          QBSubQuery.this.getSubQueryAST(), 
+          QBSubQuery.this.getAlias(), 
+          CNT_ALIAS, 
+          subQryCorrExprs,
+          sqRR);
+    }
+    
+    public String getAlias() {
+      return QBSubQuery.this.getAlias() + "_notin_nullcheck";
+    }
+    
+    public JoinType getJoinType() {
+      return JoinType.LEFTSEMI;
+    }
+    
+    public ASTNode getJoinConditionAST() {
+      return 
+          SubQueryUtils.buildNotInNullJoinCond(getAlias(), CNT_ALIAS);
+    }
+    
+    public QBSubQuery getSubQuery() {
+      return QBSubQuery.this;
+    }
+    
+    public String getOuterQueryId() {
+      return QBSubQuery.this.getOuterQueryId();
+    }
+    
+    void setSQRR(RowResolver sqRR) {
+      this.sqRR = sqRR;
+    }
+        
+  }
+  
   private final String outerQueryId;
   private final int sqIdx;
   private final String alias;
@@ -355,6 +433,8 @@ public class QBSubQuery {
   private boolean groupbyAddedToSQ;
   
   private int numOuterCorrExprsForHaving;
+  
+  private NotInCheck notInCheck;
 
   public QBSubQuery(String outerQueryId,
       int sqIdx,
@@ -377,6 +457,10 @@ public class QBSubQuery {
     originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST);
     numOfCorrelationExprsAddedToSQSelect = 0;
     groupbyAddedToSQ = false;
+    
+    if ( operator.getType() == SubQueryType.NOT_IN ) {
+      notInCheck = new NotInCheck();
+    }
   }
 
   public ASTNode getSubQueryAST() {
@@ -655,6 +739,9 @@ public class QBSubQuery {
             ASTNode gBy = getSubQueryGroupByAST();
             SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr());
           }
+          if ( notInCheck != null ) {
+            notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(0));
+          }
         } else {
           if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) {
             rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, 
@@ -671,6 +758,9 @@ public class QBSubQuery {
             ASTNode gBy = getSubQueryGroupByAST();
             SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getRightExpr());
           }
+          if ( notInCheck != null ) {
+            notInCheck.addCorrExpr((ASTNode)conjunctAST.getChild(1));
+          }
         }
       } else {
         sqNewSearchCond = SubQueryUtils.andAST(sqNewSearchCond, conjunctAST);
@@ -746,6 +836,14 @@ public class QBSubQuery {
     return numOfCorrelationExprsAddedToSQSelect;
   }
   
+  public QBSubQuery getSubQuery() {
+    return this;
+  }
+  
+  NotInCheck getNotInCheck() {
+    return notInCheck;
+  }
+  
   private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode,
       boolean refersLeft,
       String outerQueryAlias,

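The NotInCheck machinery added above rests on the SQL three-valued-logic point made in its comment: once the subquery column can produce a NULL, 'x not in (subquery)' can never evaluate to TRUE, so every outer row is filtered out. A small self-contained sketch of that rule, in plain Java with made-up values rather than Hive code:

    import java.util.Arrays;
    import java.util.List;

    public class NotInNullDemo {

      enum Tri { TRUE, FALSE, UNKNOWN }

      // "x <> v" under SQL semantics: comparing with NULL yields UNKNOWN.
      static Tri notEquals(Integer x, Integer v) {
        if (x == null || v == null) {
          return Tri.UNKNOWN;
        }
        return x.intValue() != v.intValue() ? Tri.TRUE : Tri.FALSE;
      }

      // "x NOT IN (set)" is "x <> all values": FALSE if any comparison is FALSE,
      // UNKNOWN if none is FALSE but some is UNKNOWN, TRUE only if all are TRUE.
      static Tri notIn(Integer x, List<Integer> set) {
        boolean sawUnknown = false;
        for (Integer v : set) {
          Tri cmp = notEquals(x, v);
          if (cmp == Tri.FALSE) {
            return Tri.FALSE;
          }
          if (cmp == Tri.UNKNOWN) {
            sawUnknown = true;
          }
        }
        return sawUnknown ? Tri.UNKNOWN : Tri.TRUE;
      }

      public static void main(String[] args) {
        List<Integer> withNull = Arrays.asList(null, 1, 2);
        // Never TRUE once the set contains a NULL, so a WHERE clause drops every row:
        System.out.println(notIn(5, withNull)); // UNKNOWN
        System.out.println(notIn(1, withNull)); // FALSE
        // This is why the rewrite joins in the 'Null Count' subquery on count(*) == 0.
      }
    }
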
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Fri Dec 13 10:56:38 2013
@@ -101,6 +101,7 @@ import org.apache.hadoop.hive.ql.parse.P
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitionedTableFunctionSpec;
 import org.apache.hadoop.hive.ql.parse.PTFInvocationSpec.PartitioningSpec;
 import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryType;
+import org.apache.hadoop.hive.ql.parse.SubQueryUtils.ISubQueryJoinInfo;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.BoundarySpec;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.CurrentRowSpec;
 import org.apache.hadoop.hive.ql.parse.WindowingSpec.Direction;
@@ -1922,6 +1923,17 @@ public class SemanticAnalyzer extends Ba
     output = putOpInsertMap(output, inputRR);
     return output;
   }
+  
+  private Operator genPlanForSubQueryPredicate(
+      QB qbSQ,
+      ISubQueryJoinInfo subQueryPredicate) throws SemanticException {
+    qbSQ.setSubQueryDef(subQueryPredicate.getSubQuery());
+    Phase1Ctx ctx_1 = initPhase1Ctx();
+    doPhase1(subQueryPredicate.getSubQueryAST(), qbSQ, ctx_1);
+    getMetaData(qbSQ);
+    Operator op = genPlan(qbSQ);
+    return op;
+  }
 
   @SuppressWarnings("nls")
   private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input,
@@ -2010,11 +2022,7 @@ public class SemanticAnalyzer extends Ba
         subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias);
 
         QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true);
-        qbSQ.setSubQueryDef(subQuery);
-        Phase1Ctx ctx_1 = initPhase1Ctx();
-        doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1);
-        getMetaData(qbSQ);
-        Operator sqPlanTopOp = genPlan(qbSQ);
+        Operator sqPlanTopOp = genPlanForSubQueryPredicate(qbSQ, subQuery);
         aliasToOpInfo.put(subQuery.getAlias(), sqPlanTopOp);
         RowResolver sqRR = opParseCtx.get(sqPlanTopOp).getRowResolver();
 
@@ -2029,6 +2037,27 @@ public class SemanticAnalyzer extends Ba
           throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg(
               subQueryAST, "SubQuery can contain only 1 item in Select List."));
         }
+        
+        /*
+         * If this is a Not In SubQuery Predicate then Join in the Null Check SubQuery.
+         * See QBSubQuery.NotInCheck for details on why and how this is constructed.
+         */
+        if ( subQuery.getNotInCheck() != null ) {
+          QBSubQuery.NotInCheck notInCheck = subQuery.getNotInCheck();
+          notInCheck.setSQRR(sqRR);
+          QB qbSQ_nic = new QB(subQuery.getOuterQueryId(), notInCheck.getAlias(), true);
+          Operator sqnicPlanTopOp = genPlanForSubQueryPredicate(qbSQ_nic, notInCheck);
+          aliasToOpInfo.put(notInCheck.getAlias(), sqnicPlanTopOp);
+          QBJoinTree joinTree_nic = genSQJoinTree(qb, notInCheck,
+              input,
+              aliasToOpInfo);
+          pushJoinFilters(qb, joinTree_nic, aliasToOpInfo, false);
+          input = genJoinOperator(qbSQ_nic, joinTree_nic, aliasToOpInfo, input);
+          inputRR = opParseCtx.get(input).getRowResolver();
+          if ( forHavingClause ) {
+            aliasToOpInfo.put(havingInputAlias, input);
+          }
+        }
 
         /*
          * Gen Join between outer Operator and SQ op
@@ -5267,7 +5296,7 @@ public class SemanticAnalyzer extends Ba
         }
         dpCtx = qbm.getDPCtx(dest);
         if (dpCtx == null) {
-          Utilities.validatePartSpec(dest_tab, partSpec);
+          Utilities.validatePartSpecColumnNames(dest_tab, partSpec);
           dpCtx = new DynamicPartitionCtx(dest_tab, partSpec,
               conf.getVar(HiveConf.ConfVars.DEFAULTPARTITIONNAME),
               conf.getIntVar(HiveConf.ConfVars.DYNAMICPARTITIONMAXPARTSPERNODE));
@@ -5535,7 +5564,7 @@ public class SemanticAnalyzer extends Ba
         table_desc = PlanUtils.getTableDesc(tblDesc, cols, colTypes);
       }
 
-      if (!outputs.add(new WriteEntity(destStr, !isDfsDir))) {
+      if (!outputs.add(new WriteEntity(dest_path, !isDfsDir))) {
         throw new SemanticException(ErrorMsg.OUTPUT_SPECIFIED_MULTIPLE_TIMES
             .getMsg(destStr));
       }
@@ -6760,7 +6789,7 @@ public class SemanticAnalyzer extends Ba
    * Given this information, once we initialize the QBJoinTree, we call the 'parseJoinCondition'
    * method to validate and parse Join conditions.
    */
-  private QBJoinTree genSQJoinTree(QB qb, QBSubQuery subQuery,
+  private QBJoinTree genSQJoinTree(QB qb, ISubQueryJoinInfo subQuery,
       Operator joiningOp,
       Map<String, Operator> aliasToOpInfo)
           throws SemanticException {
@@ -8513,14 +8542,14 @@ public class SemanticAnalyzer extends Ba
       // Theoretically the key prefix could be any unique string shared
       // between TableScanOperator (when publishing) and StatsTask (when aggregating).
       // Here we use
-      // table_name + partitionSec
+      // db_name.table_name + partitionSpec
       // as the prefix for easy of read during explain and debugging.
       // Currently, partition spec can only be static partition.
       String k = tblName + Path.SEPARATOR;
-      tsDesc.setStatsAggPrefix(k);
+      tsDesc.setStatsAggPrefix(tab.getDbName()+"."+k);
 
       // set up WritenEntity for replication
-      outputs.add(new WriteEntity(tab, true));
+      outputs.add(new WriteEntity(tab));
 
       // add WriteEntity for each matching partition
       if (tab.isPartitioned()) {
@@ -8531,7 +8560,7 @@ public class SemanticAnalyzer extends Ba
         if (partitions != null) {
           for (Partition partn : partitions) {
             // inputs.add(new ReadEntity(partn)); // is this needed at all?
-            outputs.add(new WriteEntity(partn, true));
+            outputs.add(new WriteEntity(partn));
           }
         }
       }
@@ -9518,9 +9547,7 @@ public class SemanticAnalyzer extends Ba
     // check for existence of table
     if (ifNotExists) {
       try {
-        Table table = db.getTable(tableName, false); // use getTable(final String tableName, boolean
-                                                     // throwException) which doesn't throw
-                                                     // exception but null if table doesn't exist
+        Table table = getTableWithQN(tableName, false);
         if (table != null) { // table exists
           return null;
         }
@@ -9674,7 +9701,7 @@ public class SemanticAnalyzer extends Ba
   private void validateCreateView(CreateViewDesc createVwDesc)
     throws SemanticException {
     try {
-      Table oldView = db.getTable(createVwDesc.getViewName(), false);
+      Table oldView = getTableWithQN(createVwDesc.getViewName(), false);
 
       // ALTER VIEW AS SELECT requires the view must exist
       if (createVwDesc.getIsAlterViewAs() && oldView == null) {

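Among the SemanticAnalyzer changes above, the stats hunk switches the aggregation key prefix from "table_name/" to "db_name.table_name/", which keeps the prefix distinct across databases. A throwaway sketch of the string being built, using an assumed database "default" and table "src":

    import org.apache.hadoop.fs.Path;

    public class StatsPrefixDemo {
      public static void main(String[] args) {
        String dbName = "default";   // assumed example database
        String tblName = "src";      // assumed example table
        // Mirrors the new code: k = tblName + Path.SEPARATOR, prefix = dbName + "." + k
        String k = tblName + Path.SEPARATOR;
        String prefix = dbName + "." + k;
        System.out.println(prefix);  // prints: default.src/
      }
    }
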
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java Fri Dec 13 10:56:38 2013
@@ -68,6 +68,7 @@ public final class SemanticAnalyzerFacto
     commandType.put(HiveParser.TOK_SHOWINDEXES, HiveOperation.SHOWINDEXES);
     commandType.put(HiveParser.TOK_SHOWPARTITIONS, HiveOperation.SHOWPARTITIONS);
     commandType.put(HiveParser.TOK_SHOWLOCKS, HiveOperation.SHOWLOCKS);
+    commandType.put(HiveParser.TOK_SHOWDBLOCKS, HiveOperation.SHOWLOCKS);
     commandType.put(HiveParser.TOK_CREATEFUNCTION, HiveOperation.CREATEFUNCTION);
     commandType.put(HiveParser.TOK_DROPFUNCTION, HiveOperation.DROPFUNCTION);
     commandType.put(HiveParser.TOK_CREATEMACRO, HiveOperation.CREATEMACRO);
@@ -85,6 +86,8 @@ public final class SemanticAnalyzerFacto
     commandType.put(HiveParser.TOK_QUERY, HiveOperation.QUERY);
     commandType.put(HiveParser.TOK_LOCKTABLE, HiveOperation.LOCKTABLE);
     commandType.put(HiveParser.TOK_UNLOCKTABLE, HiveOperation.UNLOCKTABLE);
+    commandType.put(HiveParser.TOK_LOCKDB, HiveOperation.LOCKDB);
+    commandType.put(HiveParser.TOK_UNLOCKDB, HiveOperation.UNLOCKDB);
     commandType.put(HiveParser.TOK_CREATEROLE, HiveOperation.CREATEROLE);
     commandType.put(HiveParser.TOK_DROPROLE, HiveOperation.DROPROLE);
     commandType.put(HiveParser.TOK_GRANT, HiveOperation.GRANT_PRIVILEGE);
@@ -185,6 +188,7 @@ public final class SemanticAnalyzerFacto
       case HiveParser.TOK_SHOWPARTITIONS:
       case HiveParser.TOK_SHOWINDEXES:
       case HiveParser.TOK_SHOWLOCKS:
+      case HiveParser.TOK_SHOWDBLOCKS:
       case HiveParser.TOK_CREATEINDEX:
       case HiveParser.TOK_DROPINDEX:
       case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT:
@@ -194,6 +198,8 @@ public final class SemanticAnalyzerFacto
       case HiveParser.TOK_ALTERTABLE_ALTERPARTS:
       case HiveParser.TOK_LOCKTABLE:
       case HiveParser.TOK_UNLOCKTABLE:
+      case HiveParser.TOK_LOCKDB:
+      case HiveParser.TOK_UNLOCKDB:
       case HiveParser.TOK_CREATEROLE:
       case HiveParser.TOK_DROPROLE:
       case HiveParser.TOK_GRANT:

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java Fri Dec 13 10:56:38 2013
@@ -410,6 +410,182 @@ public class SubQueryUtils {
     }
   }
 
+  /*
+   * Set of functions to create the Null Check Query for Not-In SubQuery predicates.
+   * For a SubQuery predicate like:
+   *   a not in (select b from R2 where R2.y > 5)
+   * The Not In null check query is:
+   *   (select count(*) as c from R2 where R2.y > 5 and b is null)
+   * This Subquery is joined with the Outer Query plan on the join condition 'c = 0'.
+   * The join condition ensures that in case there are null values in the joining column
+   * the Query returns no rows.
+   * 
+   * The AST tree for this is:
+   * 
+   * ^(TOK_QUERY
+   *    ^(TOK_FROM
+   *        ^(TOK_SUBQUERY
+   *            {the input SubQuery, with correlation removed}
+   *            subQueryAlias 
+   *          ) 
+   *     )
+   *     ^(TOK_INSERT
+   *         ^(TOK_DESTINATION...)
+   *         ^(TOK_SELECT
+   *             ^(TOK_SELECTEXPR {ast tree for count *}
+   *          )
+   *          ^(TOK_WHERE
+   *             {is null check for joining column} 
+   *           )
+   *      )
+   * )
+   */  
+  static ASTNode buildNotInNullCheckQuery(ASTNode subQueryAST, 
+      String subQueryAlias, 
+      String cntAlias,
+      List<ASTNode> corrExprs,
+      RowResolver sqRR) {
+    
+    subQueryAST = (ASTNode) ParseDriver.adaptor.dupTree(subQueryAST);
+    ASTNode qry = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_QUERY, "TOK_QUERY");
+    
+    qry.addChild(buildNotInNullCheckFrom(subQueryAST, subQueryAlias));
+    ASTNode insertAST = buildNotInNullCheckInsert();
+    qry.addChild(insertAST);
+    insertAST.addChild(buildNotInNullCheckSelect(cntAlias));
+    insertAST.addChild(buildNotInNullCheckWhere(subQueryAST, 
+        subQueryAlias, corrExprs, sqRR));
+    
+    return qry;
+  }
+  
+  /*
+   * build:
+   *    ^(TOK_FROM
+   *        ^(TOK_SUBQUERY
+   *            {the input SubQuery, with correlation removed}
+   *            subQueryAlias 
+   *          ) 
+   *     )
+
+   */
+  static ASTNode buildNotInNullCheckFrom(ASTNode subQueryAST, String subQueryAlias) {
+    ASTNode from = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FROM, "TOK_FROM");
+    ASTNode sqExpr = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_SUBQUERY, "TOK_SUBQUERY");
+    sqExpr.addChild(subQueryAST);
+    sqExpr.addChild(createAliasAST(subQueryAlias));
+    from.addChild(sqExpr);
+    return from;
+  }
+  
+  /*
+   * build
+   *     ^(TOK_INSERT
+   *         ^(TOK_DESTINATION...)
+   *      )
+   */
+  static ASTNode buildNotInNullCheckInsert() {
+    ASTNode insert = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_INSERT, "TOK_INSERT");
+    ASTNode dest = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_DESTINATION, "TOK_DESTINATION");
+    ASTNode dir = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_DIR, "TOK_DIR");
+    ASTNode tfile = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_TMP_FILE, "TOK_TMP_FILE");
+    insert.addChild(dest);
+    dest.addChild(dir);
+    dir.addChild(tfile);
+    
+    return insert;
+  }
+  
+  /*
+   * build:
+   *         ^(TOK_SELECT
+   *             ^(TOK_SELECTEXPR {ast tree for count *}
+   *          )
+   */
+  static ASTNode buildNotInNullCheckSelect(String cntAlias) {
+    ASTNode select = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_SELECT, "TOK_SELECT");
+    ASTNode selectExpr = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_SELEXPR, "TOK_SELEXPR");
+    ASTNode countStar = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR, "TOK_FUNCTIONSTAR");
+    ASTNode alias = (createAliasAST(cntAlias));
+    
+    countStar.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, "count"));
+    select.addChild(selectExpr);
+    selectExpr.addChild(countStar);
+    selectExpr.addChild(alias);
+    
+    return select;
+  }
+  
+  /*
+   * build:
+   *          ^(TOK_WHERE
+   *             {is null check for joining column} 
+   *           )
+   */
+  static ASTNode buildNotInNullCheckWhere(ASTNode subQueryAST, 
+      String sqAlias, 
+      List<ASTNode> corrExprs,
+      RowResolver sqRR) {
+    
+    ASTNode sqSelect = (ASTNode) subQueryAST.getChild(1).getChild(1);
+    ASTNode selExpr = (ASTNode) sqSelect.getChild(0);
+    String colAlias = null;
+    
+    if ( selExpr.getChildCount() == 2 ) {
+      colAlias = selExpr.getChild(1).getText();
+    } else if (selExpr.getChild(0).getType() != HiveParser.TOK_ALLCOLREF) {
+      colAlias = sqAlias + "_ninc_col0";
+      selExpr.addChild((ASTNode)ParseDriver.adaptor.create(HiveParser.Identifier, colAlias));
+    } else {
+      List<ColumnInfo> signature = sqRR.getRowSchema().getSignature();
+      ColumnInfo joinColumn = signature.get(0);
+      String[] joinColName = sqRR.reverseLookup(joinColumn.getInternalName());
+      colAlias = joinColName[1];
+    }
+    
+    ASTNode searchCond = isNull(createColRefAST(sqAlias, colAlias));
+    
+    for(ASTNode e : corrExprs ) {
+      ASTNode p = (ASTNode) ParseDriver.adaptor.dupTree(e);
+      p = isNull(p);      
+      searchCond = orAST(searchCond, p);      
+    }
+    
+    ASTNode where = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_WHERE, "TOK_WHERE");
+    where.addChild(searchCond);
+    return where;
+  }
+  
+  static ASTNode buildNotInNullJoinCond(String subqueryAlias, String cntAlias) {
+    
+    ASTNode eq = (ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.EQUAL, "=");
+    
+    eq.addChild(createColRefAST(subqueryAlias, cntAlias));
+    eq.addChild((ASTNode) 
+        ParseDriver.adaptor.create(HiveParser.Number, "0"));
+    
+    return eq;
+  }
+  
+  public static interface ISubQueryJoinInfo {
+    public String getAlias();
+    public JoinType getJoinType();
+    public ASTNode getJoinConditionAST();
+    public QBSubQuery getSubQuery();
+    public ASTNode getSubQueryAST();
+    public String getOuterQueryId();
+  };
+
 }
 
 

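The builder functions added to SubQueryUtils above assemble the 'Null Count' query directly as an AST. The sketch below reproduces the same rewrite purely as text for the example used in the comment, "a not in (select b from R2 where R2.y > 5)"; the subquery alias is invented, and Hive itself never goes through a SQL string like this:

    public class NotInNullCheckRewriteDemo {

      // Purely textual version of the rewrite described above: wrap the subquery,
      // count the rows whose joining column is NULL, and expose the count under cntAlias.
      static String buildNullCheckSql(String subQuerySql, String sqAlias,
                                      String joinCol, String cntAlias) {
        return "select count(*) as " + cntAlias
            + " from (" + subQuerySql + ") " + sqAlias
            + " where " + sqAlias + "." + joinCol + " is null";
      }

      public static void main(String[] args) {
        String nullCheck = buildNullCheckSql("select b from R2 where R2.y > 5",
                                             "sq_1_notin_nullcheck", "b", "c1");
        System.out.println(nullCheck);
        // The outer plan is then LEFT SEMI joined against this query on c1 = 0,
        // which is the condition buildNotInNullJoinCond produces as an AST.
      }
    }
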
Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java Fri Dec 13 10:56:38 2013
@@ -36,6 +36,8 @@ public class DDLWork implements Serializ
   private CreateDatabaseDesc createDatabaseDesc;
   private SwitchDatabaseDesc switchDatabaseDesc;
   private DropDatabaseDesc dropDatabaseDesc;
+  private LockDatabaseDesc lockDatabaseDesc;
+  private UnlockDatabaseDesc unlockDatabaseDesc;
   private CreateTableDesc createTblDesc;
   private CreateTableLikeDesc createTblLikeDesc;
   private CreateViewDesc createVwDesc;
@@ -284,6 +286,24 @@ public class DDLWork implements Serializ
   }
 
   /**
+   * @param lockDatabaseDesc
+   */
+  public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
+      LockDatabaseDesc lockDatabaseDesc) {
+    this(inputs, outputs);
+    this.lockDatabaseDesc = lockDatabaseDesc;
+  }
+
+  /**
+   * @param unlockDatabaseDesc
+   */
+  public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
+      UnlockDatabaseDesc unlockDatabaseDesc) {
+    this(inputs, outputs);
+    this.unlockDatabaseDesc = unlockDatabaseDesc;
+  }
+
+  /**
    * @param showFuncsDesc
    */
   public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
@@ -501,6 +521,22 @@ public class DDLWork implements Serializ
     this.switchDatabaseDesc = switchDatabaseDesc;
   }
 
+  public LockDatabaseDesc getLockDatabaseDesc() {
+    return lockDatabaseDesc;
+  }
+
+  public void setLockDatabaseDesc(LockDatabaseDesc lockDatabaseDesc) {
+    this.lockDatabaseDesc = lockDatabaseDesc;
+  }
+
+  public UnlockDatabaseDesc getUnlockDatabaseDesc() {
+    return unlockDatabaseDesc;
+  }
+
+  public void setUnlockDatabaseDesc(UnlockDatabaseDesc unlockDatabaseDesc) {
+    this.unlockDatabaseDesc = unlockDatabaseDesc;
+  }
+
   /**
    * @return the createTblDesc
    */

Modified: hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
URL: http://svn.apache.org/viewvc/hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java?rev=1550684&r1=1550683&r2=1550684&view=diff
==============================================================================
--- hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java (original)
+++ hive/branches/tez/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java Fri Dec 13 10:56:38 2013
@@ -25,9 +25,11 @@ public enum HiveOperation {
   LOAD("LOAD", null, new Privilege[]{Privilege.ALTER_DATA}),
   EXPORT("EXPORT", new Privilege[]{Privilege.SELECT}, null),
   IMPORT("IMPORT", null, new Privilege[]{Privilege.ALTER_METADATA, Privilege.ALTER_DATA}),
-  CREATEDATABASE("CREATEDATABASE", null, null),
-  DROPDATABASE("DROPDATABASE", null, null),
-  SWITCHDATABASE("SWITCHDATABASE", null, null),
+  CREATEDATABASE("CREATEDATABASE", null, new Privilege[]{Privilege.CREATE}),
+  DROPDATABASE("DROPDATABASE", null, new Privilege[]{Privilege.DROP}),
+  SWITCHDATABASE("SWITCHDATABASE", new Privilege[]{Privilege.SELECT}, null),
+  LOCKDB("LOCKDATABASE",  new Privilege[]{Privilege.LOCK}, null),
+  UNLOCKDB("UNLOCKDATABASE",  new Privilege[]{Privilege.LOCK}, null),
   DROPTABLE ("DROPTABLE", null, new Privilege[]{Privilege.DROP}),
   DESCTABLE("DESCTABLE", null, null),
   DESCFUNCTION("DESCFUNCTION", null, null),


