hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1601032 - in /hive/trunk: metastore/src/model/org/apache/hadoop/hive/metastore/model/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Fri, 06 Jun 2014 22:59:00 GMT
Author: hashutosh
Date: Fri Jun  6 22:58:59 2014
New Revision: 1601032

URL: http://svn.apache.org/r1601032
Log:
HIVE-7168 : Don't require to name all columns in analyze statements if stats collection is
for all columns (Ashutosh Chauhan via Prasanth J)

Modified:
    hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
    hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q
    hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q
    hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
    hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
    hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out

Modified: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java (original)
+++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java Fri Jun  6 22:58:59 2014
@@ -204,7 +204,7 @@ public class MPartitionColumnStatistics 
     return longLowValue;
   }
 
-  public void setLongLowValue(long longLowValue) {
+  public void setLongLowValue(Long longLowValue) {
     this.longLowValue = longLowValue;
   }
 
@@ -212,7 +212,7 @@ public class MPartitionColumnStatistics 
     return longHighValue;
   }
 
-  public void setLongHighValue(long longHighValue) {
+  public void setLongHighValue(Long longHighValue) {
     this.longHighValue = longHighValue;
   }
 
@@ -220,7 +220,7 @@ public class MPartitionColumnStatistics 
     return doubleLowValue;
   }
 
-  public void setDoubleLowValue(double doubleLowValue) {
+  public void setDoubleLowValue(Double doubleLowValue) {
     this.doubleLowValue = doubleLowValue;
   }
 
@@ -228,7 +228,7 @@ public class MPartitionColumnStatistics 
     return doubleHighValue;
   }
 
-  public void setDoubleHighValue(double doubleHighValue) {
+  public void setDoubleHighValue(Double doubleHighValue) {
     this.doubleHighValue = doubleHighValue;
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Fri Jun  6 22:58:59 2014
@@ -54,16 +54,15 @@ public class ColumnStatsSemanticAnalyzer
   private boolean isRewritten;
 
   private boolean isTableLevel;
-  private String tableName;
   private List<String> colNames;
   private List<String> colType;
   private String partName;
+  private Table tbl;
 
   private class PartitionList {
     private final String[] partKeys;
-    private String[] partKeyTypes;
     private final String[] partValues;
-    private int numPartitions;
+    private final int numPartitions;
     private int numPartitionValues;
 
     PartitionList(int numPartitions) {
@@ -76,10 +75,6 @@ public class ColumnStatsSemanticAnalyzer
       return numPartitions;
     }
 
-    public void setNumPartitions(int numPartitions) {
-      this.numPartitions = numPartitions;
-    }
-
     public String[] getPartValues() {
       return partValues;
     }
@@ -103,18 +98,6 @@ public class ColumnStatsSemanticAnalyzer
     public void setNumPartValues(int numPartValues) {
       numPartitionValues = numPartValues;
     }
-
-    public String[] getPartKeyTypes() {
-      return partKeyTypes;
-    }
-
-    public void setPartKeyTypes(String[] partKeyTypes) {
-      this.partKeyTypes = partKeyTypes;
-    }
-
-    public void setPartKeyType(String partKeyType, int index) {
-      partKeyTypes[index] = partKeyType;
-    }
   }
 
   public ColumnStatsSemanticAnalyzer(HiveConf conf) throws SemanticException {
@@ -130,7 +113,7 @@ public class ColumnStatsSemanticAnalyzer
         child0 = (ASTNode) child0.getChild(0);
         if (child0.getToken().getType() == HiveParser.TOK_TABNAME) {
           child1 = (ASTNode) tree.getChild(1);
-          if (child1.getToken().getType() == HiveParser.TOK_TABCOLNAME) {
+          if (child1.getToken().getType() == HiveParser.KW_COLUMNS) {
             rwt = true;
           }
         }
@@ -151,8 +134,13 @@ public class ColumnStatsSemanticAnalyzer
     return isPartitioned;
   }
 
-  private String getTableName(ASTNode tree) {
-    return getUnescapedName((ASTNode) tree.getChild(0).getChild(0));
+  private Table getTable(ASTNode tree) throws SemanticException {
+    String tableName = getUnescapedName((ASTNode) tree.getChild(0).getChild(0));
+    try {
+      return db.getTable(tableName);
+    } catch (HiveException e) {
+      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
+    }
   }
 
   private PartitionList getPartKeyValuePairsFromAST(ASTNode tree) {
@@ -180,27 +168,26 @@ public class ColumnStatsSemanticAnalyzer
     return partList;
   }
 
-  private List<String> getColumnName(ASTNode tree) {
-    int numCols = tree.getChild(1).getChildCount();
-    List<String> colName = new LinkedList<String>();
-    for (int i = 0; i < numCols; i++) {
-      colName.add(i, new String(getUnescapedName((ASTNode) tree.getChild(1).getChild(i))));
-    }
-    return colName;
-  }
+  private List<String> getColumnName(ASTNode tree) throws SemanticException{
 
-  private int getNumColumns(ASTNode tree) {
-    return tree.getChild(1).getChildCount();
+    switch (tree.getChildCount()) {
+      case 2:
+       return Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
+      case 3:
+        int numCols = tree.getChild(2).getChildCount();
+        List<String> colName = new LinkedList<String>();
+        for (int i = 0; i < numCols; i++) {
+          colName.add(i, new String(getUnescapedName((ASTNode) tree.getChild(2).getChild(i))));
+        }
+        return colName;
+      default:
+        throw new SemanticException("Internal error. Expected number of children of ASTNode to be"
+            + " either 2 or 3. Found : " + tree.getChildCount());
+    }
   }
 
-  private void validatePartitionKeys(String tableName, PartitionList partList) throws
+  private void validatePartitionKeys(PartitionList partList) throws
     SemanticException {
-    Table tbl;
-    try {
-      tbl = db.getTable(tableName);
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
-    }
 
     List<FieldSchema> partKeys = tbl.getPartitionKeys();
     String[] inputPartKeys = partList.getPartKeys();
@@ -221,15 +208,8 @@ public class ColumnStatsSemanticAnalyzer
     }
   }
 
-  private String[] getPartitionKeysType(String tableName, PartitionList partList) throws
+  private String[] getPartitionKeysType(PartitionList partList) throws
     SemanticException {
-    Table tbl;
-    try {
-      tbl = db.getTable(tableName);
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
-    }
-
     List<FieldSchema> partKeys = tbl.getPartitionKeys();
     String[] inputPartKeys = partList.getPartKeys();
     String[] inputPartKeyTypes = new String[inputPartKeys.length];
@@ -245,20 +225,13 @@ public class ColumnStatsSemanticAnalyzer
     return inputPartKeyTypes;
   }
 
-  private String constructPartitionName(String tableName, PartitionList partList)
+  private String constructPartitionName(PartitionList partList)
     throws SemanticException {
-    Table tbl;
     Partition part;
     String[] partKeys = partList.getPartKeys();
     String[] partValues = partList.getPartValues();
-
-    try {
-      tbl = db.getTable(tableName);
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
-    }
-
     Map<String, String> partSpec = new LinkedHashMap<String, String>();
+
     for (int i=0; i<partKeys.length; i++) {
       partSpec.put(partKeys[i].toLowerCase(), partValues[i]);
     }
@@ -275,7 +248,7 @@ public class ColumnStatsSemanticAnalyzer
     return part.getName();
   }
 
-  private void validatePartitionClause(String tableName, PartitionList partList) throws
+  private void validatePartitionClause(PartitionList partList) throws
     SemanticException {
     int numPartKeys = partList.getNumPartitions();
     int numPartValues = partList.getNumPartValues();
@@ -284,7 +257,7 @@ public class ColumnStatsSemanticAnalyzer
       throw new SemanticException(ErrorMsg.COLUMNSTATSCOLLECTOR_INVALID_SYNTAX.getMsg());
     }
     // Validate the user specified partition keys match the partition keys in the table
-    validatePartitionKeys(tableName, partList);
+    validatePartitionKeys(partList);
   }
 
   private StringBuilder genPartitionClause(PartitionList partList) throws SemanticException {
@@ -295,7 +268,7 @@ public class ColumnStatsSemanticAnalyzer
     StringBuilder retClause = null;
     String[] partKeys = partList.getPartKeys();
     String[] partValues = partList.getPartValues();
-    String[] partKeysType = getPartitionKeysType(tableName, partList);
+    String[] partKeysType = getPartitionKeysType(partList);
 
     for (int i = 0; i < partList.getNumPartitions(); i++) {
       if (partValues[i] != null) {
@@ -396,41 +369,24 @@ public class ColumnStatsSemanticAnalyzer
     return numBitVectors;
   }
 
-  private List<String> getTableColumnType(String tableName, List<String> colNames, int numCols)
+  private List<String> getTableColumnType(List<String> colNames)
       throws SemanticException{
     List<String> colTypes = new LinkedList<String>();
-    String colName;
-    Table tbl;
-    try {
-      tbl = db.getTable(tableName);
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
-    }
-
     List<FieldSchema> cols = tbl.getCols();
 
-    for (int i=0; i <numCols; i++) {
-      colName = colNames.get(i);
+    for (String colName : colNames) {
       for (FieldSchema col: cols) {
         if (colName.equalsIgnoreCase(col.getName())) {
-          colTypes.add(i, new String(col.getType()));
+          colTypes.add(new String(col.getType()));
         }
       }
     }
     return colTypes;
   }
 
-  private List<String> getPartitionColumnType(String tableName, String partName,
-    List<String> colNames, int numCols) throws SemanticException {
+  private List<String> getPartitionColumnType(String partName,
+    List<String> colNames) throws SemanticException {
     List<String> colTypes = new LinkedList<String>();
-    String colName;
-    Table tbl;
-    try {
-      tbl = db.getTable(tableName);
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
-    }
-
     List<String> partNames = new ArrayList<String>();
     partNames.add(partName);
     List<Partition> partitionList;
@@ -443,11 +399,10 @@ public class ColumnStatsSemanticAnalyzer
     Partition part = partitionList.get(0);
     List<FieldSchema> cols = part.getCols();
 
-    for (int i=0; i <numCols; i++) {
-      colName = colNames.get(i);
+    for (String colName : colNames) {
       for (FieldSchema col: cols) {
         if (colName.equalsIgnoreCase(col.getName())) {
-          colTypes.add(i, new String(col.getType()));
+          colTypes.add(new String(col.getType()));
         }
       }
     }
@@ -469,7 +424,7 @@ public class ColumnStatsSemanticAnalyzer
       rewrittenQueryBuilder.append(" )");
     }
     rewrittenQueryBuilder.append(" from ");
-    rewrittenQueryBuilder.append(tableName);
+    rewrittenQueryBuilder.append(tbl.getTableName());
     isRewritten = true;
 
     // If partition level statistics is requested, add predicate and group by as needed to rewritten
@@ -514,25 +469,24 @@ public class ColumnStatsSemanticAnalyzer
      * an aggregation.
      */
     if (shouldRewrite(tree)) {
-      tableName = new String(getTableName(tree));
+      tbl = getTable(tree);
       colNames = getColumnName(tree);
-      int numCols = getNumColumns(tree);
       // Save away the original AST
       originalTree = tree;
       boolean isPartitionStats = isPartitionLevelStats(tree);
       PartitionList partList = null;
-      checkForPartitionColumns(colNames, getPartitionKeys(tableName));
-      validateSpecifiedColumnNames(tableName, colNames);
+      checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
+      validateSpecifiedColumnNames(colNames);
 
       if (isPartitionStats) {
         isTableLevel = false;
         partList = getPartKeyValuePairsFromAST(tree);
-        validatePartitionClause(tableName, partList);
-        partName = constructPartitionName(tableName, partList);
-        colType = getPartitionColumnType(tableName, partName, colNames, numCols);
+        validatePartitionClause(partList);
+        partName = constructPartitionName(partList);
+        colType = getPartitionColumnType(partName, colNames);
       } else {
         isTableLevel = true;
-        colType = getTableColumnType(tableName, colNames, numCols);
+        colType = getTableColumnType(colNames);
       }
 
       int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
@@ -547,16 +501,9 @@ public class ColumnStatsSemanticAnalyzer
   }
 
   // fail early if the columns specified for column statistics are not valid
-  private void validateSpecifiedColumnNames(String tableName, List<String> specifiedCols)
+  private void validateSpecifiedColumnNames(List<String> specifiedCols)
       throws SemanticException {
-    List<FieldSchema> fields = null;
-    try {
-      fields = db.getTable(tableName).getAllCols();
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
-    }
-    List<String> tableCols = Utilities.getColumnNamesFromFieldSchema(fields);
-
+    List<String> tableCols = Utilities.getColumnNamesFromFieldSchema(tbl.getCols());
     for(String sc : specifiedCols) {
       if (!tableCols.contains(sc.toLowerCase())) {
         String msg = "'" + sc + "' (possible columns are " + tableCols.toString() + ")";
@@ -565,17 +512,6 @@ public class ColumnStatsSemanticAnalyzer
     }
   }
 
-  private List<String> getPartitionKeys(String tableName) throws SemanticException {
-    List<FieldSchema> fields;
-    try {
-      fields = db.getTable(tableName).getPartitionKeys();
-    } catch (HiveException e) {
-      throw new SemanticException(ErrorMsg.INVALID_TABLE.getMsg(tableName));
-    }
-
-    return Utilities.getColumnNamesFromFieldSchema(fields);
-  }
-
   private void checkForPartitionColumns(List<String> specifiedCols, List<String> partCols)
       throws SemanticException {
     // Raise error if user has specified partition column for stats
@@ -602,7 +538,7 @@ public class ColumnStatsSemanticAnalyzer
       qb = getQB();
       qb.setAnalyzeRewrite(true);
       qbp = qb.getParseInfo();
-      qbp.setTableName(tableName);
+      qbp.setTableName(tbl.getTableName());
       qbp.setTblLvl(isTableLevel);
 
       if (!isTableLevel) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g Fri Jun  6 22:58:59 2014
@@ -1312,7 +1312,9 @@ descStatement
 analyzeStatement
 @init { pushMsg("analyze statement", state); }
 @after { popMsg(state); }
-    : KW_ANALYZE KW_TABLE (parttype=tableOrPartition) KW_COMPUTE KW_STATISTICS ((noscan=KW_NOSCAN) | (partialscan=KW_PARTIALSCAN) | (KW_FOR KW_COLUMNS statsColumnName=columnNameList))? -> ^(TOK_ANALYZE $parttype $noscan? $partialscan? $statsColumnName?)
+    : KW_ANALYZE KW_TABLE (parttype=tableOrPartition) KW_COMPUTE KW_STATISTICS ((noscan=KW_NOSCAN) | (partialscan=KW_PARTIALSCAN) 
+                                                      | (KW_FOR KW_COLUMNS (statsColumnName=columnNameList)?))?
+      -> ^(TOK_ANALYZE $parttype $noscan? $partialscan? KW_COLUMNS? $statsColumnName?)
     ;
 
 showStatement

Modified: hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/columnstats_partlvl.q Fri Jun  6 22:58:59 2014
@@ -18,3 +18,8 @@ analyze table Employee_Part partition (e
 explain extended
 analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID;
 analyze table Employee_Part partition (employeeSalary=4000.0) compute statistics for columns employeeID;
+
+explain 
+analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns;
+analyze table Employee_Part partition (employeeSalary=2000.0) compute statistics for columns;
+

Modified: hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/columnstats_tbllvl.q Fri Jun  6 22:58:59 2014
@@ -23,6 +23,15 @@ analyze table UserVisits_web_text_none c
 
 analyze table UserVisits_web_text_none compute statistics for columns sourceIP, avgTimeOnSite, adRevenue;
 
+explain 
+analyze table UserVisits_web_text_none compute statistics for columns;
+
+analyze table UserVisits_web_text_none compute statistics for columns;
+
+describe formatted UserVisits_web_text_none destURL;
+describe formatted UserVisits_web_text_none adRevenue;
+describe formatted UserVisits_web_text_none avgTimeOnSite;
+ 
 CREATE TABLE empty_tab(
    a int,
    b double,

Modified: hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/columnstats_partlvl.q.out Fri Jun  6 22:58:59 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out Fri Jun  6 22:58:59 2014 differ

Modified: hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out?rev=1601032&r1=1601031&r2=1601032&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out Fri Jun  6 22:58:59 2014 differ



Mime
View raw message