hive-commits mailing list archives

From: hashut...@apache.org
Subject: svn commit: r1615865 [10/10] - in /hive/trunk: metastore/if/ metastore/src/gen/thrift/gen-cpp/ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ metastore/src/gen/thrift/gen-php/metastore/ metastore/src/gen/thrift/gen-py/hive_...
Date: Tue, 05 Aug 2014 05:52:53 GMT
Modified: hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb (original)
+++ hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb Tue Aug  5 05:52:51 2014
@@ -1008,6 +1008,26 @@ class ColumnStatistics
   ::Thrift::Struct.generate_accessors self
 end
 
+class AggrStats
+  include ::Thrift::Struct, ::Thrift::Struct_Union
+  COLSTATS = 1
+  PARTSFOUND = 2
+
+  FIELDS = {
+    COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatisticsObj}},
+    PARTSFOUND => {:type => ::Thrift::Types::I64, :name => 'partsFound'}
+  }
+
+  def struct_fields; FIELDS; end
+
+  def validate
+    raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field colStats is unset!') unless @colStats
+    raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field partsFound is unset!') unless @partsFound
+  end
+
+  ::Thrift::Struct.generate_accessors self
+end
+
 class Schema
   include ::Thrift::Struct, ::Thrift::Struct_Union
   FIELDSCHEMAS = 1
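
For context, the generated Java bean for the new AggrStats struct exposes the same two fields as the Ruby class above. The short sketch below is illustrative only (empty stats list, placeholder count, hypothetical class name); it relies on the constructor and accessors that this commit itself uses in HiveMetaStore.java and StatsUtils.java.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

public class AggrStatsSketch {
  public static void main(String[] args) {
    // colStats: one aggregated ColumnStatisticsObj per requested column.
    List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>();
    // partsFound: how many of the requested partitions had stats. Per the TODO in
    // HiveMetaStore.java below, this commit fills it with the requested partition count.
    AggrStats aggrStats = new AggrStats(colStats, 0L);
    System.out.println("columns aggregated: " + aggrStats.getColStats().size());
    System.out.println("partitions found: " + aggrStats.getPartsFound());
  }
}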

Modified: hive/trunk/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb (original)
+++ hive/trunk/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb Tue Aug  5 05:52:51 2014
@@ -1231,6 +1231,23 @@ module ThriftHiveMetastore
       raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_partitions_statistics_req failed: unknown result')
     end
 
+    def get_aggr_stats_for(request)
+      send_get_aggr_stats_for(request)
+      return recv_get_aggr_stats_for()
+    end
+
+    def send_get_aggr_stats_for(request)
+      send_message('get_aggr_stats_for', Get_aggr_stats_for_args, :request => request)
+    end
+
+    def recv_get_aggr_stats_for()
+      result = receive_message(Get_aggr_stats_for_result)
+      return result.success unless result.success.nil?
+      raise result.o1 unless result.o1.nil?
+      raise result.o2 unless result.o2.nil?
+      raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_aggr_stats_for failed: unknown result')
+    end
+
     def delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
       send_delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
       return recv_delete_partition_column_statistics()
@@ -2824,6 +2841,19 @@ module ThriftHiveMetastore
       write_result(result, oprot, 'get_partitions_statistics_req', seqid)
     end
 
+    def process_get_aggr_stats_for(seqid, iprot, oprot)
+      args = read_args(iprot, Get_aggr_stats_for_args)
+      result = Get_aggr_stats_for_result.new()
+      begin
+        result.success = @handler.get_aggr_stats_for(args.request)
+      rescue ::NoSuchObjectException => o1
+        result.o1 = o1
+      rescue ::MetaException => o2
+        result.o2 = o2
+      end
+      write_result(result, oprot, 'get_aggr_stats_for', seqid)
+    end
+
     def process_delete_partition_column_statistics(seqid, iprot, oprot)
       args = read_args(iprot, Delete_partition_column_statistics_args)
       result = Delete_partition_column_statistics_result.new()
@@ -6077,6 +6107,42 @@ module ThriftHiveMetastore
     ::Thrift::Struct.generate_accessors self
   end
 
+  class Get_aggr_stats_for_args
+    include ::Thrift::Struct, ::Thrift::Struct_Union
+    REQUEST = 1
+
+    FIELDS = {
+      REQUEST => {:type => ::Thrift::Types::STRUCT, :name => 'request', :class => ::PartitionsStatsRequest}
+    }
+
+    def struct_fields; FIELDS; end
+
+    def validate
+    end
+
+    ::Thrift::Struct.generate_accessors self
+  end
+
+  class Get_aggr_stats_for_result
+    include ::Thrift::Struct, ::Thrift::Struct_Union
+    SUCCESS = 0
+    O1 = 1
+    O2 = 2
+
+    FIELDS = {
+      SUCCESS => {:type => ::Thrift::Types::STRUCT, :name => 'success', :class => ::AggrStats},
+      O1 => {:type => ::Thrift::Types::STRUCT, :name => 'o1', :class => ::NoSuchObjectException},
+      O2 => {:type => ::Thrift::Types::STRUCT, :name => 'o2', :class => ::MetaException}
+    }
+
+    def struct_fields; FIELDS; end
+
+    def validate
+    end
+
+    ::Thrift::Struct.generate_accessors self
+  end
+
   class Delete_partition_column_statistics_args
     include ::Thrift::Struct, ::Thrift::Struct_Union
     DB_NAME = 1

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Tue Aug  5 05:52:51 2014
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -4976,6 +4977,25 @@ public class HiveMetaStore extends Thrif
       return rolePrinGrantList;
     }
 
+    @Override
+    public AggrStats get_aggr_stats_for(PartitionsStatsRequest request)
+        throws NoSuchObjectException, MetaException, TException {
+      startFunction("get_aggr_stats_for: db=" + request.getDbName() + " table=" + request.getTblName());
+      AggrStats aggrStats = null;
+      try {
+        // TODO: We are setting partsFound (the number of partitions for which stats were
+        // retrieved) to the number of partitions that came in on the request. This is not
+        // correct, but currently no users of this API rely on it: the one current consumer,
+        // stats annotation, does not care, while StatsOptimizer will, so this needs to be
+        // fixed before StatsOptimizer starts using it.
+        aggrStats = new AggrStats(getMS().get_aggr_stats_for(request.getDbName(),
+          request.getTblName(), request.getPartNames(), request.getColNames()), request.getPartNames().size());
+        return aggrStats;
+      } finally {
+        endFunction("get_aggr_stats_for: ", aggrStats == null, null, request.getTblName());
+      }
+
+    }
+
   }
 
  public static IHMSHandler newHMSHandler(String name, HiveConf hiveConf) throws MetaException {
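
The new handler above answers a PartitionsStatsRequest with a single AggrStats. The minimal sketch below shows how such a request could be assembled on the caller side, assuming the four-argument (dbName, tblName, colNames, partNames) constructor used by HiveMetaStoreClient.java later in this commit; all database, table, column, and partition names are placeholders.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;

public class PartitionsStatsRequestSketch {
  public static void main(String[] args) {
    // Placeholder names; the request carries db, table, the columns to aggregate,
    // and the partitions the aggregation should cover.
    List<String> colNames = Arrays.asList("col1", "col2");
    List<String> partNames = Arrays.asList("ds=2014-08-01", "ds=2014-08-02");
    PartitionsStatsRequest request =
        new PartitionsStatsRequest("example_db", "example_table", colNames, partNames);
    // HMSHandler.get_aggr_stats_for(request) then returns a single AggrStats with one
    // aggregated ColumnStatisticsObj per entry in colNames.
    System.out.println(request);
  }
}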

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java Tue Aug  5 05:52:51 2014
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -1820,4 +1821,11 @@ public class HiveMetaStoreClient impleme
       NoSuchObjectException, UnsupportedOperationException {
     client.drop_table_with_environment_context(dbname, name, deleteData, envContext);
   }
+
+  @Override
+  public AggrStats getAggrColStatsFor(String dbName, String tblName,
+    List<String> colNames, List<String> partNames) throws NoSuchObjectException, MetaException, TException {
+    PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames);
+    return client.get_aggr_stats_for(req);
+  }
 }
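
A hedged sketch of how client code could exercise the new getAggrColStatsFor API; it assumes an already-constructed IMetaStoreClient (for example a HiveMetaStoreClient) and uses placeholder names and a hypothetical sketch class throughout.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.metastore.IMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

public class AggrColStatsClientSketch {
  // Prints aggregated column statistics for a few columns across a few partitions.
  static void printAggrStats(IMetaStoreClient msc) throws Exception {
    List<String> colNames = Arrays.asList("col1", "col2");                    // placeholders
    List<String> partNames = Arrays.asList("ds=2014-08-01", "ds=2014-08-02"); // placeholders
    // Note the argument order: column names first, then partition names.
    AggrStats aggrStats = msc.getAggrColStatsFor("example_db", "example_table", colNames, partNames);
    System.out.println("partitions found: " + aggrStats.getPartsFound());
    for (ColumnStatisticsObj cso : aggrStats.getColStats()) {
      System.out.println(cso.getColName() + " (" + cso.getColType() + "): " + cso.getStatsData());
    }
  }
}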

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java Tue Aug  5 05:52:51 2014
@@ -38,6 +38,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -1290,4 +1291,7 @@ public interface IMetaStoreClient {
    */
   GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(
       GetRoleGrantsForPrincipalRequest getRolePrincReq) throws MetaException, TException;
+
+  public AggrStats getAggrColStatsFor(String dbName, String tblName,
+      List<String> colNames, List<String> partName)  throws NoSuchObjectException, MetaException, TException;
 }

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Tue Aug  5 05:52:51 2014
@@ -427,6 +427,7 @@ class MetaStoreDirectSql {
         + " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null"
         + " order by \"PART_ID\" asc";
     loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+      @Override
       public void apply(Partition t, Object[] fields) {
         t.putToParameters((String)fields[1], (String)fields[2]);
       }});
@@ -435,6 +436,7 @@ class MetaStoreDirectSql {
         + " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0"
         + " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc";
     loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+      @Override
       public void apply(Partition t, Object[] fields) {
         t.addToValues((String)fields[1]);
       }});
@@ -452,6 +454,7 @@ class MetaStoreDirectSql {
         + " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null"
         + " order by \"SD_ID\" asc";
     loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+      @Override
       public void apply(StorageDescriptor t, Object[] fields) {
         t.putToParameters((String)fields[1], (String)fields[2]);
       }});
@@ -460,6 +463,7 @@ class MetaStoreDirectSql {
         + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
         + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
     loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+      @Override
       public void apply(StorageDescriptor t, Object[] fields) {
         if (fields[2] == null) return;
         t.addToSortCols(new Order((String)fields[1], extractSqlInt(fields[2])));
@@ -469,6 +473,7 @@ class MetaStoreDirectSql {
         + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
         + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
     loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+      @Override
       public void apply(StorageDescriptor t, Object[] fields) {
         t.addToBucketCols((String)fields[1]);
       }});
@@ -479,6 +484,7 @@ class MetaStoreDirectSql {
         + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
     boolean hasSkewedColumns =
       loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+        @Override
         public void apply(StorageDescriptor t, Object[] fields) {
           if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
           t.getSkewedInfo().addToSkewedColNames((String)fields[1]);
@@ -502,6 +508,7 @@ class MetaStoreDirectSql {
       loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
         private Long currentListId;
         private List<String> currentList;
+        @Override
         public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
           if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
           // Note that this is not a typical list accumulator - there's no call to finalize
@@ -539,6 +546,7 @@ class MetaStoreDirectSql {
       loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
         private Long currentListId;
         private List<String> currentList;
+        @Override
         public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
           if (!t.isSetSkewedInfo()) {
             SkewedInfo skewedInfo = new SkewedInfo();
@@ -572,6 +580,7 @@ class MetaStoreDirectSql {
          + " from \"COLUMNS_V2\" where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0"
           + " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc";
      loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() {
+        @Override
         public void apply(List<FieldSchema> t, Object[] fields) {
           t.add(new FieldSchema((String)fields[2], (String)fields[3], (String)fields[1]));
         }});
@@ -582,6 +591,7 @@ class MetaStoreDirectSql {
         + " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null"
         + " order by \"SERDE_ID\" asc";
     loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() {
+      @Override
       public void apply(SerDeInfo t, Object[] fields) {
         t.putToParameters((String)fields[1], (String)fields[2]);
       }});
@@ -891,19 +901,49 @@ class MetaStoreDirectSql {
     return result;
   }
 
-  public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+  public List<ColumnStatisticsObj> aggrColStatsForPartitions(String dbName, String tableName,
       List<String> partNames, List<String> colNames) throws MetaException {
-    if (colNames.isEmpty() || partNames.isEmpty()) {
-      return Lists.newArrayList();
-    }
-    boolean doTrace = LOG.isDebugEnabled();
-    long start = doTrace ? System.nanoTime() : 0;
-    String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+    String qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
+      + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
+      + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
+      + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
       + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
       + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
-      + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+      + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+
+    boolean doTrace = LOG.isDebugEnabled();
+    long start = doTrace ? System.nanoTime() : 0;
+    Query query = pm.newQuery("javax.jdo.query.SQL", qText);
+    Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
+    if (qResult == null) {
+      query.closeAll();
+      return Lists.newArrayList();
+    }
+    List<Object[]> list = ensureList(qResult);
+    List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
+    for (Object[] row : list) {
+      colStats.add(prepareCSObj(row,0));
+    }
+    long end = doTrace ? System.nanoTime() : 0;
+    timingTrace(doTrace, qText, start, end);
+    query.closeAll();
+    return colStats;
+  }
+
+  private ColumnStatisticsObj prepareCSObj (Object[] row, int i) {
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
+    Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
+        declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+        avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
+    StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
+        llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
+    return cso;
+  }
+
+  private Object[] prepareParams(String dbName, String tableName, List<String> partNames,
+    List<String> colNames) throws MetaException {
 
-    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
     Object[] params = new Object[colNames.size() + partNames.size() + 2];
     int paramI = 0;
     params[paramI++] = dbName;
@@ -914,7 +954,24 @@ class MetaStoreDirectSql {
     for (String partName : partNames) {
       params[paramI++] = partName;
     }
-    Object qResult = query.executeWithArray(params);
+
+    return params;
+  }
+
+  public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+      List<String> partNames, List<String> colNames) throws MetaException {
+    if (colNames.isEmpty() || partNames.isEmpty()) {
+      return Lists.newArrayList();
+    }
+    boolean doTrace = LOG.isDebugEnabled();
+    long start = doTrace ? System.nanoTime() : 0;
+    String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+      + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+      + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
+      + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+
+    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
+    Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
     long queryTime = doTrace ? System.nanoTime() : 0;
     if (qResult == null) {
       query.closeAll();
@@ -963,16 +1020,7 @@ class MetaStoreDirectSql {
      if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) {
         csd.setLastAnalyzed(extractSqlLong(laObj));
       }
-      ColumnStatisticsData data = new ColumnStatisticsData();
-      // see STATS_COLLIST
-      int i = offset;
-      ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
-      Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
-          declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
-          avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
-      StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
-          llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
-      csos.add(cso);
+      csos.add(prepareCSObj(row, offset));
     }
     result.setStatsObj(csos);
     return result;
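
For readers following the direct-SQL path: the new aggregation query binds its placeholders in the order produced by prepareParams, i.e. DB_NAME, TABLE_NAME, then one value per column name, then one per partition name. The fragment below only illustrates that ordering against a plain JDBC PreparedStatement, with a hypothetical connection and query string; the real code goes through JDO's javax.jdo.query.SQL as shown above.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

public class AggrStatsParamOrderSketch {
  // Mirrors the ordering produced by MetaStoreDirectSql.prepareParams():
  // DB_NAME, TABLE_NAME, then one value per column name, then one per partition name.
  static void bindParams(Connection conn, String sqlWithPlaceholders, String dbName,
      String tableName, List<String> colNames, List<String> partNames) throws SQLException {
    PreparedStatement ps = conn.prepareStatement(sqlWithPlaceholders);
    int idx = 1;
    ps.setString(idx++, dbName);
    ps.setString(idx++, tableName);
    for (String col : colNames) {
      ps.setString(idx++, col);   // fills the "COLUMN_NAME" in (...) placeholders
    }
    for (String part : partNames) {
      ps.setString(idx++, part);  // fills the "PARTITION_NAME" in (...) placeholders
    }
    // executeQuery() would then return one aggregated row per (COLUMN_NAME, COLUMN_TYPE).
  }
}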

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Aug  5 05:52:51 2014
@@ -5901,6 +5901,29 @@ public class ObjectStore implements RawS
     }.run(true);
   }
 
+
+  @Override
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+      final List<String> partNames, final List<String> colNames) throws MetaException, NoSuchObjectException {
+
+    return new GetListHelper<ColumnStatisticsObj>(dbName, tblName, true, false) {
+      @Override
+      protected List<ColumnStatisticsObj> getSqlResult(
+          GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException {
+        return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, colNames);
+      }
+
+      @Override
+      protected List<ColumnStatisticsObj> getJdoResult(
+          GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException,
+          NoSuchObjectException {
+        // This is a fast path for query optimization: if we can find this info quickly using
+        // directSql, do it. There is no point in falling back to the slow path here.
+        throw new MetaException("Jdo path is not implemented for stats aggr.");
+      }
+      }.run(true);
+  }
+
   private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(
       Table table, List<String> partNames, List<String> colNames)
           throws NoSuchObjectException, MetaException {
@@ -6747,5 +6770,4 @@ public class ObjectStore implements RawS
     }
     return funcs;
   }
-
 }

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Tue Aug  5 05:52:51 2014
@@ -27,6 +27,7 @@ import java.util.Map;
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -547,4 +548,6 @@ public interface RawStore extends Config
    */
   public List<String> getFunctions(String dbName, String pattern) throws MetaException;
 
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+    List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
 }

Modified: hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original)
+++ hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Tue Aug  5 05:52:51 2014
@@ -25,6 +25,7 @@ import java.util.Map;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -709,5 +711,12 @@ public class DummyRawStoreControlledComm
     return objectStore.getFunctions(dbName, pattern);
   }
 
+  @Override
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+      String tblName, List<String> partNames, List<String> colNames)
+      throws MetaException {
+    return null;
+  }
+
 
 }

Modified: hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Tue Aug  5 05:52:51 2014
@@ -26,6 +26,7 @@ import junit.framework.Assert;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -726,7 +728,12 @@ public class DummyRawStoreForJdoConnecti
     return null;
   }
 
-
+  @Override
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+      String tblName, List<String> partNames, List<String> colNames)
+      throws MetaException {
+    return null;
+  }
 }
 
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java Tue Aug  5 05:52:51 2014
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.RetryingMetaStoreClient;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -2576,6 +2577,16 @@ private void constructOneLBLocationMap(F
     }
   }
 
+  public AggrStats getAggrColStatsFor(String dbName, String tblName,
+    List<String> colNames, List<String> partName) {
+    try {
+      return getMSC().getAggrColStatsFor(dbName, tblName, colNames, partName);
+    } catch (Exception e) {
+      LOG.debug(StringUtils.stringifyException(e));
+      return null;
+    }
+  }
+
   public boolean deleteTableColumnStatistics(String dbName, String tableName, String colName)
     throws HiveException {
     try {
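
Because getAggrColStatsFor above logs and swallows any exception, callers see only a null return when aggregation fails. A small illustrative sketch of the expected null check (placeholder names and a hypothetical helper, mirroring what StatsUtils does later in this commit):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;

public class AggrStatsNullCheckSketch {
  static boolean haveAggrStats(String db, String tbl) throws HiveException {
    List<String> cols = Arrays.asList("col1");            // placeholder column
    List<String> parts = Arrays.asList("ds=2014-08-01");  // placeholder partition
    AggrStats aggrStats = Hive.get().getAggrColStatsFor(db, tbl, cols, parts);
    if (aggrStats == null) {
      // Retrieval failed; the underlying exception was only logged at DEBUG level.
      return false;
    }
    return aggrStats.getPartsFound() > 0;
  }
}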

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Tue Aug  5 05:52:51 2014
@@ -100,9 +100,9 @@ public class StatsRulesProcFactory {
       }
       Table table = aspCtx.getParseContext().getTopToTable().get(tsop);
 
-      // gather statistics for the first time and the attach it to table scan operator
-      Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop);
       try {
+        // gather statistics for the first time and then attach them to the table scan operator
+        Statistics stats = StatsUtils.collectStatistics(aspCtx.getConf(), partList, table, tsop);
         tsop.setStatistics(stats.clone());
 
         if (LOG.isDebugEnabled()) {
@@ -110,6 +110,9 @@ public class StatsRulesProcFactory {
         }
       } catch (CloneNotSupportedException e) {
         throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+      } catch (HiveException e) {
+        LOG.debug(e);
+        throw new SemanticException(e);
       }
       return null;
     }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Tue Aug  5 05:52:51 2014
@@ -20,7 +20,6 @@ package org.apache.hadoop.hive.ql.stats;
 
 import java.util.ArrayList;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
@@ -30,7 +29,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
@@ -101,7 +100,7 @@ public class StatsUtils {
    * @throws HiveException
    */
   public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList partList,
-      Table table, TableScanOperator tableScanOperator) {
+      Table table, TableScanOperator tableScanOperator) throws HiveException {
 
     Statistics stats = new Statistics();
 
@@ -206,17 +205,26 @@ public class StatsUtils {
         for (Partition part : partList.getNotDeniedPartns()) {
           partNames.add(part.getName());
         }
-        Map<String, List<ColStatistics>> partStats =
-            getPartColumnStats(table, schema, partNames, neededColumns);
-        if (partStats != null) {
-          for (String partName : partNames) {
-            List<ColStatistics> partStat = partStats.get(partName);
-            haveFullStats &= (partStat != null);
-            if (partStat != null) {
-              stats.updateColumnStatsState(deriveStatType(partStat, neededColumns));
-              stats.addToColumnStats(partStat);
-            }
+        Map<String, String> colToTabAlias = new HashMap<String, String>();
+        neededColumns = processNeededColumns(schema, neededColumns, colToTabAlias);
+        AggrStats aggrStats = Hive.get().getAggrColStatsFor(table.getDbName(), table.getTableName(), neededColumns, partNames);
+        if (null == aggrStats) {
+          haveFullStats = false;
+        } else {
+          List<ColumnStatisticsObj> colStats = aggrStats.getColStats();
+          if (colStats.size() != neededColumns.size()) {
+            LOG.debug("Column stats requested for : " + neededColumns.size() + " columns. Able to retrieve"
+                + " for " + colStats.size() + " columns");
+          }
+          List<ColStatistics> columnStats = convertColStats(colStats, table.getTableName(), colToTabAlias);
+          stats.addToColumnStats(columnStats);
+          State colState = deriveStatType(columnStats, neededColumns);
+          if (aggrStats.getPartsFound() != partNames.size() && colState != State.NONE) {
+            LOG.debug("Column stats requested for : " + partNames.size() +" partitions. "
+              + "Able to retrieve for " + aggrStats.getPartsFound() + " partitions");
+            stats.updateColumnStatsState(State.PARTIAL);
           }
+          stats.setColumnStatsState(colState);
         }
       }
       // There are some partitions with no state (or we didn't fetch any state).
@@ -460,12 +468,7 @@ public class StatsUtils {
     try {
       List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
           dbName, tabName, neededColsInTable);
-      stats = new ArrayList<ColStatistics>(colStat.size());
-      for (ColumnStatisticsObj statObj : colStat) {
-        ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
-        cs.setTableAlias(colToTabAlias.get(cs.getColumnName()));
-        stats.add(cs);
-      }
+      stats = convertColStats(colStat, tabName, colToTabAlias);
     } catch (HiveException e) {
       LOG.error("Failed to retrieve table statistics: ", e);
       stats = null;
@@ -473,43 +476,16 @@ public class StatsUtils {
     return stats;
   }
 
-  /**
-   * Get table level column statistics from metastore for needed columns
-   * @param table
-   *          - table
-   * @param schema
-   *          - output schema
-   * @param neededColumns
-   *          - list of needed columns
-   * @return column statistics
-   */
-  public static Map<String, List<ColStatistics>> getPartColumnStats(Table table,
-      List<ColumnInfo> schema, List<String> partNames, List<String> neededColumns) {
-    String dbName = table.getDbName();
-    String tabName = table.getTableName();
-    Map<String, String> colToTabAlias = new HashMap<String, String>(schema.size());
-    List<String> neededColsInTable = processNeededColumns(schema, neededColumns, colToTabAlias);
-    Map<String, List<ColStatistics>> stats = null;
-    try {
-      Map<String, List<ColumnStatisticsObj>> colStat = Hive.get().getPartitionColumnStatistics(
-          dbName, tabName, partNames, neededColsInTable);
-      stats = new HashMap<String, List<ColStatistics>>(colStat.size());
-      for (Map.Entry<String, List<ColumnStatisticsObj>> entry : colStat.entrySet()) {
-        List<ColStatistics> partStat = new ArrayList<ColStatistics>(entry.getValue().size());
-        for (ColumnStatisticsObj statObj : entry.getValue()) {
-          ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
-          cs.setTableAlias(colToTabAlias.get(cs.getColumnName()));
-          partStat.add(cs);
-        }
-        stats.put(entry.getKey(), partStat);
-      }
-    } catch (HiveException e) {
-      LOG.error("Failed to retrieve partitions statistics: ", e);
-      stats = null;
+  private static List<ColStatistics> convertColStats(List<ColumnStatisticsObj> colStats, String tabName,
+    Map<String,String> colToTabAlias) {
+    List<ColStatistics> stats = new ArrayList<ColStatistics>(colStats.size());
+    for (ColumnStatisticsObj statObj : colStats) {
+      ColStatistics cs = getColStatistics(statObj, tabName, statObj.getColName());
+      cs.setTableAlias(colToTabAlias.get(cs.getColumnName()));
+      stats.add(cs);
     }
     return stats;
   }
-
   private static List<String> processNeededColumns(List<ColumnInfo> schema,
       List<String> neededColumns, Map<String, String> colToTabAlias) {
     for (ColumnInfo col : schema) {
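
The new StatsUtils logic marks the column-stats state as PARTIAL when stats were found for fewer partitions than requested (and the state is not already NONE). A condensed, illustrative restatement of that decision, using a hypothetical helper and assuming the State enum nested in org.apache.hadoop.hive.ql.plan.Statistics:

import org.apache.hadoop.hive.ql.plan.Statistics.State;

public class ColStatsStateSketch {
  // Hypothetical helper restating the decision made in StatsUtils.collectStatistics():
  // if stats exist but cover fewer partitions than requested, the state is only PARTIAL.
  static State aggrColStatsState(State colState, long partsFound, int partsRequested) {
    if (colState != State.NONE && partsFound != partsRequested) {
      return State.PARTIAL;
    }
    return colState;
  }
}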

Modified: hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out?rev=1615865&r1=1615864&r2=1615865&view=diff
==============================================================================
Files hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out (original) and hive/trunk/ql/src/test/results/clientpositive/annotate_stats_part.q.out Tue Aug  5 05:52:51 2014 differ


