Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 85F96200B21 for ; Thu, 12 May 2016 00:43:42 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 849C6160A18; Wed, 11 May 2016 22:43:42 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 585A2160A17 for ; Thu, 12 May 2016 00:43:40 +0200 (CEST) Received: (qmail 66924 invoked by uid 500); 11 May 2016 22:43:39 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 66912 invoked by uid 99); 11 May 2016 22:43:39 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 11 May 2016 22:43:39 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 59711DFDC3; Wed, 11 May 2016 22:43:39 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: pxiong@apache.org To: commits@hive.apache.org Message-Id: <7a904c1230364ec3a1a8b36b4a85685c@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-13565: Auto-gather column stats thrift change (Pengcheng Xiong, reviewed by Ashutosh Chauhan) Date: Wed, 11 May 2016 22:43:39 +0000 (UTC) archived-at: Wed, 11 May 2016 22:43:42 -0000 Repository: hive Updated Branches: refs/heads/master 6187e2a6b -> b9e4fe856 HIVE-13565: Auto-gather column stats thrift change (Pengcheng Xiong, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: 
http://git-wip-us.apache.org/repos/asf/hive/commit/b9e4fe85 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9e4fe85 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9e4fe85 Branch: refs/heads/master Commit: b9e4fe856fcf3bb4339c8efebab1138c9dc1e732 Parents: 6187e2a Author: Pengcheng Xiong Authored: Wed May 11 15:43:15 2016 -0700 Committer: Pengcheng Xiong Committed: Wed May 11 15:43:15 2016 -0700 ---------------------------------------------------------------------- .../metastore/TestHiveMetaStoreStatsMerge.java | 199 +++++++++++++++++++ metastore/if/hive_metastore.thrift | 3 +- .../gen/thrift/gen-cpp/hive_metastore_types.cpp | 25 +++ .../gen/thrift/gen-cpp/hive_metastore_types.h | 15 +- .../api/SetPartitionsStatsRequest.java | 109 +++++++++- .../src/gen/thrift/gen-php/metastore/Types.php | 23 +++ .../gen/thrift/gen-py/hive_metastore/ttypes.py | 15 +- .../gen/thrift/gen-rb/hive_metastore_types.rb | 4 +- .../hadoop/hive/metastore/HiveMetaStore.java | 62 +++++- .../hive/metastore/HiveMetaStoreClient.java | 4 + .../hadoop/hive/metastore/MetaStoreUtils.java | 37 ++++ .../stats/merge/BinaryColumnStatsMerger.java | 35 ++++ .../stats/merge/BooleanColumnStatsMerger.java | 35 ++++ .../hbase/stats/merge/ColumnStatsMerger.java | 34 ++++ .../stats/merge/ColumnStatsMergerFactory.java | 138 +++++++++++++ .../stats/merge/DecimalColumnStatsMerger.java | 55 +++++ .../stats/merge/DoubleColumnStatsMerger.java | 48 +++++ .../stats/merge/LongColumnStatsMerger.java | 48 +++++ .../stats/merge/StringColumnStatsMerger.java | 49 +++++ .../hadoop/hive/ql/exec/ColumnStatsTask.java | 26 +-- .../hive/ql/exec/ColumnStatsUpdateTask.java | 32 +-- .../apache/hadoop/hive/ql/metadata/Hive.java | 18 -- .../ql/metadata/SessionHiveMetaStoreClient.java | 18 +- .../hadoop/hive/ql/plan/ColumnStatsDesc.java | 22 +- 24 files changed, 981 insertions(+), 73 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java ---------------------------------------------------------------------- diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java new file mode 100644 index 0000000..d6df32b --- /dev/null +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.cli.CliSessionState; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.events.AddPartitionEvent; +import org.apache.hadoop.hive.metastore.events.AlterTableEvent; +import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent; +import org.apache.hadoop.hive.metastore.events.CreateTableEvent; +import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent; +import org.apache.hadoop.hive.metastore.events.DropPartitionEvent; +import org.apache.hadoop.hive.metastore.events.DropTableEvent; +import org.apache.hadoop.hive.metastore.events.ListenerEvent; +import org.apache.hadoop.hive.ql.io.HiveInputFormat; +import org.apache.hadoop.hive.ql.io.HiveOutputFormat; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.shims.ShimLoader; + +/** + * TestHiveMetaStoreStatsMerge. 
+ * calls in {@link org.apache.hadoop.hive.metastore.HiveMetaStore} + */ +public class TestHiveMetaStoreStatsMerge extends TestCase { + + private HiveConf hiveConf; + private HiveMetaStoreClient msc; + private final Database db = new Database(); + private Table table = new Table(); + + private static final String dbName = "hive3252"; + private static final String tblName = "tmptbl"; + + @Override + protected void setUp() throws Exception { + super.setUp(); + + System.setProperty("hive.metastore.event.listeners", + DummyListener.class.getName()); + + int port = MetaStoreUtils.findFreePort(); + MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge()); + + hiveConf = new HiveConf(this.getClass()); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port); + hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3); + hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, ""); + hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false"); + SessionState.start(new CliSessionState(hiveConf)); + msc = new HiveMetaStoreClient(hiveConf); + + msc.dropDatabase(dbName, true, true); + + db.setName(dbName); + + Map tableParams = new HashMap(); + tableParams.put("a", "string"); + + List cols = new ArrayList(); + cols.add(new FieldSchema("a", "string", "")); + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(cols); + sd.setCompressed(false); + sd.setParameters(tableParams); + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setName(tblName); + sd.getSerdeInfo().setParameters(new HashMap()); + sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1"); + sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName()); + sd.setInputFormat(HiveInputFormat.class.getName()); + sd.setOutputFormat(HiveOutputFormat.class.getName()); + + table.setDbName(dbName); + table.setTableName(tblName); + 
table.setParameters(tableParams); + table.setSd(sd); + + DummyListener.notifyList.clear(); + } + + @Override + protected void tearDown() throws Exception { + super.tearDown(); + } + + public void testStatsMerge() throws Exception { + int listSize = 0; + + List notifyList = DummyListener.notifyList; + assertEquals(notifyList.size(), listSize); + msc.createDatabase(db); + listSize++; + assertEquals(listSize, notifyList.size()); + CreateDatabaseEvent dbEvent = (CreateDatabaseEvent)(notifyList.get(listSize - 1)); + assert dbEvent.getStatus(); + + msc.createTable(table); + listSize++; + assertEquals(notifyList.size(), listSize); + CreateTableEvent tblEvent = (CreateTableEvent)(notifyList.get(listSize - 1)); + assert tblEvent.getStatus(); + + table = msc.getTable(dbName, tblName); + + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("a"); + obj.setColType("string"); + ColumnStatisticsData data = new ColumnStatisticsData(); + StringColumnStatsData scsd = new StringColumnStatsData(); + scsd.setAvgColLen(10); + scsd.setMaxColLen(20); + scsd.setNumNulls(30); + scsd.setNumDVs(123); + scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}"); + data.setStringStats(scsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + List colStats = new ArrayList<>(); + colStats.add(cs); + + SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats); + msc.setPartitionColumnStatistics(request); + + List colNames = new ArrayList<>(); + colNames.add("a"); + + StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0) + .getStatsData().getStringStats(); + assertEquals(getScsd.getNumDVs(), 123); + + cs = new ColumnStatistics(); + scsd = new StringColumnStatsData(); + 
scsd.setAvgColLen(20); + scsd.setMaxColLen(5); + scsd.setNumNulls(70); + scsd.setNumDVs(456); + scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}"); + data.setStringStats(scsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + + request = new SetPartitionsStatsRequest(colStats); + request.setNeedMerge(true); + msc.setPartitionColumnStatistics(request); + + getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0) + .getStatsData().getStringStats(); + assertEquals(getScsd.getAvgColLen(), 20.0); + assertEquals(getScsd.getMaxColLen(), 20); + assertEquals(getScsd.getNumNulls(), 100); + // since metastore is ObjectStore, we use the max function to merge. + assertEquals(getScsd.getNumDVs(), 456); + + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/if/hive_metastore.thrift ---------------------------------------------------------------------- diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift index 84e9b6d..f8e56c7 100755 --- a/metastore/if/hive_metastore.thrift +++ b/metastore/if/hive_metastore.thrift @@ -449,7 +449,8 @@ struct AggrStats { } struct SetPartitionsStatsRequest { -1: required list colStats +1: required list colStats, +2: optional bool needMerge //stats need to be merged with the existing stats } // schema of the table/query results etc. 
http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp ---------------------------------------------------------------------- diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp index ad5da3e..cd8c552 100644 --- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp +++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp @@ -8165,6 +8165,11 @@ void SetPartitionsStatsRequest::__set_colStats(const std::vectorcolStats = val; } +void SetPartitionsStatsRequest::__set_needMerge(const bool val) { + this->needMerge = val; +__isset.needMerge = true; +} + uint32_t SetPartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol* iprot) { apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); @@ -8207,6 +8212,14 @@ uint32_t SetPartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol* xfer += iprot->skip(ftype); } break; + case 2: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->needMerge); + this->__isset.needMerge = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -8238,6 +8251,11 @@ uint32_t SetPartitionsStatsRequest::write(::apache::thrift::protocol::TProtocol* } xfer += oprot->writeFieldEnd(); + if (this->__isset.needMerge) { + xfer += oprot->writeFieldBegin("needMerge", ::apache::thrift::protocol::T_BOOL, 2); + xfer += oprot->writeBool(this->needMerge); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -8246,19 +8264,26 @@ uint32_t SetPartitionsStatsRequest::write(::apache::thrift::protocol::TProtocol* void swap(SetPartitionsStatsRequest &a, SetPartitionsStatsRequest &b) { using ::std::swap; swap(a.colStats, b.colStats); + swap(a.needMerge, b.needMerge); + swap(a.__isset, b.__isset); } 
SetPartitionsStatsRequest::SetPartitionsStatsRequest(const SetPartitionsStatsRequest& other329) { colStats = other329.colStats; + needMerge = other329.needMerge; + __isset = other329.__isset; } SetPartitionsStatsRequest& SetPartitionsStatsRequest::operator=(const SetPartitionsStatsRequest& other330) { colStats = other330.colStats; + needMerge = other330.needMerge; + __isset = other330.__isset; return *this; } void SetPartitionsStatsRequest::printTo(std::ostream& out) const { using ::apache::thrift::to_string; out << "SetPartitionsStatsRequest("; out << "colStats=" << to_string(colStats); + out << ", " << "needMerge="; (__isset.needMerge ? (out << to_string(needMerge)) : (out << "")); out << ")"; } http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h ---------------------------------------------------------------------- diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h index b5c4f14..883f266 100644 --- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h +++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h @@ -3464,24 +3464,37 @@ inline std::ostream& operator<<(std::ostream& out, const AggrStats& obj) return out; } +typedef struct _SetPartitionsStatsRequest__isset { + _SetPartitionsStatsRequest__isset() : needMerge(false) {} + bool needMerge :1; +} _SetPartitionsStatsRequest__isset; class SetPartitionsStatsRequest { public: SetPartitionsStatsRequest(const SetPartitionsStatsRequest&); SetPartitionsStatsRequest& operator=(const SetPartitionsStatsRequest&); - SetPartitionsStatsRequest() { + SetPartitionsStatsRequest() : needMerge(0) { } virtual ~SetPartitionsStatsRequest() throw(); std::vector colStats; + bool needMerge; + + _SetPartitionsStatsRequest__isset __isset; void __set_colStats(const std::vector & val); + void __set_needMerge(const bool val); + bool operator == (const SetPartitionsStatsRequest & rhs) const { if 
(!(colStats == rhs.colStats)) return false; + if (__isset.needMerge != rhs.__isset.needMerge) + return false; + else if (__isset.needMerge && !(needMerge == rhs.needMerge)) + return false; return true; } bool operator != (const SetPartitionsStatsRequest &rhs) const { http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java ---------------------------------------------------------------------- diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java index 6e334f6..c8088b4 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java @@ -39,6 +39,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { @@ -47,10 +48,12 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase colStats; // required + private boolean needMerge; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COL_STATS((short)1, "colStats"); + COL_STATS((short)1, "colStats"), + NEED_MERGE((short)2, "needMerge"); private static final Map byName = new HashMap(); @@ -67,6 +70,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); tmpMap.put(_Fields.COL_STATS, new org.apache.thrift.meta_data.FieldMetaData("colStats", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, ColumnStatistics.class)))); + tmpMap.put(_Fields.NEED_MERGE, new org.apache.thrift.meta_data.FieldMetaData("needMerge", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(SetPartitionsStatsRequest.class, metaDataMap); } @@ -131,6 +141,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBaseother. 
*/ public SetPartitionsStatsRequest(SetPartitionsStatsRequest other) { + __isset_bitfield = other.__isset_bitfield; if (other.isSetColStats()) { List __this__colStats = new ArrayList(other.colStats.size()); for (ColumnStatistics other_element : other.colStats) { @@ -138,6 +149,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase '\metastore\ColumnStatistics', ), ), + 2 => array( + 'var' => 'needMerge', + 'type' => TType::BOOL, + ), ); } if (is_array($vals)) { if (isset($vals['colStats'])) { $this->colStats = $vals['colStats']; } + if (isset($vals['needMerge'])) { + $this->needMerge = $vals['needMerge']; + } } } @@ -8399,6 +8410,13 @@ class SetPartitionsStatsRequest { $xfer += $input->skip($ftype); } break; + case 2: + if ($ftype == TType::BOOL) { + $xfer += $input->readBool($this->needMerge); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -8429,6 +8447,11 @@ class SetPartitionsStatsRequest { } $xfer += $output->writeFieldEnd(); } + if ($this->needMerge !== null) { + $xfer += $output->writeFieldBegin('needMerge', TType::BOOL, 2); + $xfer += $output->writeBool($this->needMerge); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py ---------------------------------------------------------------------- diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py index b47bb59..6366a81 100644 --- a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -5674,15 +5674,18 @@ class SetPartitionsStatsRequest: """ Attributes: - colStats + - needMerge """ thrift_spec = ( None, # 0 (1, TType.LIST, 'colStats', (TType.STRUCT,(ColumnStatistics, ColumnStatistics.thrift_spec)), 
None, ), # 1 + (2, TType.BOOL, 'needMerge', None, None, ), # 2 ) - def __init__(self, colStats=None,): + def __init__(self, colStats=None, needMerge=None,): self.colStats = colStats + self.needMerge = needMerge def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -5704,6 +5707,11 @@ class SetPartitionsStatsRequest: iprot.readListEnd() else: iprot.skip(ftype) + elif fid == 2: + if ftype == TType.BOOL: + self.needMerge = iprot.readBool() + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -5721,6 +5729,10 @@ class SetPartitionsStatsRequest: iter259.write(oprot) oprot.writeListEnd() oprot.writeFieldEnd() + if self.needMerge is not None: + oprot.writeFieldBegin('needMerge', TType.BOOL, 2) + oprot.writeBool(self.needMerge) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -5733,6 +5745,7 @@ class SetPartitionsStatsRequest: def __hash__(self): value = 17 value = (value * 31) ^ hash(self.colStats) + value = (value * 31) ^ hash(self.needMerge) return value def __repr__(self): http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb ---------------------------------------------------------------------- diff --git a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb index 2aa92d8..e8d60d7 100644 --- a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ -1262,9 +1262,11 @@ end class SetPartitionsStatsRequest include ::Thrift::Struct, ::Thrift::Struct_Union COLSTATS = 1 + NEEDMERGE = 2 FIELDS = { - COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatistics}} + COLSTATS => {:type => ::Thrift::Types::LIST, 
:name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatistics}}, + NEEDMERGE => {:type => ::Thrift::Types::BOOL, :name => 'needMerge', :optional => true} } def struct_fields; FIELDS; end http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 4b92b2a..94dd72e 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -4421,7 +4421,9 @@ public class HiveMetaStore extends ThriftHiveMetastore { try { statsObj = getMS().getTableColumnStatistics( dbName, tableName, Lists.newArrayList(colName)); - assert statsObj.getStatsObjSize() <= 1; + if (statsObj != null) { + assert statsObj.getStatsObjSize() <= 1; + } return statsObj; } finally { endFunction("get_column_statistics_by_table: ", statsObj != null, null, tableName); @@ -6009,8 +6011,62 @@ public class HiveMetaStore extends ThriftHiveMetastore { throws NoSuchObjectException, InvalidObjectException, MetaException, InvalidInputException, TException { boolean ret = true; - for (ColumnStatistics colStats : request.getColStats()) { - ret = ret && update_partition_column_statistics(colStats); + List csNews = request.getColStats(); + if (csNews == null || csNews.isEmpty()) { + return ret; + } + // figure out if it is table level or partition level + ColumnStatistics firstColStats = csNews.get(0); + ColumnStatisticsDesc statsDesc = firstColStats.getStatsDesc(); + String dbName = statsDesc.getDbName(); + String tableName = statsDesc.getTableName(); + List colNames = new ArrayList<>(); + for (ColumnStatisticsObj obj : firstColStats.getStatsObj()) { + colNames.add(obj.getColName()); + } + if 
(statsDesc.isIsTblLevel()) { + // there should be only one ColumnStatistics + if (request.getColStatsSize() != 1) { + throw new MetaException( + "Expecting only 1 ColumnStatistics for table's column stats, but find " + + request.getColStatsSize()); + } else { + if (request.isSetNeedMerge() && request.isNeedMerge()) { + // one single call to get all column stats + ColumnStatistics csOld = getMS().getTableColumnStatistics(dbName, tableName, colNames); + if (csOld != null && csOld.getStatsObjSize() != 0) { + MetaStoreUtils.mergeColStats(firstColStats, csOld); + } + } + return update_table_column_statistics(firstColStats); + } + } else { + // partition level column stats merging + List partitionNames = new ArrayList<>(); + for (ColumnStatistics csNew : csNews) { + partitionNames.add(csNew.getStatsDesc().getPartName()); + } + Map map = new HashMap<>(); + if (request.isSetNeedMerge() && request.isNeedMerge()) { + // a single call to get all column stats for all partitions + List csOlds = getMS().getPartitionColumnStatistics(dbName, tableName, + partitionNames, colNames); + if (csNews.size() != csOlds.size()) { + // some of the partitions miss stats. 
+ LOG.debug("Some of the partitions miss stats."); + } + for (ColumnStatistics csOld : csOlds) { + map.put(csOld.getStatsDesc().getPartName(), csOld); + } + } + for (int index = 0; index < csNews.size(); index++) { + ColumnStatistics csNew = csNews.get(index); + ColumnStatistics csOld = map.get(csNew.getStatsDesc().getPartName()); + if (csOld != null && csOld.getStatsObjSize() != 0) { + MetaStoreUtils.mergeColStats(csNew, csOld); + } + ret = ret && update_partition_column_statistics(csNew); + } } return ret; } http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java index 682796d..2e83ee0 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java @@ -1593,6 +1593,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient { /** {@inheritDoc} */ @Override + @Deprecated + //use setPartitionColumnStatistics instead public boolean updateTableColumnStatistics(ColumnStatistics statsObj) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException{ @@ -1601,6 +1603,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient { /** {@inheritDoc} */ @Override + @Deprecated + //use setPartitionColumnStatistics instead public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException{ http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java ---------------------------------------------------------------------- diff 
--git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index e01fe45..6bc882a 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -55,9 +55,12 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Partition; @@ -65,6 +68,8 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger; +import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.Deserializer; @@ -1811,4 +1816,36 @@ public class MetaStoreUtils { return ret; } + // this function will merge csOld into csNew. 
+ public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld) + throws InvalidObjectException { + List list = new ArrayList<>(); + if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) { + // Some of the columns' stats are missing + // This implies partition schema has changed. We will merge columns + // present in both, overwrite stats for columns absent in metastore and + // leave alone columns stats missing from stats task. This last case may + // leave stats in stale state. This will be addressed later. + LOG.debug("New ColumnStats size is " + csNew.getStatsObj().size() + + ". But old ColumnStats size is " + csOld.getStatsObjSize()); + } + // In this case, we have to find out which columns can be merged. + Map map = new HashMap<>(); + // We build a hash map from colName to object for old ColumnStats. + for (ColumnStatisticsObj obj : csOld.getStatsObj()) { + map.put(obj.getColName(), obj); + } + for (int index = 0; index < csNew.getStatsObj().size(); index++) { + ColumnStatisticsObj statsObjNew = csNew.getStatsObj().get(index); + ColumnStatisticsObj statsObjOld = map.get(statsObjNew.getColName()); + if (statsObjOld != null) { + // If statsObjOld is found, we can merge. 
+ ColumnStatsMerger merger = ColumnStatsMergerFactory.getColumnStatsMerger(statsObjNew, + statsObjOld); + merger.merge(statsObjNew, statsObjOld); + } + list.add(statsObjNew); + } + csNew.setStatsObj(list); + } } http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java new file mode 100644 index 0000000..af0669e --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; + +public class BinaryColumnStatsMerger extends ColumnStatsMerger { + + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + BinaryColumnStatsData aggregateData = aggregateColStats.getStatsData().getBinaryStats(); + BinaryColumnStatsData newData = newColStats.getStatsData().getBinaryStats(); + aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); + aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java new file mode 100644 index 0000000..33ff6a1 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; + +public class BooleanColumnStatsMerger extends ColumnStatsMerger { + + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + BooleanColumnStatsData aggregateData = aggregateColStats.getStatsData().getBooleanStats(); + BooleanColumnStatsData newData = newColStats.getStatsData().getBooleanStats(); + aggregateData.setNumTrues(aggregateData.getNumTrues() + newData.getNumTrues()); + aggregateData.setNumFalses(aggregateData.getNumFalses() + newData.getNumFalses()); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java new file mode 100644 index 0000000..33c7e3e --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class ColumnStatsMerger { + protected final Logger LOG = LoggerFactory.getLogger(ColumnStatsMerger.class.getName()); + + NumDistinctValueEstimator ndvEstimator = null; + + public abstract void merge(ColumnStatisticsObj aggregateColStats, + ColumnStatisticsObj newColStats); +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java new file mode 100644 index 0000000..da6cd46 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; + +public class ColumnStatsMergerFactory { + + private ColumnStatsMergerFactory() { + } + + // we depend on the toString() method for javolution.util.FastCollection. 
+ private static int countNumBitVectors(String s) { + if (s != null) { + return StringUtils.countMatches(s, "{"); + } else { + return 0; + } + } + + public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew, + ColumnStatisticsObj statsObjOld) { + ColumnStatsMerger agg; + _Fields typeNew = statsObjNew.getStatsData().getSetField(); + _Fields typeOld = statsObjOld.getStatsData().getSetField(); + // make sure that they have the same type + typeNew = typeNew == typeOld ? typeNew : null; + int numBitVectors = 0; + switch (typeNew) { + case BOOLEAN_STATS: + agg = new BooleanColumnStatsMerger(); + break; + case LONG_STATS: { + agg = new LongColumnStatsMerger(); + int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getLongStats().getBitVectors()); + int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getLongStats().getBitVectors()); + numBitVectors = nbvNew == nbvOld ? nbvNew : 0; + break; + } + case DOUBLE_STATS: { + agg = new DoubleColumnStatsMerger(); + int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getDoubleStats().getBitVectors()); + int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getDoubleStats().getBitVectors()); + numBitVectors = nbvNew == nbvOld ? nbvNew : 0; + break; + } + case STRING_STATS: { + agg = new StringColumnStatsMerger(); + int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getStringStats().getBitVectors()); + int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getStringStats().getBitVectors()); + numBitVectors = nbvNew == nbvOld ? nbvNew : 0; + break; + } + case BINARY_STATS: + agg = new BinaryColumnStatsMerger(); + break; + case DECIMAL_STATS: { + agg = new DecimalColumnStatsMerger(); + int nbvNew = countNumBitVectors(statsObjNew.getStatsData().getDecimalStats().getBitVectors()); + int nbvOld = countNumBitVectors(statsObjOld.getStatsData().getDecimalStats().getBitVectors()); + numBitVectors = nbvNew == nbvOld ? 
nbvNew : 0; + break; + } + default: + throw new RuntimeException("Woh, bad. Unknown stats type " + typeNew.toString()); + } + if (numBitVectors > 0) { + agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + } + return agg; + } + + public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) { + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + ColumnStatisticsData csd = new ColumnStatisticsData(); + cso.setColName(colName); + cso.setColType(colType); + switch (type) { + case BOOLEAN_STATS: + csd.setBooleanStats(new BooleanColumnStatsData()); + break; + + case LONG_STATS: + csd.setLongStats(new LongColumnStatsData()); + break; + + case DOUBLE_STATS: + csd.setDoubleStats(new DoubleColumnStatsData()); + break; + + case STRING_STATS: + csd.setStringStats(new StringColumnStatsData()); + break; + + case BINARY_STATS: + csd.setBinaryStats(new BinaryColumnStatsData()); + break; + + case DECIMAL_STATS: + csd.setDecimalStats(new DecimalColumnStatsData()); + break; + + default: + throw new RuntimeException("Woh, bad. Unknown stats type!"); + } + + cso.setStatsData(csd); + return cso; + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java new file mode 100644 index 0000000..c13add9 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; + +public class DecimalColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats(); + DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats(); + Decimal lowValue = aggregateData.getLowValue() != null + && (aggregateData.getLowValue().compareTo(newData.getLowValue()) > 0) ? aggregateData + .getLowValue() : newData.getLowValue(); + aggregateData.setLowValue(lowValue); + Decimal highValue = aggregateData.getHighValue() != null + && (aggregateData.getHighValue().compareTo(newData.getHighValue()) > 0) ? 
aggregateData + .getHighValue() : newData.getHighValue(); + aggregateData.setHighValue(highValue); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + long ndv = ndvEstimator.estimateNumDistinctValues(); + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + aggregateData.setNumDVs(ndv); + aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java new file mode 100644 index 0000000..fbdba24 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; + +public class DoubleColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + DoubleColumnStatsData aggregateData = aggregateColStats.getStatsData().getDoubleStats(); + DoubleColumnStatsData newData = newColStats.getStatsData().getDoubleStats(); + aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + long ndv = ndvEstimator.estimateNumDistinctValues(); + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + 
aggregateData.setNumDVs(ndv); + aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java new file mode 100644 index 0000000..ac65590 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; + +public class LongColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + LongColumnStatsData aggregateData = aggregateColStats.getStatsData().getLongStats(); + LongColumnStatsData newData = newColStats.getStatsData().getLongStats(); + aggregateData.setLowValue(Math.min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(Math.max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + long ndv = ndvEstimator.estimateNumDistinctValues(); + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + aggregateData.setNumDVs(ndv); + aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java ---------------------------------------------------------------------- diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java new file mode 100644 index 0000000..4158747 --- /dev/null +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hadoop.hive.metastore.hbase.stats.merge; + +import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.parquet.Log; + +public class StringColumnStatsMerger extends ColumnStatsMerger { + @Override + public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { + StringColumnStatsData aggregateData = aggregateColStats.getStatsData().getStringStats(); + StringColumnStatsData newData = newColStats.getStatsData().getStringStats(); + aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); + aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } else { + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getnumBitVectors())); + long ndv = ndvEstimator.estimateNumDistinctValues(); + LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); + aggregateData.setNumDVs(ndv); + aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + } + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java index 05dfa3b..0f0df11 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java @@ -392,22 +392,16 @@ public class ColumnStatsTask extends Task implements Serializab return statsDesc; } - private int persistPartitionStats(Hive db) throws HiveException, MetaException, IOException { - - // Fetch result of the analyze table partition (p1=c1).. compute statistics for columns .. + private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException { // Construct a column statistics object from the result List colStats = constructColumnStatsFromPackedRows(db); // Persist the column statistics object to the metastore - db.setPartitionColumnStatistics(new SetPartitionsStatsRequest(colStats)); - return 0; - } - - private int persistTableStats(Hive db) throws HiveException, MetaException, IOException { - // Fetch result of the analyze table .. compute statistics for columns .. - // Construct a column statistics object from the result - ColumnStatistics colStats = constructColumnStatsFromPackedRows(db).get(0); - // Persist the column statistics object to the metastore - db.updateTableColumnStatistics(colStats); + // Note, this function is shared for both table and partition column stats. 
+ SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats); + if (work.getColStats() != null && work.getColStats().getNumBitVector() > 0) { + request.setNeedMerge(true); + } + db.setPartitionColumnStatistics(request); return 0; } @@ -415,11 +409,7 @@ public class ColumnStatsTask extends Task implements Serializab public int execute(DriverContext driverContext) { try { Hive db = getHive(); - if (work.getColStats().isTblLevel()) { - return persistTableStats(db); - } else { - return persistPartitionStats(db); - } + return persistColumnStats(db); } catch (Exception e) { LOG.error("Failed to run column stats task", e); } http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java index 9a6e5c9..d6852dc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec; import java.io.IOException; import java.math.BigDecimal; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -40,6 +41,7 @@ import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.DriverContext; @@ -91,9 +93,9 @@ public class ColumnStatsUpdateTask 
extends Task { statsObj.setColName(colName.get(0)); statsObj.setColType(colType.get(0)); - + ColumnStatisticsData statsData = new ColumnStatisticsData(); - + String columnType = colType.get(0); if (columnType.equalsIgnoreCase("long")) { @@ -287,21 +289,11 @@ public class ColumnStatsUpdateTask extends Task { return statsDesc; } - private int persistTableStats(Hive db) throws HiveException, MetaException, - IOException { - // Construct a column statistics object from user input - ColumnStatistics colStats = constructColumnStatsFromInput(); - // Persist the column statistics object to the metastore - db.updateTableColumnStatistics(colStats); - return 0; - } - - private int persistPartitionStats(Hive db) throws HiveException, MetaException, - IOException { - // Construct a column statistics object from user input - ColumnStatistics colStats = constructColumnStatsFromInput(); - // Persist the column statistics object to the metastore - db.updatePartitionColumnStatistics(colStats); + private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException { + List colStats = new ArrayList<>(); + colStats.add(constructColumnStatsFromInput()); + SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats); + db.setPartitionColumnStatistics(request); return 0; } @@ -309,11 +301,7 @@ public class ColumnStatsUpdateTask extends Task { public int execute(DriverContext driverContext) { try { Hive db = getHive(); - if (work.getColStats().isTblLevel()) { - return persistTableStats(db); - } else { - return persistPartitionStats(db); - } + return persistColumnStats(db); } catch (Exception e) { LOG.info("Failed to persist stats in metastore", e); } http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 26f61c5..6af48ec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -3312,24 +3312,6 @@ private void constructOneLBLocationMap(FileStatus fSta, return indexes; } - public boolean updateTableColumnStatistics(ColumnStatistics statsObj) throws HiveException { - try { - return getMSC().updateTableColumnStatistics(statsObj); - } catch (Exception e) { - LOG.debug(StringUtils.stringifyException(e)); - throw new HiveException(e); - } - } - - public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) throws HiveException { - try { - return getMSC().updatePartitionColumnStatistics(statsObj); - } catch (Exception e) { - LOG.debug(StringUtils.stringifyException(e)); - throw new HiveException(e); - } - } - public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException { try { return getMSC().setPartitionColumnStatistics(request); http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java index db2b674..8eb011e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; 
import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; @@ -50,6 +51,7 @@ import org.apache.hadoop.hive.metastore.api.InvalidOperationException; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet; +import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; import org.apache.hadoop.hive.metastore.api.TableMeta; import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.hadoop.hive.metastore.api.UnknownTableException; @@ -325,15 +327,19 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I /** {@inheritDoc} */ @Override - public boolean updateTableColumnStatistics(ColumnStatistics statsObj) + public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws NoSuchObjectException, InvalidObjectException, MetaException, TException, InvalidInputException { - String dbName = statsObj.getStatsDesc().getDbName().toLowerCase(); - String tableName = statsObj.getStatsDesc().getTableName().toLowerCase(); - if (getTempTable(dbName, tableName) != null) { - return updateTempTableColumnStats(dbName, tableName, statsObj); + if (request.getColStatsSize() == 1) { + ColumnStatistics colStats = request.getColStatsIterator().next(); + ColumnStatisticsDesc desc = colStats.getStatsDesc(); + String dbName = desc.getDbName().toLowerCase(); + String tableName = desc.getTableName().toLowerCase(); + if (getTempTable(dbName, tableName) != null) { + return updateTempTableColumnStats(dbName, tableName, colStats); + } } - return super.updateTableColumnStatistics(statsObj); + return super.setPartitionColumnStatistics(request); } /** {@inheritDoc} */ http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java index c000db2..97f323f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java @@ -29,6 +29,7 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable private static final long serialVersionUID = 1L; private boolean isTblLevel; + private int numBitVector; private String tableName; private List<String> colName; private List<String> colType; @@ -36,12 +37,22 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable public ColumnStatsDesc() { } + public ColumnStatsDesc(String tableName, List<String> colName, List<String> colType, + boolean isTblLevel) { + this.tableName = tableName; + this.colName = colName; + this.colType = colType; + this.isTblLevel = isTblLevel; + this.numBitVector = 0; + } + public ColumnStatsDesc(String tableName, List<String> colName, - List<String> colType, boolean isTblLevel) { + List<String> colType, boolean isTblLevel, int numBitVector) { this.tableName = tableName; this.colName = colName; this.colType = colType; this.isTblLevel = isTblLevel; + this.numBitVector = numBitVector; } @Explain(displayName = "Table") @@ -79,4 +90,13 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable public void setColType(List<String> colType) { this.colType = colType; } + + public int getNumBitVector() { + return numBitVector; + } + + public void setNumBitVector(int numBitVector) { + this.numBitVector = numBitVector; + } + }