hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pxi...@apache.org
Subject hive git commit: HIVE-13565: Auto-gather column stats thrift change (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Date Wed, 11 May 2016 22:43:39 GMT
Repository: hive
Updated Branches:
  refs/heads/master 6187e2a6b -> b9e4fe856


HIVE-13565: Auto-gather column stats thrift change (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b9e4fe85
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b9e4fe85
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b9e4fe85

Branch: refs/heads/master
Commit: b9e4fe856fcf3bb4339c8efebab1138c9dc1e732
Parents: 6187e2a
Author: Pengcheng Xiong <pxiong@apache.org>
Authored: Wed May 11 15:43:15 2016 -0700
Committer: Pengcheng Xiong <pxiong@apache.org>
Committed: Wed May 11 15:43:15 2016 -0700

----------------------------------------------------------------------
 .../metastore/TestHiveMetaStoreStatsMerge.java  | 199 +++++++++++++++++++
 metastore/if/hive_metastore.thrift              |   3 +-
 .../gen/thrift/gen-cpp/hive_metastore_types.cpp |  25 +++
 .../gen/thrift/gen-cpp/hive_metastore_types.h   |  15 +-
 .../api/SetPartitionsStatsRequest.java          | 109 +++++++++-
 .../src/gen/thrift/gen-php/metastore/Types.php  |  23 +++
 .../gen/thrift/gen-py/hive_metastore/ttypes.py  |  15 +-
 .../gen/thrift/gen-rb/hive_metastore_types.rb   |   4 +-
 .../hadoop/hive/metastore/HiveMetaStore.java    |  62 +++++-
 .../hive/metastore/HiveMetaStoreClient.java     |   4 +
 .../hadoop/hive/metastore/MetaStoreUtils.java   |  37 ++++
 .../stats/merge/BinaryColumnStatsMerger.java    |  35 ++++
 .../stats/merge/BooleanColumnStatsMerger.java   |  35 ++++
 .../hbase/stats/merge/ColumnStatsMerger.java    |  34 ++++
 .../stats/merge/ColumnStatsMergerFactory.java   | 138 +++++++++++++
 .../stats/merge/DecimalColumnStatsMerger.java   |  55 +++++
 .../stats/merge/DoubleColumnStatsMerger.java    |  48 +++++
 .../stats/merge/LongColumnStatsMerger.java      |  48 +++++
 .../stats/merge/StringColumnStatsMerger.java    |  49 +++++
 .../hadoop/hive/ql/exec/ColumnStatsTask.java    |  26 +--
 .../hive/ql/exec/ColumnStatsUpdateTask.java     |  32 +--
 .../apache/hadoop/hive/ql/metadata/Hive.java    |  18 --
 .../ql/metadata/SessionHiveMetaStoreClient.java |  18 +-
 .../hadoop/hive/ql/plan/ColumnStatsDesc.java    |  22 +-
 24 files changed, 981 insertions(+), 73 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java
new file mode 100644
index 0000000..d6df32b
--- /dev/null
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStoreStatsMerge.java
@@ -0,0 +1,199 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.metastore;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.events.AddPartitionEvent;
+import org.apache.hadoop.hive.metastore.events.AlterTableEvent;
+import org.apache.hadoop.hive.metastore.events.CreateDatabaseEvent;
+import org.apache.hadoop.hive.metastore.events.CreateTableEvent;
+import org.apache.hadoop.hive.metastore.events.DropDatabaseEvent;
+import org.apache.hadoop.hive.metastore.events.DropPartitionEvent;
+import org.apache.hadoop.hive.metastore.events.DropTableEvent;
+import org.apache.hadoop.hive.metastore.events.ListenerEvent;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.shims.ShimLoader;
+
+/**
+ * TestHiveMetaStoreStatsMerge. Exercises the column statistics merge (needMerge)
+ * calls in {@link org.apache.hadoop.hive.metastore.HiveMetaStore}
+ */
+public class TestHiveMetaStoreStatsMerge extends TestCase {
+
+  private HiveConf hiveConf;
+  private HiveMetaStoreClient msc;
+  private final Database db = new Database();
+  private Table table = new Table();
+
+  private static final String dbName = "hive3252";
+  private static final String tblName = "tmptbl";
+
+  @Override
+  protected void setUp() throws Exception {
+    super.setUp();
+
+    System.setProperty("hive.metastore.event.listeners",
+        DummyListener.class.getName());
+
+    int port = MetaStoreUtils.findFreePort();
+    MetaStoreUtils.startMetaStore(port, ShimLoader.getHadoopThriftAuthBridge());
+
+    hiveConf = new HiveConf(this.getClass());
+    hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://localhost:" + port);
+    hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
+    hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+    hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+    hiveConf.set(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY.varname, "false");
+    SessionState.start(new CliSessionState(hiveConf));
+    msc = new HiveMetaStoreClient(hiveConf);
+
+    msc.dropDatabase(dbName, true, true);
+
+    db.setName(dbName);
+
+    Map<String, String> tableParams = new HashMap<String, String>();
+    tableParams.put("a", "string");
+
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema("a", "string", ""));
+    StorageDescriptor sd = new StorageDescriptor();
+    sd.setCols(cols);
+    sd.setCompressed(false);
+    sd.setParameters(tableParams);
+    sd.setSerdeInfo(new SerDeInfo());
+    sd.getSerdeInfo().setName(tblName);
+    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
+    sd.getSerdeInfo().getParameters().put(serdeConstants.SERIALIZATION_FORMAT, "1");
+    sd.getSerdeInfo().setSerializationLib(LazySimpleSerDe.class.getName());
+    sd.setInputFormat(HiveInputFormat.class.getName());
+    sd.setOutputFormat(HiveOutputFormat.class.getName());
+
+    table.setDbName(dbName);
+    table.setTableName(tblName);
+    table.setParameters(tableParams);
+    table.setSd(sd);
+
+    DummyListener.notifyList.clear();
+  }
+
+  @Override
+  protected void tearDown() throws Exception {
+    super.tearDown();
+  }
+
+  public void testStatsMerge() throws Exception {
+    int listSize = 0;
+
+    List<ListenerEvent> notifyList = DummyListener.notifyList;
+    assertEquals(notifyList.size(), listSize);
+    msc.createDatabase(db);
+    listSize++;
+    assertEquals(listSize, notifyList.size());
+    CreateDatabaseEvent dbEvent = (CreateDatabaseEvent)(notifyList.get(listSize - 1));
+    assert dbEvent.getStatus();
+
+    msc.createTable(table);
+    listSize++;
+    assertEquals(notifyList.size(), listSize);
+    CreateTableEvent tblEvent = (CreateTableEvent)(notifyList.get(listSize - 1));
+    assert tblEvent.getStatus();
+
+    table = msc.getTable(dbName, tblName);
+
+    ColumnStatistics cs = new ColumnStatistics();
+    ColumnStatisticsDesc desc = new ColumnStatisticsDesc(true, dbName, tblName);
+    cs.setStatsDesc(desc);
+    ColumnStatisticsObj obj = new ColumnStatisticsObj();
+    obj.setColName("a");
+    obj.setColType("string");
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    StringColumnStatsData scsd = new StringColumnStatsData();
+    scsd.setAvgColLen(10);
+    scsd.setMaxColLen(20);
+    scsd.setNumNulls(30);
+    scsd.setNumDVs(123);
+    scsd.setBitVectors("{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}");
+    data.setStringStats(scsd);
+    obj.setStatsData(data);
+    cs.addToStatsObj(obj);
+    
+    List<ColumnStatistics> colStats = new ArrayList<>();
+    colStats.add(cs);
+    
+    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
+    msc.setPartitionColumnStatistics(request);
+
+    List<String> colNames = new ArrayList<>();
+    colNames.add("a");
+
+    StringColumnStatsData getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0)
+        .getStatsData().getStringStats();
+    assertEquals(getScsd.getNumDVs(), 123);
+    
+    cs = new ColumnStatistics();
+    scsd = new StringColumnStatsData();
+    scsd.setAvgColLen(20);
+    scsd.setMaxColLen(5);
+    scsd.setNumNulls(70);
+    scsd.setNumDVs(456);
+    scsd.setBitVectors("{0, 1}{0, 1}{1, 2, 4}{0, 1, 2}{0, 1, 2}{0, 2}{0, 1, 3, 4}{0, 1}{0, 1}{3, 4, 6}{2}{0, 1}{0, 3}{0}{0, 1}{0, 1, 4}");
+    data.setStringStats(scsd);
+    obj.setStatsData(data);
+    cs.addToStatsObj(obj);
+    
+    request = new SetPartitionsStatsRequest(colStats);
+    request.setNeedMerge(true);
+    msc.setPartitionColumnStatistics(request);
+    
+    getScsd = msc.getTableColumnStatistics(dbName, tblName, colNames).get(0)
+        .getStatsData().getStringStats();
+    assertEquals(getScsd.getAvgColLen(), 20.0);
+    assertEquals(getScsd.getMaxColLen(), 20);
+    assertEquals(getScsd.getNumNulls(), 100);
+    // Since the metastore is ObjectStore, NumDVs is merged with the max function: max(123, 456).
+    assertEquals(getScsd.getNumDVs(), 456);
+
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/if/hive_metastore.thrift
----------------------------------------------------------------------
diff --git a/metastore/if/hive_metastore.thrift b/metastore/if/hive_metastore.thrift
index 84e9b6d..f8e56c7 100755
--- a/metastore/if/hive_metastore.thrift
+++ b/metastore/if/hive_metastore.thrift
@@ -449,7 +449,8 @@ struct AggrStats {
 }
 
 struct SetPartitionsStatsRequest {
-1: required list<ColumnStatistics> colStats
+1: required list<ColumnStatistics> colStats,
+2: optional bool needMerge //stats need to be merged with the existing stats
 }
 
 // schema of the table/query results etc.

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
index ad5da3e..cd8c552 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
@@ -8165,6 +8165,11 @@ void SetPartitionsStatsRequest::__set_colStats(const std::vector<ColumnStatistic
   this->colStats = val;
 }
 
+void SetPartitionsStatsRequest::__set_needMerge(const bool val) {
+  this->needMerge = val;
+__isset.needMerge = true;
+}
+
 uint32_t SetPartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol* iprot) {
 
   apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
@@ -8207,6 +8212,14 @@ uint32_t SetPartitionsStatsRequest::read(::apache::thrift::protocol::TProtocol*
           xfer += iprot->skip(ftype);
         }
         break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->needMerge);
+          this->__isset.needMerge = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
       default:
         xfer += iprot->skip(ftype);
         break;
@@ -8238,6 +8251,11 @@ uint32_t SetPartitionsStatsRequest::write(::apache::thrift::protocol::TProtocol*
   }
   xfer += oprot->writeFieldEnd();
 
+  if (this->__isset.needMerge) {
+    xfer += oprot->writeFieldBegin("needMerge", ::apache::thrift::protocol::T_BOOL, 2);
+    xfer += oprot->writeBool(this->needMerge);
+    xfer += oprot->writeFieldEnd();
+  }
   xfer += oprot->writeFieldStop();
   xfer += oprot->writeStructEnd();
   return xfer;
@@ -8246,19 +8264,26 @@ uint32_t SetPartitionsStatsRequest::write(::apache::thrift::protocol::TProtocol*
 void swap(SetPartitionsStatsRequest &a, SetPartitionsStatsRequest &b) {
   using ::std::swap;
   swap(a.colStats, b.colStats);
+  swap(a.needMerge, b.needMerge);
+  swap(a.__isset, b.__isset);
 }
 
 SetPartitionsStatsRequest::SetPartitionsStatsRequest(const SetPartitionsStatsRequest& other329) {
   colStats = other329.colStats;
+  needMerge = other329.needMerge;
+  __isset = other329.__isset;
 }
 SetPartitionsStatsRequest& SetPartitionsStatsRequest::operator=(const SetPartitionsStatsRequest& other330) {
   colStats = other330.colStats;
+  needMerge = other330.needMerge;
+  __isset = other330.__isset;
   return *this;
 }
 void SetPartitionsStatsRequest::printTo(std::ostream& out) const {
   using ::apache::thrift::to_string;
   out << "SetPartitionsStatsRequest(";
   out << "colStats=" << to_string(colStats);
+  out << ", " << "needMerge="; (__isset.needMerge ? (out << to_string(needMerge)) : (out << "<null>"));
   out << ")";
 }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
index b5c4f14..883f266 100644
--- a/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
+++ b/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
@@ -3464,24 +3464,37 @@ inline std::ostream& operator<<(std::ostream& out, const AggrStats& obj)
   return out;
 }
 
+typedef struct _SetPartitionsStatsRequest__isset {
+  _SetPartitionsStatsRequest__isset() : needMerge(false) {}
+  bool needMerge :1;
+} _SetPartitionsStatsRequest__isset;
 
 class SetPartitionsStatsRequest {
  public:
 
   SetPartitionsStatsRequest(const SetPartitionsStatsRequest&);
   SetPartitionsStatsRequest& operator=(const SetPartitionsStatsRequest&);
-  SetPartitionsStatsRequest() {
+  SetPartitionsStatsRequest() : needMerge(0) {
   }
 
   virtual ~SetPartitionsStatsRequest() throw();
   std::vector<ColumnStatistics>  colStats;
+  bool needMerge;
+
+  _SetPartitionsStatsRequest__isset __isset;
 
   void __set_colStats(const std::vector<ColumnStatistics> & val);
 
+  void __set_needMerge(const bool val);
+
   bool operator == (const SetPartitionsStatsRequest & rhs) const
   {
     if (!(colStats == rhs.colStats))
       return false;
+    if (__isset.needMerge != rhs.__isset.needMerge)
+      return false;
+    else if (__isset.needMerge && !(needMerge == rhs.needMerge))
+      return false;
     return true;
   }
   bool operator != (const SetPartitionsStatsRequest &rhs) const {

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
index 6e334f6..c8088b4 100644
--- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
+++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SetPartitionsStatsRequest.java
@@ -39,6 +39,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
   private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("SetPartitionsStatsRequest");
 
   private static final org.apache.thrift.protocol.TField COL_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("colStats", org.apache.thrift.protocol.TType.LIST, (short)1);
+  private static final org.apache.thrift.protocol.TField NEED_MERGE_FIELD_DESC = new org.apache.thrift.protocol.TField("needMerge", org.apache.thrift.protocol.TType.BOOL, (short)2);
 
   private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>();
   static {
@@ -47,10 +48,12 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
   }
 
   private List<ColumnStatistics> colStats; // required
+  private boolean needMerge; // optional
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
-    COL_STATS((short)1, "colStats");
+    COL_STATS((short)1, "colStats"),
+    NEED_MERGE((short)2, "needMerge");
 
     private static final Map<String, _Fields> byName = new HashMap<String, _Fields>();
 
@@ -67,6 +70,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
       switch(fieldId) {
         case 1: // COL_STATS
           return COL_STATS;
+        case 2: // NEED_MERGE
+          return NEED_MERGE;
         default:
           return null;
       }
@@ -107,12 +112,17 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
   }
 
   // isset id assignments
+  private static final int __NEEDMERGE_ISSET_ID = 0;
+  private byte __isset_bitfield = 0;
+  private static final _Fields optionals[] = {_Fields.NEED_MERGE};
   public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap;
   static {
     Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class);
     tmpMap.put(_Fields.COL_STATS, new org.apache.thrift.meta_data.FieldMetaData("colStats", org.apache.thrift.TFieldRequirementType.REQUIRED, 
         new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, 
             new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, ColumnStatistics.class))));
+    tmpMap.put(_Fields.NEED_MERGE, new org.apache.thrift.meta_data.FieldMetaData("needMerge", org.apache.thrift.TFieldRequirementType.OPTIONAL, 
+        new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)));
     metaDataMap = Collections.unmodifiableMap(tmpMap);
     org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(SetPartitionsStatsRequest.class, metaDataMap);
   }
@@ -131,6 +141,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
    * Performs a deep copy on <i>other</i>.
    */
   public SetPartitionsStatsRequest(SetPartitionsStatsRequest other) {
+    __isset_bitfield = other.__isset_bitfield;
     if (other.isSetColStats()) {
       List<ColumnStatistics> __this__colStats = new ArrayList<ColumnStatistics>(other.colStats.size());
       for (ColumnStatistics other_element : other.colStats) {
@@ -138,6 +149,7 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
       }
       this.colStats = __this__colStats;
     }
+    this.needMerge = other.needMerge;
   }
 
   public SetPartitionsStatsRequest deepCopy() {
@@ -147,6 +159,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
   @Override
   public void clear() {
     this.colStats = null;
+    setNeedMergeIsSet(false);
+    this.needMerge = false;
   }
 
   public int getColStatsSize() {
@@ -187,6 +201,28 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
     }
   }
 
+  public boolean isNeedMerge() {
+    return this.needMerge;
+  }
+
+  public void setNeedMerge(boolean needMerge) {
+    this.needMerge = needMerge;
+    setNeedMergeIsSet(true);
+  }
+
+  public void unsetNeedMerge() {
+    __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NEEDMERGE_ISSET_ID);
+  }
+
+  /** Returns true if field needMerge is set (has been assigned a value) and false otherwise */
+  public boolean isSetNeedMerge() {
+    return EncodingUtils.testBit(__isset_bitfield, __NEEDMERGE_ISSET_ID);
+  }
+
+  public void setNeedMergeIsSet(boolean value) {
+    __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NEEDMERGE_ISSET_ID, value);
+  }
+
   public void setFieldValue(_Fields field, Object value) {
     switch (field) {
     case COL_STATS:
@@ -197,6 +233,14 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
       }
       break;
 
+    case NEED_MERGE:
+      if (value == null) {
+        unsetNeedMerge();
+      } else {
+        setNeedMerge((Boolean)value);
+      }
+      break;
+
     }
   }
 
@@ -205,6 +249,9 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
     case COL_STATS:
       return getColStats();
 
+    case NEED_MERGE:
+      return isNeedMerge();
+
     }
     throw new IllegalStateException();
   }
@@ -218,6 +265,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
     switch (field) {
     case COL_STATS:
       return isSetColStats();
+    case NEED_MERGE:
+      return isSetNeedMerge();
     }
     throw new IllegalStateException();
   }
@@ -244,6 +293,15 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
         return false;
     }
 
+    boolean this_present_needMerge = true && this.isSetNeedMerge();
+    boolean that_present_needMerge = true && that.isSetNeedMerge();
+    if (this_present_needMerge || that_present_needMerge) {
+      if (!(this_present_needMerge && that_present_needMerge))
+        return false;
+      if (this.needMerge != that.needMerge)
+        return false;
+    }
+
     return true;
   }
 
@@ -256,6 +314,11 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
     if (present_colStats)
       list.add(colStats);
 
+    boolean present_needMerge = true && (isSetNeedMerge());
+    list.add(present_needMerge);
+    if (present_needMerge)
+      list.add(needMerge);
+
     return list.hashCode();
   }
 
@@ -277,6 +340,16 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
         return lastComparison;
       }
     }
+    lastComparison = Boolean.valueOf(isSetNeedMerge()).compareTo(other.isSetNeedMerge());
+    if (lastComparison != 0) {
+      return lastComparison;
+    }
+    if (isSetNeedMerge()) {
+      lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.needMerge, other.needMerge);
+      if (lastComparison != 0) {
+        return lastComparison;
+      }
+    }
     return 0;
   }
 
@@ -304,6 +377,12 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
       sb.append(this.colStats);
     }
     first = false;
+    if (isSetNeedMerge()) {
+      if (!first) sb.append(", ");
+      sb.append("needMerge:");
+      sb.append(this.needMerge);
+      first = false;
+    }
     sb.append(")");
     return sb.toString();
   }
@@ -327,6 +406,8 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
 
   private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
     try {
+      // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor.
+      __isset_bitfield = 0;
       read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));
     } catch (org.apache.thrift.TException te) {
       throw new java.io.IOException(te);
@@ -370,6 +451,14 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
               org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
             }
             break;
+          case 2: // NEED_MERGE
+            if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) {
+              struct.needMerge = iprot.readBool();
+              struct.setNeedMergeIsSet(true);
+            } else { 
+              org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
+            }
+            break;
           default:
             org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type);
         }
@@ -395,6 +484,11 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
         }
         oprot.writeFieldEnd();
       }
+      if (struct.isSetNeedMerge()) {
+        oprot.writeFieldBegin(NEED_MERGE_FIELD_DESC);
+        oprot.writeBool(struct.needMerge);
+        oprot.writeFieldEnd();
+      }
       oprot.writeFieldStop();
       oprot.writeStructEnd();
     }
@@ -419,6 +513,14 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
           _iter290.write(oprot);
         }
       }
+      BitSet optionals = new BitSet();
+      if (struct.isSetNeedMerge()) {
+        optionals.set(0);
+      }
+      oprot.writeBitSet(optionals, 1);
+      if (struct.isSetNeedMerge()) {
+        oprot.writeBool(struct.needMerge);
+      }
     }
 
     @Override
@@ -436,6 +538,11 @@ public class SetPartitionsStatsRequest implements org.apache.thrift.TBase<SetPar
         }
       }
       struct.setColStatsIsSet(true);
+      BitSet incoming = iprot.readBitSet(1);
+      if (incoming.get(0)) {
+        struct.needMerge = iprot.readBool();
+        struct.setNeedMergeIsSet(true);
+      }
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-php/metastore/Types.php
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-php/metastore/Types.php b/metastore/src/gen/thrift/gen-php/metastore/Types.php
index f67e61f..189894d 100644
--- a/metastore/src/gen/thrift/gen-php/metastore/Types.php
+++ b/metastore/src/gen/thrift/gen-php/metastore/Types.php
@@ -8340,6 +8340,10 @@ class SetPartitionsStatsRequest {
    * @var \metastore\ColumnStatistics[]
    */
   public $colStats = null;
+  /**
+   * @var bool
+   */
+  public $needMerge = null;
 
   public function __construct($vals=null) {
     if (!isset(self::$_TSPEC)) {
@@ -8353,12 +8357,19 @@ class SetPartitionsStatsRequest {
             'class' => '\metastore\ColumnStatistics',
             ),
           ),
+        2 => array(
+          'var' => 'needMerge',
+          'type' => TType::BOOL,
+          ),
         );
     }
     if (is_array($vals)) {
       if (isset($vals['colStats'])) {
         $this->colStats = $vals['colStats'];
       }
+      if (isset($vals['needMerge'])) {
+        $this->needMerge = $vals['needMerge'];
+      }
     }
   }
 
@@ -8399,6 +8410,13 @@ class SetPartitionsStatsRequest {
             $xfer += $input->skip($ftype);
           }
           break;
+        case 2:
+          if ($ftype == TType::BOOL) {
+            $xfer += $input->readBool($this->needMerge);
+          } else {
+            $xfer += $input->skip($ftype);
+          }
+          break;
         default:
           $xfer += $input->skip($ftype);
           break;
@@ -8429,6 +8447,11 @@ class SetPartitionsStatsRequest {
       }
       $xfer += $output->writeFieldEnd();
     }
+    if ($this->needMerge !== null) {
+      $xfer += $output->writeFieldBegin('needMerge', TType::BOOL, 2);
+      $xfer += $output->writeBool($this->needMerge);
+      $xfer += $output->writeFieldEnd();
+    }
     $xfer += $output->writeFieldStop();
     $xfer += $output->writeStructEnd();
     return $xfer;

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
index b47bb59..6366a81 100644
--- a/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
+++ b/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
@@ -5674,15 +5674,18 @@ class SetPartitionsStatsRequest:
   """
   Attributes:
    - colStats
+   - needMerge
   """
 
   thrift_spec = (
     None, # 0
     (1, TType.LIST, 'colStats', (TType.STRUCT,(ColumnStatistics, ColumnStatistics.thrift_spec)), None, ), # 1
+    (2, TType.BOOL, 'needMerge', None, None, ), # 2
   )
 
-  def __init__(self, colStats=None,):
+  def __init__(self, colStats=None, needMerge=None,):
     self.colStats = colStats
+    self.needMerge = needMerge
 
   def read(self, iprot):
     if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None:
@@ -5704,6 +5707,11 @@ class SetPartitionsStatsRequest:
           iprot.readListEnd()
         else:
           iprot.skip(ftype)
+      elif fid == 2:
+        if ftype == TType.BOOL:
+          self.needMerge = iprot.readBool()
+        else:
+          iprot.skip(ftype)
       else:
         iprot.skip(ftype)
       iprot.readFieldEnd()
@@ -5721,6 +5729,10 @@ class SetPartitionsStatsRequest:
         iter259.write(oprot)
       oprot.writeListEnd()
       oprot.writeFieldEnd()
+    if self.needMerge is not None:
+      oprot.writeFieldBegin('needMerge', TType.BOOL, 2)
+      oprot.writeBool(self.needMerge)
+      oprot.writeFieldEnd()
     oprot.writeFieldStop()
     oprot.writeStructEnd()
 
@@ -5733,6 +5745,7 @@ class SetPartitionsStatsRequest:
   def __hash__(self):
     value = 17
     value = (value * 31) ^ hash(self.colStats)
+    value = (value * 31) ^ hash(self.needMerge)
     return value
 
   def __repr__(self):

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
----------------------------------------------------------------------
diff --git a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
index 2aa92d8..e8d60d7 100644
--- a/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
+++ b/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
@@ -1262,9 +1262,11 @@ end
 class SetPartitionsStatsRequest
   include ::Thrift::Struct, ::Thrift::Struct_Union
   COLSTATS = 1
+  NEEDMERGE = 2
 
   FIELDS = {
-    COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatistics}}
+    COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatistics}},
+    NEEDMERGE => {:type => ::Thrift::Types::BOOL, :name => 'needMerge', :optional => true}
   }
 
   def struct_fields; FIELDS; end

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 4b92b2a..94dd72e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -4421,7 +4421,9 @@ public class HiveMetaStore extends ThriftHiveMetastore {
       try {
         statsObj = getMS().getTableColumnStatistics(
             dbName, tableName, Lists.newArrayList(colName));
-        assert statsObj.getStatsObjSize() <= 1;
+        if (statsObj != null) {
+          assert statsObj.getStatsObjSize() <= 1;
+        }
         return statsObj;
       } finally {
         endFunction("get_column_statistics_by_table: ", statsObj != null, null, tableName);
@@ -6009,8 +6011,62 @@ public class HiveMetaStore extends ThriftHiveMetastore {
         throws NoSuchObjectException, InvalidObjectException, MetaException, InvalidInputException,
         TException {
       boolean ret = true;
-      for (ColumnStatistics colStats : request.getColStats()) {
-        ret = ret && update_partition_column_statistics(colStats);
+      List<ColumnStatistics> csNews = request.getColStats();
+      if (csNews == null || csNews.isEmpty()) {
+        return ret;
+      }
+      // figure out if it is table level or partition level
+      ColumnStatistics firstColStats = csNews.get(0);
+      ColumnStatisticsDesc statsDesc = firstColStats.getStatsDesc();
+      String dbName = statsDesc.getDbName();
+      String tableName = statsDesc.getTableName();
+      List<String> colNames = new ArrayList<>();
+      for (ColumnStatisticsObj obj : firstColStats.getStatsObj()) {
+        colNames.add(obj.getColName());
+      }
+      if (statsDesc.isIsTblLevel()) {
+        // there should be only one ColumnStatistics
+        if (request.getColStatsSize() != 1) {
+          throw new MetaException(
+              "Expecting only 1 ColumnStatistics for table's column stats, but find "
+                  + request.getColStatsSize());
+        } else {
+          if (request.isSetNeedMerge() && request.isNeedMerge()) {
+            // one single call to get all column stats
+            ColumnStatistics csOld = getMS().getTableColumnStatistics(dbName, tableName, colNames);
+            if (csOld != null && csOld.getStatsObjSize() != 0) {
+              MetaStoreUtils.mergeColStats(firstColStats, csOld);
+            }
+          }
+          return update_table_column_statistics(firstColStats);
+        }
+      } else {
+        // partition level column stats merging
+        List<String> partitionNames = new ArrayList<>();
+        for (ColumnStatistics csNew : csNews) {
+          partitionNames.add(csNew.getStatsDesc().getPartName());
+        }
+        Map<String, ColumnStatistics> map = new HashMap<>();
+        if (request.isSetNeedMerge() && request.isNeedMerge()) {
+          // a single call to get all column stats for all partitions
+          List<ColumnStatistics> csOlds = getMS().getPartitionColumnStatistics(dbName, tableName,
+              partitionNames, colNames);
+          if (csNews.size() != csOlds.size()) {
+            // some of the partitions miss stats.
+            LOG.debug("Some of the partitions miss stats.");
+          }
+          for (ColumnStatistics csOld : csOlds) {
+            map.put(csOld.getStatsDesc().getPartName(), csOld);
+          }
+        }
+        for (int index = 0; index < csNews.size(); index++) {
+          ColumnStatistics csNew = csNews.get(index);
+          ColumnStatistics csOld = map.get(csNew.getStatsDesc().getPartName());
+          if (csOld != null && csOld.getStatsObjSize() != 0) {
+            MetaStoreUtils.mergeColStats(csNew, csOld);
+          }
+          ret = ret && update_partition_column_statistics(csNew);
+        }
       }
       return ret;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
index 682796d..2e83ee0 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
@@ -1593,6 +1593,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
 
   /** {@inheritDoc} */
   @Override
+  @Deprecated
+  //use setPartitionColumnStatistics instead
   public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
     throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
     InvalidInputException{
@@ -1601,6 +1603,8 @@ public class HiveMetaStoreClient implements IMetaStoreClient {
 
   /** {@inheritDoc} */
   @Override
+  @Deprecated
+  //use setPartitionColumnStatistics instead
   public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj)
     throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
     InvalidInputException{

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index e01fe45..6bc882a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -55,9 +55,12 @@ import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Partition;
@@ -65,6 +68,8 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
+import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger;
+import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -1811,4 +1816,36 @@ public class MetaStoreUtils {
     return ret;
   }
 
+  /**
+   * Merges csOld into csNew, matching per-column statistics by column name.
+   * Columns of csNew without a counterpart in csOld are kept as they are; columns that only
+   * exist in csOld are not carried over into csNew.
+   */
+  public static void mergeColStats(ColumnStatistics csNew, ColumnStatistics csOld)
+      throws InvalidObjectException {
+    if (csNew.getStatsObj().size() != csOld.getStatsObjSize()) {
+      // A differing column count implies the partition schema has changed. Columns present
+      // on both sides are merged, columns absent in the metastore are overwritten, and
+      // columns missing from the stats task are left alone. That last case may leave stale
+      // stats behind; it is to be addressed later.
+      LOG.debug("New ColumnStats size is " + csNew.getStatsObj().size()
+          + ". But old ColumnStats size is " + csOld.getStatsObjSize());
+    }
+    // Index the old per-column stats by column name so mergeable columns can be looked up.
+    Map<String, ColumnStatisticsObj> oldByColumn = new HashMap<>();
+    for (ColumnStatisticsObj oldObj : csOld.getStatsObj()) {
+      oldByColumn.put(oldObj.getColName(), oldObj);
+    }
+    List<ColumnStatisticsObj> merged = new ArrayList<>();
+    for (ColumnStatisticsObj newObj : csNew.getStatsObj()) {
+      ColumnStatisticsObj oldObj = oldByColumn.get(newObj.getColName());
+      if (oldObj != null) {
+        // Both sides know this column: fold the old stats into the new object in place.
+        ColumnStatsMergerFactory.getColumnStatsMerger(newObj, oldObj).merge(newObj, oldObj);
+      }
+      merged.add(newObj);
+    }
+    csNew.setStatsObj(merged);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
new file mode 100644
index 0000000..af0669e
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BinaryColumnStatsMerger.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+
+/**
+ * Merger for binary column statistics: keeps the larger of the two max and average column
+ * lengths and adds up the null counts.
+ */
+public class BinaryColumnStatsMerger extends ColumnStatsMerger {
+
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    BinaryColumnStatsData target = aggregateColStats.getStatsData().getBinaryStats();
+    BinaryColumnStatsData incoming = newColStats.getStatsData().getBinaryStats();
+
+    long maxLen = Math.max(target.getMaxColLen(), incoming.getMaxColLen());
+    double avgLen = Math.max(target.getAvgColLen(), incoming.getAvgColLen());
+    long nulls = target.getNumNulls() + incoming.getNumNulls();
+
+    // The result is written back into the aggregate object; newColStats is only read.
+    target.setMaxColLen(maxLen);
+    target.setAvgColLen(avgLen);
+    target.setNumNulls(nulls);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
new file mode 100644
index 0000000..33ff6a1
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/BooleanColumnStatsMerger.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+
+/**
+ * Merger for boolean column statistics: the true, false and null counts of both inputs are
+ * added together.
+ */
+public class BooleanColumnStatsMerger extends ColumnStatsMerger {
+
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    BooleanColumnStatsData target = aggregateColStats.getStatsData().getBooleanStats();
+    BooleanColumnStatsData incoming = newColStats.getStatsData().getBooleanStats();
+
+    long trues = target.getNumTrues() + incoming.getNumTrues();
+    long falses = target.getNumFalses() + incoming.getNumFalses();
+    long nulls = target.getNumNulls() + incoming.getNumNulls();
+
+    // The result is written back into the aggregate object; newColStats is only read.
+    target.setNumTrues(trues);
+    target.setNumFalses(falses);
+    target.setNumNulls(nulls);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
new file mode 100644
index 0000000..33c7e3e
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Base class for the per-type column statistics mergers in this package.
+ * Instances are handed out by ColumnStatsMergerFactory, which also decides whether an NDV
+ * estimator is attached.
+ */
+public abstract class ColumnStatsMerger {
+  // Logger shared by all subclasses; it is registered under this base class's name.
+  protected final Logger LOG = LoggerFactory.getLogger(ColumnStatsMerger.class.getName());
+
+  // Estimator used to merge bit vectors into a distinct-value count. Stays null unless the
+  // factory assigns one, in which case subclasses fall back to a plain NDV comparison.
+  NumDistinctValueEstimator ndvEstimator = null;
+
+  /**
+   * Folds newColStats into aggregateColStats. The implementations in this package update
+   * aggregateColStats in place and only read newColStats.
+   *
+   * @param aggregateColStats statistics object that receives the merged values
+   * @param newColStats statistics object that is merged in
+   */
+  public abstract void merge(ColumnStatisticsObj aggregateColStats,
+      ColumnStatisticsObj newColStats);
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
new file mode 100644
index 0000000..da6cd46
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java
@@ -0,0 +1,138 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData._Fields;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+
+/**
+ * Static factory for {@link ColumnStatsMerger} instances and for empty
+ * {@link ColumnStatisticsObj} shells, both selected by the kind of column statistics.
+ */
+public class ColumnStatsMergerFactory {
+
+  private ColumnStatsMergerFactory() {
+  }
+
+  // we depend on the toString() method for javolution.util.FastCollection.
+  private static int countNumBitVectors(String s) {
+    return s == null ? 0 : StringUtils.countMatches(s, "{");
+  }
+
+  /**
+   * Returns the number of bit vectors both serialized strings agree on, or 0 when the two
+   * counts differ (in which case no NDV estimator is created for the merge).
+   */
+  private static int commonNumBitVectors(String bitVectorsNew, String bitVectorsOld) {
+    int nbvNew = countNumBitVectors(bitVectorsNew);
+    int nbvOld = countNumBitVectors(bitVectorsOld);
+    return nbvNew == nbvOld ? nbvNew : 0;
+  }
+
+  /**
+   * Picks the merger matching the statistics type carried by both objects.
+   * For long, double, string and decimal stats an NDV estimator is attached to the merger
+   * when both sides carry the same, non-zero number of bit vectors.
+   *
+   * @param statsObjNew the newly gathered statistics for a column
+   * @param statsObjOld the previously stored statistics for the same column
+   * @return a merger for the shared statistics type
+   * @throws IllegalArgumentException if the new stats have no type set or the two objects
+   *           carry different statistics types
+   * @throws RuntimeException if the statistics type is not one of the known kinds
+   */
+  public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsObjNew,
+      ColumnStatisticsObj statsObjOld) {
+    ColumnStatisticsData dataNew = statsObjNew.getStatsData();
+    ColumnStatisticsData dataOld = statsObjOld.getStatsData();
+    _Fields typeNew = dataNew.getSetField();
+    _Fields typeOld = dataOld.getSetField();
+    // Both sides must carry the same type. Switching on a null selector would only surface
+    // as a bare NullPointerException, so report the mismatch explicitly instead.
+    if (typeNew == null || typeNew != typeOld) {
+      throw new IllegalArgumentException(
+          "Cannot merge column stats for column " + statsObjNew.getColName()
+              + ": new stats type is " + typeNew + " but old stats type is " + typeOld);
+    }
+    ColumnStatsMerger agg;
+    int numBitVectors = 0;
+    switch (typeNew) {
+    case BOOLEAN_STATS:
+      agg = new BooleanColumnStatsMerger();
+      break;
+    case LONG_STATS:
+      agg = new LongColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getLongStats().getBitVectors(),
+          dataOld.getLongStats().getBitVectors());
+      break;
+    case DOUBLE_STATS:
+      agg = new DoubleColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getDoubleStats().getBitVectors(),
+          dataOld.getDoubleStats().getBitVectors());
+      break;
+    case STRING_STATS:
+      agg = new StringColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getStringStats().getBitVectors(),
+          dataOld.getStringStats().getBitVectors());
+      break;
+    case BINARY_STATS:
+      agg = new BinaryColumnStatsMerger();
+      break;
+    case DECIMAL_STATS:
+      agg = new DecimalColumnStatsMerger();
+      numBitVectors = commonNumBitVectors(dataNew.getDecimalStats().getBitVectors(),
+          dataOld.getDecimalStats().getBitVectors());
+      break;
+    default:
+      throw new RuntimeException("Woh, bad.  Unknown stats type " + typeNew.toString());
+    }
+    if (numBitVectors > 0) {
+      agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors);
+    }
+    return agg;
+  }
+
+  /**
+   * Creates a statistics object for the given column whose data union is initialised with an
+   * empty stats struct of the requested kind.
+   *
+   * @param colName column name to set on the result
+   * @param colType column type string to set on the result
+   * @param type which kind of stats struct to create
+   * @return the new statistics object
+   * @throws RuntimeException if the statistics type is not one of the known kinds
+   */
+  public static ColumnStatisticsObj newColumnStaticsObj(String colName, String colType, _Fields type) {
+    ColumnStatisticsObj cso = new ColumnStatisticsObj();
+    ColumnStatisticsData csd = new ColumnStatisticsData();
+    cso.setColName(colName);
+    cso.setColType(colType);
+    switch (type) {
+    case BOOLEAN_STATS:
+      csd.setBooleanStats(new BooleanColumnStatsData());
+      break;
+
+    case LONG_STATS:
+      csd.setLongStats(new LongColumnStatsData());
+      break;
+
+    case DOUBLE_STATS:
+      csd.setDoubleStats(new DoubleColumnStatsData());
+      break;
+
+    case STRING_STATS:
+      csd.setStringStats(new StringColumnStatsData());
+      break;
+
+    case BINARY_STATS:
+      csd.setBinaryStats(new BinaryColumnStatsData());
+      break;
+
+    case DECIMAL_STATS:
+      csd.setDecimalStats(new DecimalColumnStatsData());
+      break;
+
+    default:
+      throw new RuntimeException("Woh, bad.  Unknown stats type!");
+    }
+
+    cso.setStatsData(csd);
+    return cso;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
new file mode 100644
index 0000000..c13add9
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+
+/**
+ * Merger for decimal column statistics: widens the [low, high] range, adds up the null
+ * counts and combines the number of distinct values.
+ */
+public class DecimalColumnStatsMerger extends ColumnStatsMerger {
+  /**
+   * Folds newColStats into aggregateColStats in place.
+   *
+   * @param aggregateColStats decimal statistics that receive the merged values
+   * @param newColStats decimal statistics that are merged in; only read
+   */
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    DecimalColumnStatsData aggregateData = aggregateColStats.getStatsData().getDecimalStats();
+    DecimalColumnStatsData newData = newColStats.getStatsData().getDecimalStats();
+    // The merged range must cover both inputs: the lower of the two low values and the
+    // higher of the two high values.
+    aggregateData.setLowValue(lower(aggregateData.getLowValue(), newData.getLowValue()));
+    aggregateData.setHighValue(higher(aggregateData.getHighValue(), newData.getHighValue()));
+    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) {
+      // No usable bit vectors: fall back to the larger of the two NDV counts.
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      long ndv = ndvEstimator.estimateNumDistinctValues();
+      LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+          + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+      aggregateData.setNumDVs(ndv);
+      aggregateData.setBitVectors(ndvEstimator.serialize().toString());
+    }
+  }
+
+  /**
+   * Returns whichever value orders first under Decimal.compareTo. A null argument is treated
+   * as "no bound known", so the other argument (possibly also null) is returned.
+   */
+  private static Decimal lower(Decimal current, Decimal candidate) {
+    if (current == null) {
+      return candidate;
+    }
+    if (candidate == null) {
+      return current;
+    }
+    return current.compareTo(candidate) < 0 ? current : candidate;
+  }
+
+  /**
+   * Returns whichever value orders last under Decimal.compareTo. A null argument is treated
+   * as "no bound known", so the other argument (possibly also null) is returned.
+   */
+  private static Decimal higher(Decimal current, Decimal candidate) {
+    if (current == null) {
+      return candidate;
+    }
+    if (candidate == null) {
+      return current;
+    }
+    return current.compareTo(candidate) > 0 ? current : candidate;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
new file mode 100644
index 0000000..fbdba24
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+
+/**
+ * Merger for double column statistics: widens the [low, high] range, adds up the null counts
+ * and combines the number of distinct values.
+ */
+public class DoubleColumnStatsMerger extends ColumnStatsMerger {
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    DoubleColumnStatsData target = aggregateColStats.getStatsData().getDoubleStats();
+    DoubleColumnStatsData incoming = newColStats.getStatsData().getDoubleStats();
+
+    double low = Math.min(target.getLowValue(), incoming.getLowValue());
+    target.setLowValue(low);
+    double high = Math.max(target.getHighValue(), incoming.getHighValue());
+    target.setHighValue(high);
+    long nulls = target.getNumNulls() + incoming.getNumNulls();
+    target.setNumNulls(nulls);
+
+    boolean useBitVectors = ndvEstimator != null && incoming.isSetBitVectors()
+        && incoming.getBitVectors().length() != 0;
+    if (!useBitVectors) {
+      // No usable bit vectors: fall back to the larger of the two NDV counts.
+      target.setNumDVs(Math.max(target.getNumDVs(), incoming.getNumDVs()));
+      return;
+    }
+
+    // Feed both serialized bit vectors into the shared estimator, aggregate side first.
+    NumDistinctValueEstimator fromTarget =
+        new NumDistinctValueEstimator(target.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(fromTarget);
+    NumDistinctValueEstimator fromIncoming =
+        new NumDistinctValueEstimator(incoming.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(fromIncoming);
+
+    long ndv = ndvEstimator.estimateNumDistinctValues();
+    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+        + target.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + ndv);
+    target.setNumDVs(ndv);
+    target.setBitVectors(ndvEstimator.serialize().toString());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
new file mode 100644
index 0000000..ac65590
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+
+/**
+ * Merger for long column statistics: widens the [low, high] range, adds up the null counts
+ * and combines the number of distinct values.
+ */
+public class LongColumnStatsMerger extends ColumnStatsMerger {
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    LongColumnStatsData target = aggregateColStats.getStatsData().getLongStats();
+    LongColumnStatsData incoming = newColStats.getStatsData().getLongStats();
+
+    long low = Math.min(target.getLowValue(), incoming.getLowValue());
+    target.setLowValue(low);
+    long high = Math.max(target.getHighValue(), incoming.getHighValue());
+    target.setHighValue(high);
+    long nulls = target.getNumNulls() + incoming.getNumNulls();
+    target.setNumNulls(nulls);
+
+    boolean useBitVectors = ndvEstimator != null && incoming.isSetBitVectors()
+        && incoming.getBitVectors().length() != 0;
+    if (!useBitVectors) {
+      // No usable bit vectors: fall back to the larger of the two NDV counts.
+      target.setNumDVs(Math.max(target.getNumDVs(), incoming.getNumDVs()));
+      return;
+    }
+
+    // Feed both serialized bit vectors into the shared estimator, aggregate side first.
+    NumDistinctValueEstimator fromTarget =
+        new NumDistinctValueEstimator(target.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(fromTarget);
+    NumDistinctValueEstimator fromIncoming =
+        new NumDistinctValueEstimator(incoming.getBitVectors(), ndvEstimator.getnumBitVectors());
+    ndvEstimator.mergeEstimators(fromIncoming);
+
+    long ndv = ndvEstimator.estimateNumDistinctValues();
+    LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+        + target.getNumDVs() + " and " + incoming.getNumDVs() + " to be " + ndv);
+    target.setNumDVs(ndv);
+    target.setBitVectors(ndvEstimator.serialize().toString());
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
new file mode 100644
index 0000000..4158747
--- /dev/null
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hive.metastore.hbase.stats.merge;
+
+import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.parquet.Log;
+
+public class StringColumnStatsMerger extends ColumnStatsMerger { // merger for string column stats; merge() updates the aggregate's StringColumnStatsData
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { // folds newColStats' string stats into aggregateColStats' string stats
+    StringColumnStatsData aggregateData = aggregateColStats.getStatsData().getStringStats();
+    StringColumnStatsData newData = newColStats.getStatsData().getStringStats();
+    aggregateData.setMaxColLen(Math.max(aggregateData.getMaxColLen(), newData.getMaxColLen())); // keep the larger maximum length
+    aggregateData.setAvgColLen(Math.max(aggregateData.getAvgColLen(), newData.getAvgColLen())); // NOTE(review): keeps the larger average rather than a weighted average -- confirm this is intended
+    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); // null counts are summed
+    if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { // no estimator, or newData carries no bit vectors
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); // fallback: larger of the two NDVs
+    } else {
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), // NOTE(review): aggregateData's bit vectors are not checked the way newData's are -- confirm they are always set here
+          ndvEstimator.getnumBitVectors()));
+      ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(),
+          ndvEstimator.getnumBitVectors()));
+      long ndv = ndvEstimator.estimateNumDistinctValues(); // estimate taken after both bit-vector sets were merged in
+      LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+          + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+      aggregateData.setNumDVs(ndv);
+      aggregateData.setBitVectors(ndvEstimator.serialize().toString()); // store the estimator's serialized state back on the aggregate
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index 05dfa3b..0f0df11 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -392,22 +392,16 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     return statsDesc;
   }
 
-  private int persistPartitionStats(Hive db) throws HiveException, MetaException, IOException {
-
-    // Fetch result of the analyze table partition (p1=c1).. compute statistics for columns ..
+  private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException {
     // Construct a column statistics object from the result
     List<ColumnStatistics> colStats = constructColumnStatsFromPackedRows(db);
     // Persist the column statistics object to the metastore
-    db.setPartitionColumnStatistics(new SetPartitionsStatsRequest(colStats));
-    return 0;
-  }
-
-  private int persistTableStats(Hive db) throws HiveException, MetaException, IOException {
-    // Fetch result of the analyze table .. compute statistics for columns ..
-    // Construct a column statistics object from the result
-    ColumnStatistics colStats = constructColumnStatsFromPackedRows(db).get(0);
-    // Persist the column statistics object to the metastore
-    db.updateTableColumnStatistics(colStats);
+    // Note, this function is shared for both table and partition column stats.
+    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
+    if (work.getColStats() != null && work.getColStats().getNumBitVector() > 0) {
+      request.setNeedMerge(true);
+    }
+    db.setPartitionColumnStatistics(request);
     return 0;
   }
 
@@ -415,11 +409,7 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
   public int execute(DriverContext driverContext) {
     try {
       Hive db = getHive();
-      if (work.getColStats().isTblLevel()) {
-        return persistTableStats(db);
-      } else {
-        return persistPartitionStats(db);
-      }
+      return persistColumnStats(db);
     } catch (Exception e) {
       LOG.error("Failed to run column stats task", e);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
index 9a6e5c9..d6852dc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.exec;
 import java.io.IOException;
 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -40,6 +41,7 @@ import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
 import org.apache.hadoop.hive.ql.DriverContext;
@@ -91,9 +93,9 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
     statsObj.setColName(colName.get(0));
 
     statsObj.setColType(colType.get(0));
-
+    
     ColumnStatisticsData statsData = new ColumnStatisticsData();
-
+    
     String columnType = colType.get(0);
 
     if (columnType.equalsIgnoreCase("long")) {
@@ -287,21 +289,11 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
     return statsDesc;
   }
 
-  private int persistTableStats(Hive db) throws HiveException, MetaException,
-      IOException {
-    // Construct a column statistics object from user input
-    ColumnStatistics colStats = constructColumnStatsFromInput();
-    // Persist the column statistics object to the metastore
-    db.updateTableColumnStatistics(colStats);
-    return 0;
-  }
-
-  private int persistPartitionStats(Hive db) throws HiveException, MetaException,
-      IOException {
-    // Construct a column statistics object from user input
-    ColumnStatistics colStats = constructColumnStatsFromInput();
-    // Persist the column statistics object to the metastore
-    db.updatePartitionColumnStatistics(colStats);
+  private int persistColumnStats(Hive db) throws HiveException, MetaException, IOException {
+    List<ColumnStatistics> colStats = new ArrayList<>();
+    colStats.add(constructColumnStatsFromInput());
+    SetPartitionsStatsRequest request = new SetPartitionsStatsRequest(colStats);
+    db.setPartitionColumnStatistics(request);
     return 0;
   }
 
@@ -309,11 +301,7 @@ public class ColumnStatsUpdateTask extends Task<ColumnStatsUpdateWork> {
   public int execute(DriverContext driverContext) {
     try {
       Hive db = getHive();
-      if (work.getColStats().isTblLevel()) {
-        return persistTableStats(db);
-      } else {
-        return persistPartitionStats(db);
-      }
+      return persistColumnStats(db);
     } catch (Exception e) {
       LOG.info("Failed to persist stats in metastore", e);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 26f61c5..6af48ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -3312,24 +3312,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
     return indexes;
   }
 
-  public boolean updateTableColumnStatistics(ColumnStatistics statsObj) throws HiveException {
-    try {
-      return getMSC().updateTableColumnStatistics(statsObj);
-    } catch (Exception e) {
-      LOG.debug(StringUtils.stringifyException(e));
-      throw new HiveException(e);
-    }
-  }
-
-  public boolean updatePartitionColumnStatistics(ColumnStatistics statsObj) throws HiveException {
-    try {
-      return getMSC().updatePartitionColumnStatistics(statsObj);
-    } catch (Exception e) {
-      LOG.debug(StringUtils.stringifyException(e));
-      throw new HiveException(e);
-    }
-  }
-
   public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request) throws HiveException {
     try {
       return getMSC().setPartitionColumnStatistics(request);

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
index db2b674..8eb011e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/SessionHiveMetaStoreClient.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -50,6 +51,7 @@ import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
+import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.TableMeta;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
@@ -325,15 +327,19 @@ public class SessionHiveMetaStoreClient extends HiveMetaStoreClient implements I
 
   /** {@inheritDoc} */
   @Override
-  public boolean updateTableColumnStatistics(ColumnStatistics statsObj)
+  public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request)
       throws NoSuchObjectException, InvalidObjectException, MetaException, TException,
       InvalidInputException {
-    String dbName = statsObj.getStatsDesc().getDbName().toLowerCase();
-    String tableName = statsObj.getStatsDesc().getTableName().toLowerCase();
-    if (getTempTable(dbName, tableName) != null) {
-      return updateTempTableColumnStats(dbName, tableName, statsObj);
+    if (request.getColStatsSize() == 1) {
+      ColumnStatistics colStats = request.getColStatsIterator().next();
+      ColumnStatisticsDesc desc = colStats.getStatsDesc();
+      String dbName = desc.getDbName().toLowerCase();
+      String tableName = desc.getTableName().toLowerCase();
+      if (getTempTable(dbName, tableName) != null) {
+        return updateTempTableColumnStats(dbName, tableName, colStats);
+      }
     }
-    return super.updateTableColumnStatistics(statsObj);
+    return super.setPartitionColumnStatistics(request);
   }
 
   /** {@inheritDoc} */

http://git-wip-us.apache.org/repos/asf/hive/blob/b9e4fe85/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
index c000db2..97f323f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsDesc.java
@@ -29,6 +29,7 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable
 
   private static final long serialVersionUID = 1L;
   private boolean isTblLevel;
+  private int numBitVector;
   private String tableName;
   private List<String> colName;
   private List<String> colType;
@@ -36,12 +37,22 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable
   public ColumnStatsDesc() {
   }
 
+  public ColumnStatsDesc(String tableName, List<String> colName, List<String> colType,
+      boolean isTblLevel) { // four-argument form: numBitVector is not supplied by the caller
+    this.tableName = tableName;
+    this.colName = colName;
+    this.colType = colType;
+    this.isTblLevel = isTblLevel;
+    this.numBitVector = 0; // fixed at 0 here; the five-argument constructor takes this value from the caller instead
+  }
+  
   public ColumnStatsDesc(String tableName, List<String> colName,
-    List<String> colType, boolean isTblLevel) {
+    List<String> colType, boolean isTblLevel, int numBitVector) {
     this.tableName = tableName;
     this.colName = colName;
     this.colType = colType;
     this.isTblLevel = isTblLevel;
+    this.numBitVector = numBitVector;
   }
 
   @Explain(displayName = "Table")
@@ -79,4 +90,13 @@ public class ColumnStatsDesc extends DDLDesc implements Serializable, Cloneable
   public void setColType(List<String> colType) {
     this.colType = colType;
   }
+
+  public int getNumBitVector() {
+    return numBitVector;
+  }
+
+  public void setNumBitVector(int numBitVector) {
+    this.numBitVector = numBitVector;
+  }
+
 }


Mime
View raw message