hive-commits mailing list archives

From pxi...@apache.org
Subject [1/4] hive git commit: HIVE-12763: Use bit vector to track NDV (Pengcheng Xiong, reviewed by Laljo John Pullokkaran and Alan Gates)
Date Fri, 29 Jan 2016 05:25:54 GMT
Repository: hive
Updated Branches:
  refs/heads/master 0c7f2d66b -> 7b2f6703f


http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
----------------------------------------------------------------------
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
new file mode 100644
index 0000000..b0d7662
--- /dev/null
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseStoreBitVector.java
@@ -0,0 +1,634 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.hbase;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.client.HTableInterface;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.Decimal;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Function;
+import org.apache.hadoop.hive.metastore.api.FunctionType;
+import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
+import org.apache.hadoop.hive.metastore.api.Order;
+import org.apache.hadoop.hive.metastore.api.Partition;
+import org.apache.hadoop.hive.metastore.api.PrincipalType;
+import org.apache.hadoop.hive.metastore.api.ResourceType;
+import org.apache.hadoop.hive.metastore.api.ResourceUri;
+import org.apache.hadoop.hive.metastore.api.Role;
+import org.apache.hadoop.hive.metastore.api.SerDeInfo;
+import org.apache.hadoop.hive.metastore.api.SkewedInfo;
+import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+import java.io.IOException;
+import java.security.MessageDigest;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+/**
+ * Tests reading and writing column statistics, including NDV bit vectors, through HBaseStore.
+ */
+public class TestHBaseStoreBitVector {
+  private static final Logger LOG = LoggerFactory.getLogger(TestHBaseStoreBitVector.class.getName());
+  static Map<String, String> emptyParameters = new HashMap<String, String>();
+  // Table with NUM_PART_KEYS partitioning keys and NUM_PARTITIONS values per key
+  static final int NUM_PART_KEYS = 1;
+  static final int NUM_PARTITIONS = 5;
+  static final String DB = "db";
+  static final String TBL = "tbl";
+  static final String COL = "col";
+  static final String PART_KEY_PREFIX = "part";
+  static final String PART_VAL_PREFIX = "val";
+  static final String PART_KV_SEPARATOR = "=";
+  static final List<String> PART_KEYS = new ArrayList<String>();
+  static final List<String> PART_VALS = new ArrayList<String>();
+  // Initialize mock partitions
+  static {
+    for (int i = 1; i <= NUM_PART_KEYS; i++) {
+      PART_KEYS.add(PART_KEY_PREFIX + i);
+    }
+    for (int i = 1; i <= NUM_PARTITIONS; i++) {
+      PART_VALS.add(PART_VAL_PREFIX + i);
+    }
+  }
+  static final long DEFAULT_TIME = System.currentTimeMillis();
+  static final String PART_KEY = "part";
+  static final String LONG_COL = "longCol";
+  static final String LONG_TYPE = "long";
+  static final String INT_TYPE = "int";
+  static final String INT_VAL = "1234";
+  static final String DOUBLE_COL = "doubleCol";
+  static final String DOUBLE_TYPE = "double";
+  static final String DOUBLE_VAL = "3.1415";
+  static final String STRING_COL = "stringCol";
+  static final String STRING_TYPE = "string";
+  static final String STRING_VAL = "stringval";
+  static final String DECIMAL_COL = "decimalCol";
+  static final String DECIMAL_TYPE = "decimal(5,3)";
+  static final String DECIMAL_VAL = "12.123";
+  static List<ColumnStatisticsObj> longColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+  static List<ColumnStatisticsObj> doubleColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+  static List<ColumnStatisticsObj> stringColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+  static List<ColumnStatisticsObj> decimalColStatsObjs = new ArrayList<ColumnStatisticsObj>(
+      NUM_PARTITIONS);
+
+  @Rule public ExpectedException thrown = ExpectedException.none();
+  @Mock HTableInterface htable;
+  SortedMap<String, Cell> rows = new TreeMap<>();
+  HBaseStore store;
+
+
+  @BeforeClass
+  public static void beforeTest() {
+    // All data initializations
+    populateMockStats();
+  }
+
+  private static void populateMockStats() {
+    ColumnStatisticsObj statsObj;
+    // Add NUM_PARTITIONS ColumnStatisticsObj of each type
+    // For aggregate stats test, we'll treat each ColumnStatisticsObj as stats for 1 partition
+    // For the rest, we'll just pick the 1st ColumnStatisticsObj from this list and use it
+    for (int i = 0; i < NUM_PARTITIONS; i++) {
+      statsObj = mockLongStats(i);
+      longColStatsObjs.add(statsObj);
+      statsObj = mockDoubleStats(i);
+      doubleColStatsObjs.add(statsObj);
+      statsObj = mockStringStats(i);
+      stringColStatsObjs.add(statsObj);
+      statsObj = mockDecimalStats(i);
+      decimalColStatsObjs.add(statsObj);
+    }
+  }
+
+  private static ColumnStatisticsObj mockLongStats(int i) {
+    long high = 120938479124L + 100*i;
+    long low = -12341243213412124L - 50*i;
+    long nulls = 23 + i;
+    long dVs = 213L + 10*i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{1, 2, 3, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(LONG_COL);
+    colStatsObj.setColType(LONG_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    LongColumnStatsData longData = new LongColumnStatsData();
+    longData.setHighValue(high);
+    longData.setLowValue(low);
+    longData.setNumNulls(nulls);
+    longData.setNumDVs(dVs);
+    longData.setBitVectors(bitVectors);
+    data.setLongStats(longData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  private static ColumnStatisticsObj mockDoubleStats(int i) {
+    double high = 123423.23423 + 100*i;
+    double low = 0.00001234233 - 50*i;
+    long nulls = 92 + i;
+    long dVs = 1234123421L + 10*i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 2, 3, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(DOUBLE_COL);
+    colStatsObj.setColType(DOUBLE_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
+    doubleData.setHighValue(high);
+    doubleData.setLowValue(low);
+    doubleData.setNumNulls(nulls);
+    doubleData.setNumDVs(dVs);
+    doubleData.setBitVectors(bitVectors);
+    data.setDoubleStats(doubleData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  private static ColumnStatisticsObj mockStringStats(int i) {
+    long maxLen = 1234 + 10*i;
+    double avgLen = 32.3 + i;
+    long nulls = 987 + 10*i;
+    long dVs = 906 + i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 3, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(STRING_COL);
+    colStatsObj.setColType(STRING_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    StringColumnStatsData stringData = new StringColumnStatsData();
+    stringData.setMaxColLen(maxLen);
+    stringData.setAvgColLen(avgLen);
+    stringData.setNumNulls(nulls);
+    stringData.setNumDVs(dVs);
+    stringData.setBitVectors(bitVectors);
+    data.setStringStats(stringData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  private static ColumnStatisticsObj mockDecimalStats(int i) {
+    Decimal high = new Decimal();
+    high.setScale((short)3);
+    String strHigh = String.valueOf(3876 + 100*i);
+    high.setUnscaled(strHigh.getBytes());
+    Decimal low = new Decimal();
+    low.setScale((short)3);
+    String strLow = String.valueOf(38 + i);
+    low.setUnscaled(strLow.getBytes());
+    long nulls = 13 + i;
+    long dVs = 923947293L + 100*i;
+    String bitVectors = "{0, 1, 2, 3, 4, 5, 6, 7, 8}{0, 1, 2, 4, 5, 6, 7, 8}";
+    ColumnStatisticsObj colStatsObj = new ColumnStatisticsObj();
+    colStatsObj.setColName(DECIMAL_COL);
+    colStatsObj.setColType(DECIMAL_TYPE);
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    DecimalColumnStatsData decimalData = new DecimalColumnStatsData();
+    decimalData.setHighValue(high);
+    decimalData.setLowValue(low);
+    decimalData.setNumNulls(nulls);
+    decimalData.setNumDVs(dVs);
+    decimalData.setBitVectors(bitVectors);
+    data.setDecimalStats(decimalData);
+    colStatsObj.setStatsData(data);
+    return colStatsObj;
+  }
+
+  @AfterClass
+  public static void afterTest() {
+  }
+
+
+  @Before
+  public void init() throws IOException {
+    MockitoAnnotations.initMocks(this);
+    HiveConf conf = new HiveConf();
+    conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true);
+    store = MockUtils.init(conf, htable, rows);
+  }
+
+  @Test
+  public void longTableStatistics() throws Exception {
+    createMockTable(LONG_COL, LONG_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = longColStatsObjs.get(0);
+    LongColumnStatsData longData = obj.getStatsData().getLongStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(LONG_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField());
+    // Compare LongColumnStatsData
+    LongColumnStatsData longDataFromDB = dataFromDB.getLongStats();
+    Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue());
+    Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue());
+    Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls());
+    Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs());
+    Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void doubleTableStatistics() throws Exception {
+    createMockTable(DOUBLE_COL, DOUBLE_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
+    DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DOUBLE_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField());
+    // Compare DoubleColumnStatsData
+    DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats();
+    Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01);
+    Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01);
+    Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls());
+    Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs());
+    Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void stringTableStatistics() throws Exception {
+    createMockTable(STRING_COL, STRING_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
+    StringColumnStatsData stringData = obj.getStatsData().getStringStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(STRING_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
+    // Compare StringColumnStatsData
+    StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
+    Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
+    Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
+    Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
+    Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
+    Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void decimalTableStatistics() throws Exception {
+    createMockTable(DECIMAL_COL, DECIMAL_TYPE);
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for table level stats
+    ColumnStatisticsDesc desc = getMockTblColStatsDesc();
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
+    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    store.updateTableColumnStatistics(stats);
+    // Get from DB
+    ColumnStatistics statsFromDB = store.getTableColumnStatistics(DB, TBL, Arrays.asList(DECIMAL_COL));
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.getStatsDesc().getTableName());
+    Assert.assertTrue(statsFromDB.getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
+    // Compare DecimalColumnStatsData
+    DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
+    Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
+    Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
+    Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
+    Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
+    Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void longPartitionStatistics() throws Exception {
+    createMockTableAndPartition(INT_TYPE, INT_VAL);
+    // Add partition stats for: LONG_COL and partition: {PART_KEY, INT_VAL} to DB
+    // Because of the way our mock implementation works, we must not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, INT_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = longColStatsObjs.get(0);
+    LongColumnStatsData longData = obj.getStatsData().getLongStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(INT_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.LONG_STATS, dataFromDB.getSetField());
+    // Compare LongColumnStatsData
+    LongColumnStatsData longDataFromDB = dataFromDB.getLongStats();
+    Assert.assertEquals(longData.getHighValue(), longDataFromDB.getHighValue());
+    Assert.assertEquals(longData.getLowValue(), longDataFromDB.getLowValue());
+    Assert.assertEquals(longData.getNumNulls(), longDataFromDB.getNumNulls());
+    Assert.assertEquals(longData.getNumDVs(), longDataFromDB.getNumDVs());
+    Assert.assertEquals(longData.getBitVectors(), longDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void doublePartitionStatistics() throws Exception {
+    createMockTableAndPartition(DOUBLE_TYPE, DOUBLE_VAL);
+    // Add partition stats for: DOUBLE_COL and partition: {PART_KEY, DOUBLE_VAL} to DB
+    // Because of the way our mock implementation works, we must not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DOUBLE_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = doubleColStatsObjs.get(0);
+    DoubleColumnStatsData doubleData = obj.getStatsData().getDoubleStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(DOUBLE_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DOUBLE_STATS, dataFromDB.getSetField());
+    // Compare DoubleColumnStatsData
+    DoubleColumnStatsData doubleDataFromDB = dataFromDB.getDoubleStats();
+    Assert.assertEquals(doubleData.getHighValue(), doubleDataFromDB.getHighValue(), 0.01);
+    Assert.assertEquals(doubleData.getLowValue(), doubleDataFromDB.getLowValue(), 0.01);
+    Assert.assertEquals(doubleData.getNumNulls(), doubleDataFromDB.getNumNulls());
+    Assert.assertEquals(doubleData.getNumDVs(), doubleDataFromDB.getNumDVs());
+    Assert.assertEquals(doubleData.getBitVectors(), doubleDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void stringPartitionStatistics() throws Exception {
+    createMockTableAndPartition(STRING_TYPE, STRING_VAL);
+    // Add partition stats for: STRING_COL and partition: {PART_KEY, STRING_VAL} to DB
+    // Because of the way our mock implementation works, we must not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, STRING_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = stringColStatsObjs.get(0);
+    StringColumnStatsData stringData = obj.getStatsData().getStringStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(STRING_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.STRING_STATS, dataFromDB.getSetField());
+    // Compare StringColumnStatsData
+    StringColumnStatsData stringDataFromDB = dataFromDB.getStringStats();
+    Assert.assertEquals(stringData.getMaxColLen(), stringDataFromDB.getMaxColLen());
+    Assert.assertEquals(stringData.getAvgColLen(), stringDataFromDB.getAvgColLen(), 0.01);
+    Assert.assertEquals(stringData.getNumNulls(), stringDataFromDB.getNumNulls());
+    Assert.assertEquals(stringData.getNumDVs(), stringDataFromDB.getNumDVs());
+    Assert.assertEquals(stringData.getBitVectors(), stringDataFromDB.getBitVectors());
+  }
+
+  @Test
+  public void decimalPartitionStatistics() throws Exception {
+    createMockTableAndPartition(DECIMAL_TYPE, DECIMAL_VAL);
+    // Add partition stats for: DECIMAL_COL and partition: {PART_KEY, DECIMAL_VAL} to DB
+    // Because of the way our mock implementation works, we must not create the table
+    // before we set statistics on it.
+    ColumnStatistics stats = new ColumnStatistics();
+    // Get a default ColumnStatisticsDesc for partition level stats
+    ColumnStatisticsDesc desc = getMockPartColStatsDesc(PART_KEY, DECIMAL_VAL);
+    stats.setStatsDesc(desc);
+    // Get one of the pre-created ColumnStatisticsObj
+    ColumnStatisticsObj obj = decimalColStatsObjs.get(0);
+    DecimalColumnStatsData decimalData = obj.getStatsData().getDecimalStats();
+    // Add to DB
+    stats.addToStatsObj(obj);
+    List<String> parVals = new ArrayList<String>();
+    parVals.add(DECIMAL_VAL);
+    store.updatePartitionColumnStatistics(stats, parVals);
+    // Get from DB
+    List<String> partNames = new ArrayList<String>();
+    partNames.add(desc.getPartName());
+    List<String> colNames = new ArrayList<String>();
+    colNames.add(obj.getColName());
+    List<ColumnStatistics> statsFromDB = store.getPartitionColumnStatistics(DB, TBL, partNames, colNames);
+    // Compare ColumnStatisticsDesc
+    Assert.assertEquals(1, statsFromDB.size());
+    Assert.assertEquals(desc.getLastAnalyzed(), statsFromDB.get(0).getStatsDesc().getLastAnalyzed());
+    Assert.assertEquals(DB, statsFromDB.get(0).getStatsDesc().getDbName());
+    Assert.assertEquals(TBL, statsFromDB.get(0).getStatsDesc().getTableName());
+    Assert.assertFalse(statsFromDB.get(0).getStatsDesc().isIsTblLevel());
+    // Compare ColumnStatisticsObj
+    Assert.assertEquals(1, statsFromDB.get(0).getStatsObjSize());
+    ColumnStatisticsObj objFromDB = statsFromDB.get(0).getStatsObj().get(0);
+    ColumnStatisticsData dataFromDB = objFromDB.getStatsData();
+    // Compare ColumnStatisticsData
+    Assert.assertEquals(ColumnStatisticsData._Fields.DECIMAL_STATS, dataFromDB.getSetField());
+    // Compare DecimalColumnStatsData
+    DecimalColumnStatsData decimalDataFromDB = dataFromDB.getDecimalStats();
+    Assert.assertEquals(decimalData.getHighValue(), decimalDataFromDB.getHighValue());
+    Assert.assertEquals(decimalData.getLowValue(), decimalDataFromDB.getLowValue());
+    Assert.assertEquals(decimalData.getNumNulls(), decimalDataFromDB.getNumNulls());
+    Assert.assertEquals(decimalData.getNumDVs(), decimalDataFromDB.getNumDVs());
+    Assert.assertEquals(decimalData.getBitVectors(), decimalDataFromDB.getBitVectors());
+  }
+
+  private Table createMockTable(String name, String type) throws Exception {
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema(name, type, ""));
+    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("key", "value");
+    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
+        serde, new ArrayList<String>(), new ArrayList<Order>(), params);
+    int currentTime = (int)(System.currentTimeMillis() / 1000);
+    Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
+        emptyParameters, null, null, null);
+    store.createTable(table);
+    return table;
+  }
+
+  private Table createMockTableAndPartition(String partType, String partVal) throws Exception {
+    List<FieldSchema> cols = new ArrayList<FieldSchema>();
+    cols.add(new FieldSchema("col1", partType, ""));
+    List<String> vals = new ArrayList<String>();
+    vals.add(partVal);
+    SerDeInfo serde = new SerDeInfo("serde", "seriallib", null);
+    Map<String, String> params = new HashMap<String, String>();
+    params.put("key", "value");
+    StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 17,
+        serde, Arrays.asList("bucketcol"), Arrays.asList(new Order("sortcol", 1)), params);
+    int currentTime = (int)(System.currentTimeMillis() / 1000);
+    Table table = new Table(TBL, DB, "me", currentTime, currentTime, 0, sd, cols,
+        emptyParameters, null, null, null);
+    store.createTable(table);
+    Partition part = new Partition(vals, DB, TBL, currentTime, currentTime, sd,
+        emptyParameters);
+    store.addPartition(part);
+    return table;
+  }
+  /**
+   * Returns a dummy table level ColumnStatisticsDesc with default values
+   */
+  private ColumnStatisticsDesc getMockTblColStatsDesc() {
+    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+    desc.setLastAnalyzed(DEFAULT_TIME);
+    desc.setDbName(DB);
+    desc.setTableName(TBL);
+    desc.setIsTblLevel(true);
+    return desc;
+  }
+
+  /**
+   * Returns a dummy partition level ColumnStatisticsDesc
+   */
+  private ColumnStatisticsDesc getMockPartColStatsDesc(String partKey, String partVal) {
+    ColumnStatisticsDesc desc = new ColumnStatisticsDesc();
+    desc.setLastAnalyzed(DEFAULT_TIME);
+    desc.setDbName(DB);
+    desc.setTableName(TBL);
+    // part1=val1
+    desc.setPartName(partKey + PART_KV_SEPARATOR + partVal);
+    desc.setIsTblLevel(false);
+    return desc;
+  }
+
+}
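
The mock bitVectors strings above mimic the serialized sketch format that also appears in the char_udf1 golden file further down: one brace-delimited group per bit vector, each listing that vector's set-bit positions. A minimal, self-contained illustration of reading that format back (the parser below is hypothetical and not part of this patch):

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class BitVectorStringDemo {
  // Hypothetical parser: turns "{0, 1, 2}{1, 2}" into one list of
  // set-bit positions per bit vector.
  static List<List<Integer>> parse(String bitVectors) {
    List<List<Integer>> vectors = new ArrayList<>();
    Matcher m = Pattern.compile("\\{([^}]*)\\}").matcher(bitVectors);
    while (m.find()) {
      List<Integer> positions = new ArrayList<>();
      for (String s : m.group(1).split(",")) {
        String trimmed = s.trim();
        if (!trimmed.isEmpty()) {
          positions.add(Integer.parseInt(trimmed));
        }
      }
      vectors.add(positions);
    }
    return vectors;
  }

  public static void main(String[] args) {
    // Same literal as mockLongStats() above: two bit vectors.
    System.out.println(parse("{0, 1, 2, 3, 4, 5, 6, 7, 8}{1, 2, 3, 4, 5, 6, 7, 8}"));
  }
}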

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/pom.xml
----------------------------------------------------------------------
diff --git a/ql/pom.xml b/ql/pom.xml
index 358cd2a..f19a225 100644
--- a/ql/pom.xml
+++ b/ql/pom.xml
@@ -397,11 +397,6 @@
       <version>${guava.version}</version>
     </dependency>
     <dependency>
-      <groupId>com.google.protobuf</groupId>
-      <artifactId>protobuf-java</artifactId>
-      <version>${protobuf.version}</version>
-    </dependency>
-    <dependency>
       <groupId>com.googlecode.javaewah</groupId>
       <artifactId>JavaEWAH</artifactId>
       <version>${javaewah.version}</version>

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index 7914471..f9a9fd2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -121,6 +121,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       double d = ((DoubleObjectInspector) oi).get(o);
       statsObj.getStatsData().getDoubleStats().setLowValue(d);
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDoubleStats().setBitVectors(v);
     }
   }
 
@@ -138,6 +142,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       HiveDecimal d = ((HiveDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
       statsObj.getStatsData().getDecimalStats().setLowValue(convertToThriftDecimal(d));
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDecimalStats().setBitVectors(v);
     }
   }
 
@@ -159,6 +167,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       long  v = ((LongObjectInspector) oi).get(o);
       statsObj.getStatsData().getLongStats().setLowValue(v);
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getLongStats().setBitVectors(v);
     }
   }
 
@@ -176,6 +188,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("maxlength")) {
       long v = ((LongObjectInspector) oi).get(o);
       statsObj.getStatsData().getStringStats().setMaxColLen(v);
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getStringStats().setBitVectors(v);
     }
   }
 
@@ -207,6 +223,10 @@ public class ColumnStatsTask extends Task<ColumnStatsWork> implements Serializab
     } else if (fName.equals("min")) {
       DateWritable v = ((DateObjectInspector) oi).getPrimitiveWritableObject(o);
       statsObj.getStatsData().getDateStats().setLowValue(new Date(v.getDays()));
+    } else if (fName.equals("ndvbitvector")) {
+      PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
+      String v = ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
+      statsObj.getStatsData().getDateStats().setBitVectors(v);
     }
   }
 

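All five unpack methods above gain the same three-line "ndvbitvector" branch. A sketch of how that repetition could be factored into one helper (hypothetical, not part of this patch; it assumes the field always arrives as a string, as in the hunks above):

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;

final class BitVectorUnpack {
  // Hypothetical helper: the repeated "ndvbitvector" branch in one place.
  static String unpackBitVectors(ObjectInspector oi, Object o) {
    PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
    return ((StringObjectInspector) poi).getPrimitiveJavaObject(o);
  }
}

Each branch would then collapse to a single call such as statsObj.getStatsData().getLongStats().setBitVectors(BitVectorUnpack.unpackBitVectors(oi, o)).
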
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
index 1f30cbd..bb1bbad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
@@ -25,6 +25,8 @@ import java.util.Map;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.HiveStatsUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.conf.HiveVariableSource;
@@ -201,60 +203,6 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
     throw new SemanticException ("Unknown partition key : " + partKey);
   }
 
-  private int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException {
-    int numBitVectors;
-    float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
-
-    if (percentageError < 0.0) {
-      throw new SemanticException("hive.stats.ndv.error can't be negative");
-    } else if (percentageError <= 2.4) {
-      numBitVectors = 1024;
-      LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
-      LOG.info("Choosing 1024 bit vectors..");
-    } else if (percentageError <= 3.4 ) {
-      numBitVectors = 1024;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 1024 bit vectors..");
-    } else if (percentageError <= 4.8) {
-      numBitVectors = 512;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 512 bit vectors..");
-     } else if (percentageError <= 6.8) {
-      numBitVectors = 256;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 256 bit vectors..");
-    } else if (percentageError <= 9.7) {
-      numBitVectors = 128;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 128 bit vectors..");
-    } else if (percentageError <= 13.8) {
-      numBitVectors = 64;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 64 bit vectors..");
-    } else if (percentageError <= 19.6) {
-      numBitVectors = 32;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 32 bit vectors..");
-    } else if (percentageError <= 28.2) {
-      numBitVectors = 16;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 16 bit vectors..");
-    } else if (percentageError <= 40.9) {
-      numBitVectors = 8;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 8 bit vectors..");
-    } else if (percentageError <= 61.0) {
-      numBitVectors = 4;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 4 bit vectors..");
-    } else {
-      numBitVectors = 2;
-      LOG.info("Error requested is " + percentageError + "%");
-      LOG.info("Choosing 2 bit vectors..");
-    }
-    return numBitVectors;
-  }
-
   private List<String> getColumnTypes(List<String> colNames)
       throws SemanticException{
     List<String> colTypes = new LinkedList<String>();
@@ -396,7 +344,12 @@ public class ColumnStatsSemanticAnalyzer extends SemanticAnalyzer {
         isTableLevel = true;
       }
       colType = getColumnTypes(colNames);
-      int numBitVectors = getNumBitVectorsForNDVEstimation(conf);
+      int numBitVectors;
+      try {
+        numBitVectors = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
+      } catch (Exception e) {
+        throw new SemanticException(e.getMessage());
+      }
       rewrittenQuery = genRewrittenQuery(colNames, numBitVectors, partSpec, isPartitionStats);
       rewrittenTree = genRewrittenTree(rewrittenQuery);
     } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index b4cf58f..ea506fc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory;
 import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.ColStatistics.Range;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -1558,4 +1559,58 @@ public class StatsUtils {
       return Long.MAX_VALUE;
     }
   }
+
+  public static int getNumBitVectorsForNDVEstimation(HiveConf conf) throws SemanticException {
+    int numBitVectors;
+    float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR);
+
+    if (percentageError < 0.0) {
+      throw new SemanticException("hive.stats.ndv.error can't be negative");
+    } else if (percentageError <= 2.4) {
+      numBitVectors = 1024;
+      LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%");
+      LOG.info("Choosing 1024 bit vectors..");
+    } else if (percentageError <= 3.4) {
+      numBitVectors = 1024;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 1024 bit vectors..");
+    } else if (percentageError <= 4.8) {
+      numBitVectors = 512;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 512 bit vectors..");
+    } else if (percentageError <= 6.8) {
+      numBitVectors = 256;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 256 bit vectors..");
+    } else if (percentageError <= 9.7) {
+      numBitVectors = 128;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 128 bit vectors..");
+    } else if (percentageError <= 13.8) {
+      numBitVectors = 64;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 64 bit vectors..");
+    } else if (percentageError <= 19.6) {
+      numBitVectors = 32;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 32 bit vectors..");
+    } else if (percentageError <= 28.2) {
+      numBitVectors = 16;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 16 bit vectors..");
+    } else if (percentageError <= 40.9) {
+      numBitVectors = 8;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 8 bit vectors..");
+    } else if (percentageError <= 61.0) {
+      numBitVectors = 4;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 4 bit vectors..");
+    } else {
+      numBitVectors = 2;
+      LOG.info("Error requested is " + percentageError + "%");
+      LOG.info("Choosing 2 bit vectors..");
+    }
+    return numBitVectors;
+  }
 }
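
The cutoffs above track the Flajolet-Martin sketch's expected standard error of roughly 0.78/sqrt(k) for k bit vectors (0.78/sqrt(1024) is about 2.4%, 0.78/sqrt(256) about 4.9%, and so on), so the method returns the smallest vector count whose error stays at or under the request. A small worked example (assuming HiveConf's setFloatVar setter):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.stats.StatsUtils;

public class NdvErrorDemo {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    // Request a 5% NDV error: 5.0 falls in the (4.8, 6.8] bucket above,
    // so 256 bit vectors are chosen.
    conf.setFloatVar(HiveConf.ConfVars.HIVE_STATS_NDV_ERROR, 5.0f);
    System.out.println(StatsUtils.getNumBitVectorsForNDVEstimation(conf));  // 256
  }
}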

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index 0e96f89..d6ca73f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -43,8 +43,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.util.StringUtils;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * GenericUDAFComputeStats
@@ -401,6 +399,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         foi.add(getValueObjectInspector());
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
 
         List<String> fname = new ArrayList<String>();
         fname.add("columnType");
@@ -408,11 +407,13 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         fname.add("max");
         fname.add("countnulls");
         fname.add("numdistinctvalues");
+        fname.add("ndvbitvector");
 
-        result = new Object[5];
+        result = new Object[6];
         result[0] = new Text();
         result[3] = new LongWritable(0);
         result[4] = new LongWritable(0);
+        result[5] = new Text();
 
         return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
             foi);
@@ -448,6 +449,9 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         serializeCommon(result);
         long dv = numDV != null ? numDV.estimateNumDistinctValues() : 0;
         ((LongWritable) result[4]).set(dv);
+        if (numDV != null) {
+          ((Text) result[5]).set(numDV.serialize());
+        }
 
         return result;
       }
@@ -795,6 +799,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+        foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
 
         List<String> fname = new ArrayList<String>();
         fname.add("columntype");
@@ -802,13 +807,15 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         fname.add("avglength");
         fname.add("countnulls");
         fname.add("numdistinctvalues");
+        fname.add("ndvbitvector");
 
-        result = new Object[5];
+        result = new Object[6];
         result[0] = new Text();
         result[1] = new LongWritable(0);
         result[2] = new DoubleWritable(0);
         result[3] = new LongWritable(0);
         result[4] = new LongWritable(0);
+        result[5] = new Text();
 
         return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
             foi);
@@ -1003,7 +1010,9 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       ((DoubleWritable) result[2]).set(avgLength);
       ((LongWritable) result[3]).set(myagg.countNulls);
       ((LongWritable) result[4]).set(numDV);
-
+      if (myagg.numBitVectors != 0) {
+        ((Text) result[5]).set(myagg.numDV.serialize());
+      }
       return result;
     }
   }
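
The numDV object serialized above is Hive's NumDistinctValueEstimator, a Flajolet-Martin style sketch: each value is hashed once per bit vector, the position of the hash's lowest set bit marks a bit in that vector, and the lowest still-unset position R yields an estimate near 2^R / 0.77351. A self-contained toy version of the idea (illustration only, not Hive's implementation):

// Toy Flajolet-Martin sketch -- background illustration only.
public class ToyFmSketch {
  private final int[] bitVectors;  // bit r set => some value's hash had r trailing zeros

  ToyFmSketch(int numVectors) {
    bitVectors = new int[numVectors];
  }

  void add(long value) {
    for (int i = 0; i < bitVectors.length; i++) {
      long h = hash(value, i);
      int r = Long.numberOfTrailingZeros(h);  // geometric: P(r) = 2^-(r+1)
      bitVectors[i] |= 1 << Math.min(r, 31);
    }
  }

  long estimate() {
    double sumR = 0;
    for (int v : bitVectors) {
      sumR += Integer.numberOfTrailingZeros(~v);  // lowest unset bit position R
    }
    // FM: E[R] is about log2(0.77351 * n); invert and average across vectors.
    return (long) (Math.pow(2.0, sumR / bitVectors.length) / 0.77351);
  }

  private static long hash(long value, int seed) {
    long h = value * 0x9E3779B97F4A7C15L + seed * 0xC2B2AE3D27D4EB4FL;
    h ^= h >>> 33;
    h *= 0xFF51AFD7ED558CCDL;
    h ^= h >>> 33;
    return h == 0 ? 1 : h;  // guard: an all-zero hash would give r = 64
  }

  public static void main(String[] args) {
    ToyFmSketch sketch = new ToyFmSketch(64);
    for (long v = 0; v < 100000; v++) {
      sketch.add(v % 5000);                // 5000 distinct values
    }
    System.out.println(sketch.estimate()); // ~5000; error with 64 vectors is ~10%
  }
}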

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
index bfed116..ee1c2ae 100644
--- a/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/char_udf1.q.java1.7.out
@@ -422,7 +422,7 @@ from char_udf_1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@char_udf_1
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
 PREHOOK: query: select
   min(c2),
   min(c4)

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index b7c9075..2545c03 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -66,7 +66,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -186,7 +186,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -199,7 +199,7 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
                     columns _col0,_col1
-                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:double
+                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:double
                     escape.delim \
                     hive.serialization.extend.additional.nesting.levels true
                     serialization.escape.crlf true
@@ -264,7 +264,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -384,7 +384,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1
             File Output Operator
               compressed: false
@@ -397,7 +397,7 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
                     columns _col0,_col1
-                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:double
+                    columns.types struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:double
                     escape.delim \
                     hive.serialization.extend.additional.nesting.levels true
                     serialization.escape.crlf true
@@ -462,7 +462,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false
@@ -542,7 +542,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double)
+            expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 9685202..39f45ae 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -104,7 +104,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
             File Output Operator
               compressed: false
@@ -177,7 +177,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false
@@ -261,7 +261,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2
             File Output Operator
               compressed: false
@@ -342,7 +342,7 @@ STAGE PLANS:
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
           Select Operator
-            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>), _col0 (type: double), _col1 (type: string)
+            expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
             File Output Operator
               compressed: false

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
index 0aadae3..4cd12c4 100644
--- a/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_tbllvl.q.out
@@ -187,7 +187,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true
@@ -588,7 +588,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_date.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_date.q.out b/ql/src/test/results/clientpositive/compute_stats_date.q.out
index b57a862..d9c47d0 100644
--- a/ql/src/test/results/clientpositive/compute_stats_date.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_date.q.out
@@ -47,7 +47,7 @@ select compute_stats(fl_date, 16) from tab_date
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_date
 #### A masked pattern was here ####
-{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":18}
+{"columntype":"Date","min":"2000-11-20","max":"2010-10-29","countnulls":0,"numdistinctvalues":18,"ndvbitvector":"{0, 1, 2, 3, 4, 5}{0, 1, 2, 3}{0}{0, 1, 2, 6}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 2}{0, 1, 2, 3, 4}{0, 1, 2, 4, 5}{0, 1, 2, 3}{0, 1, 2, 3, 5}{0, 1, 2, 3, 4, 5}{0, 1, 2, 3, 4}"}
 PREHOOK: query: explain
 analyze table tab_date compute statistics for columns fl_date
 PREHOOK: type: QUERY

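For context on the new "ndvbitvector" field in these golden files: it serializes a set of Flajolet-Martin (FM) bit vectors, one "{...}" group per vector, each group listing the indices of the bits set in that vector. The second argument to compute_stats (16 here, 18 in the decimal test below) is the number of bit vectors. Roughly, each value is hashed once per vector and sets the bit at the position of its hash's lowest set bit; the NDV estimate is then 2^R / 0.77351, where R is the average position of the lowest unset bit across the vectors. The Java below is a minimal self-contained sketch of that idea only; the names (FmSketch, estimateNdv, the stand-in hash) are hypothetical and do not reproduce Hive's actual NumDistinctValueEstimator hashing or serialization.

import java.util.Random;

/**
 * Minimal Flajolet-Martin style NDV estimator, for exposition only.
 * Not Hive's NumDistinctValueEstimator; hashing and serialization differ.
 */
public class FmSketch {
  private static final double PHI = 0.77351; // standard FM correction constant
  private final long[] bitVectors;           // bit j set => some value hashed to run length j

  public FmSketch(int numVectors) {
    this.bitVectors = new long[numVectors];
  }

  public void add(long value) {
    for (int i = 0; i < bitVectors.length; i++) {
      long h = hash(value, i);               // per-vector hash
      int r = Long.numberOfTrailingZeros(h); // index of lowest set bit
      bitVectors[i] |= (1L << Math.min(r, 62));
    }
  }

  public long estimateNdv() {
    boolean empty = true;
    double sumR = 0;
    for (long v : bitVectors) {
      if (v != 0) {
        empty = false;
      }
      sumR += Long.numberOfTrailingZeros(~v); // position of lowest unset bit
    }
    if (empty) {
      return 0; // matches the empty-table outputs above: no bits set, NDV 0
    }
    return (long) (Math.pow(2, sumR / bitVectors.length) / PHI);
  }

  private static long hash(long value, int seed) {
    // Cheap stand-in hash, assumed for illustration; forcing bit 62 on
    // keeps the run length bounded even for a degenerate hash value.
    return new Random(value * 31L + seed).nextLong() | (1L << 62);
  }

  // Reproduces the "{0, 1, 2}{0, 2}..." shape seen in the outputs above.
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    for (long v : bitVectors) {
      sb.append('{');
      boolean first = true;
      for (int b = 0; b <= 62; b++) {
        if ((v & (1L << b)) != 0) {
          if (!first) {
            sb.append(", ");
          }
          sb.append(b);
          first = false;
        }
      }
      sb.append('}');
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    FmSketch s = new FmSketch(16);
    for (long v : new long[] {4, 10, 23, 344, 4, 10}) {
      s.add(v); // duplicates set the same bits, so they do not inflate NDV
    }
    System.out.println(s.estimateNdv() + " -> " + s);
  }
}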
http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
index 35abb37..c204ab6 100644
--- a/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_decimal.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 18) from tab_decimal
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_decimal
 #### A masked pattern was here ####
-{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13}
+{"columntype":"Decimal","min":-87.2,"max":435.331,"countnulls":2,"numdistinctvalues":13,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}{0, 1, 2, 3, 5}{0, 1, 3}{0, 1, 2, 4}{0, 1, 2, 3, 5}{0, 1, 2, 3}{0, 1, 2}{0, 1}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 6, 8}{0, 1, 2, 3}{0, 1, 2}{0, 1, 4, 5}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_double.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_double.q.out b/ql/src/test/results/clientpositive/compute_stats_double.q.out
index f6b4052..0a67ecd 100644
--- a/ql/src/test/results/clientpositive/compute_stats_double.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_double.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_double
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_double
 #### A masked pattern was here ####
-{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11}
+{"columntype":"Double","min":-87.2,"max":435.33,"countnulls":2,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3, 4}{0, 1, 2}{0, 1}{0, 1, 3, 4}{0, 1, 3}{0, 1, 2, 3, 8}{0, 1, 3}{0, 1, 2}{0, 1, 4}{0, 1, 2}{0, 1, 2, 3}{0, 1, 2, 3}{0, 1, 2, 3, 4}{0, 1, 2}{0, 1, 2, 3, 4}{0, 1, 3}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out b/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
index f76c760..a6cb9af 100644
--- a/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_empty_table.q.out
@@ -34,7 +34,7 @@ POSTHOOK: query: select compute_stats(b, 16) from tab_empty
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_empty
 #### A masked pattern was here ####
-{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"Long","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
 PREHOOK: query: select compute_stats(c, 16) from tab_empty
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tab_empty
@@ -43,7 +43,7 @@ POSTHOOK: query: select compute_stats(c, 16) from tab_empty
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_empty
 #### A masked pattern was here ####
-{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"Double","min":null,"max":null,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
 PREHOOK: query: select compute_stats(d, 16) from tab_empty
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tab_empty
@@ -52,7 +52,7 @@ POSTHOOK: query: select compute_stats(d, 16) from tab_empty
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_empty
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0}
+{"columntype":"String","maxlength":0,"avglength":0.0,"countnulls":0,"numdistinctvalues":0,"ndvbitvector":""}
 PREHOOK: query: select compute_stats(e, 16) from tab_empty
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tab_empty

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_long.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_long.q.out b/ql/src/test/results/clientpositive/compute_stats_long.q.out
index 2c6171d..b6f2b10 100644
--- a/ql/src/test/results/clientpositive/compute_stats_long.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_long.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_int
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_int
 #### A masked pattern was here ####
-{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11}
+{"columntype":"Long","min":4,"max":344,"countnulls":1,"numdistinctvalues":11,"ndvbitvector":"{0, 1, 2, 3}{0, 2, 5}{0, 1, 2, 3, 4}{0, 1, 2, 4, 6, 7}{0, 1, 2, 4}{0, 1, 2, 4, 5}{0, 1, 2, 5}{0, 1, 2}{0, 1, 2, 3}{0, 1, 3, 4}{0, 1, 2, 5, 6}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 2, 3, 10}{0, 1, 2, 4}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/compute_stats_string.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/compute_stats_string.q.out b/ql/src/test/results/clientpositive/compute_stats_string.q.out
index bdf9d85..fbd0e6d 100644
--- a/ql/src/test/results/clientpositive/compute_stats_string.q.out
+++ b/ql/src/test/results/clientpositive/compute_stats_string.q.out
@@ -35,4 +35,4 @@ select compute_stats(a, 16) from tab_string
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_string
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7}
+{"columntype":"String","maxlength":11,"avglength":3.9,"countnulls":0,"numdistinctvalues":7,"ndvbitvector":"{0, 1, 2, 3}{0, 1}{0, 1, 3}{0, 2}{0, 1, 2, 3}{0, 1, 3}{0, 1, 2, 3}{0, 1, 3}{0, 1}{0, 1}{0, 1, 2, 4}{0, 1, 4}{0, 2, 4}{0, 1, 2, 3}{0, 1, 2}{0, 1, 2}"}

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
index 7fa3089..8f50a43 100644
--- a/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/display_colstats_tbllvl.q.out
@@ -203,7 +203,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
index ae39d18..b46f509 100644
--- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
+++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out
@@ -211,7 +211,7 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
                   columns _col0,_col1,_col2
-                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint>
+                  columns.types struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>:struct<columntype:string,min:double,max:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>
                   escape.delim \
                   hive.serialization.extend.additional.nesting.levels true
                   serialization.escape.crlf true

http://git-wip-us.apache.org/repos/asf/hive/blob/7b2f6703/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
index 853bc4a..459d93b 100644
--- a/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
+++ b/ql/src/test/results/clientpositive/varchar_udf1.q.java1.7.out
@@ -416,7 +416,7 @@ from varchar_udf_1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@varchar_udf_1
 #### A masked pattern was here ####
-{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1}
+{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"}
 PREHOOK: query: select
   min(c2),
   min(c4)


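A design note on why the serialized vectors are threaded through the mergepartial operators in the plans above: unlike a raw distinct count, FM bit vectors merge losslessly across partitions or mappers by taking the bitwise OR of corresponding vectors, so a value seen on both sides is not double-counted. A sketch of that merge step, under the same hypothetical representation as the FmSketch example earlier:

// Union of two sketches: OR corresponding vectors. A value present in both
// inputs sets the same bits, so the merged estimate never double-counts it.
static long[] merge(long[] a, long[] b) {
  long[] merged = new long[a.length];
  for (int i = 0; i < a.length; i++) {
    merged[i] = a[i] | b[i];
  }
  return merged;
}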