hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pxi...@apache.org
Subject [20/24] hive git commit: HIVE-16996: Add HLL as an alternative to FM sketch to compute stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan, Prasanth Jayachandran)
Date Sat, 15 Jul 2017 08:06:59 GMT
http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
index 2ebfcb2..2d56950 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
@@ -22,6 +22,11 @@ import java.util.List;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.ndv.FMSketch;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
@@ -53,13 +58,13 @@ import org.apache.hadoop.util.StringUtils;
 public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
 
   static final Logger LOG = LoggerFactory.getLogger(GenericUDAFComputeStats.class.getName());
-
+  
   @Override
   public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
       throws SemanticException {
-    if (parameters.length != 2 ) {
+    if (parameters.length < 2 ) {
       throw new UDFArgumentTypeException(parameters.length - 1,
-          "Exactly two arguments are expected.");
+          "Exactly 2 (col + hll) or 3 (col + fm + #bitvectors) arguments are expected.");
     }
 
     if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
@@ -235,23 +240,12 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       if (!emptyTable) {
         if (p == null) {
           myagg.countNulls++;
-        }
-        else {
-          try {
-            boolean v = PrimitiveObjectInspectorUtils.getBoolean(p, inputOI);
-            if (v == false) {
-              myagg.countFalses++;
-            } else if (v == true){
-              myagg.countTrues++;
-            }
-          } catch (NumberFormatException e) {
-            if (!warned) {
-              warned = true;
-              LOG.warn(getClass().getSimpleName() + " "
-                  + StringUtils.stringifyException(e));
-              LOG.warn(getClass().getSimpleName()
-                  + " ignoring similar exceptions.");
-            }
+        } else {
+          boolean v = PrimitiveObjectInspectorUtils.getBoolean(p, inputOI);
+          if (v == false) {
+            myagg.countFalses++;
+          } else if (v == true) {
+            myagg.countTrues++;
           }
         }
       }
@@ -302,6 +296,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
     /* Object Inspector corresponding to the input parameter.
      */
     protected transient PrimitiveObjectInspector inputOI;
+    protected transient PrimitiveObjectInspector funcOI;
     protected transient PrimitiveObjectInspector numVectorsOI;
 
 
@@ -322,9 +317,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
     protected transient StructField ndvField;
     protected transient StringObjectInspector ndvFieldOI;
 
-    protected transient StructField numBitVectorsField;
-    protected transient IntObjectInspector numBitVectorsFieldOI;
-
     /* Partial aggregation result returned by TerminatePartial. Partial result is a struct
      * containing a long field named "count".
      */
@@ -334,8 +326,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
      */
     protected transient Object[] result;
 
-    protected transient boolean warned;
-
     protected abstract OI getValueObjectInspector();
     
     protected abstract OI getValueObjectInspector(PrimitiveTypeInfo typeInfo);
@@ -347,7 +337,10 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       // initialize input
       if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
         inputOI = (PrimitiveObjectInspector) parameters[0];
-        numVectorsOI = (PrimitiveObjectInspector) parameters[1];
+        funcOI = (PrimitiveObjectInspector) parameters[1];
+        if (parameters.length > 2) {
+          numVectorsOI = (PrimitiveObjectInspector) parameters[2];
+        }
       } else {
         soi = (StructObjectInspector) parameters[0];
 
@@ -363,9 +356,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         ndvField = soi.getStructFieldRef("bitvector");
         ndvFieldOI = (StringObjectInspector) ndvField.getFieldObjectInspector();
 
-        numBitVectorsField = soi.getStructFieldRef("numbitvectors");
-        numBitVectorsFieldOI = (IntObjectInspector)
-            numBitVectorsField.getFieldObjectInspector();
       }
 
       // initialize output
@@ -376,7 +366,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         foi.add(getValueObjectInspector(inputOI.getTypeInfo()));
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
-        foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
 
         List<String> fname = new ArrayList<String>();
         fname.add("columnType");
@@ -384,13 +373,11 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         fname.add("max");
         fname.add("countnulls");
         fname.add("bitvector");
-        fname.add("numbitvectors");
 
         partialResult = new Object[6];
         partialResult[0] = new Text();
         partialResult[3] = new LongWritable(0);
         partialResult[4] = new Text();
-        partialResult[5] = new IntWritable(0);
 
         return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
             foi);
@@ -436,12 +423,13 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         return (int) (model.lengthFor(columnType)
                     + model.primitive1()
                     + model.primitive2()
-                    + ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) :
+                    + ((numDV == null) ? lengthFor(model, null) :
                                          numDV.lengthFor(model)));
       }
 
-      protected void initNDVEstimator(int numBitVectors) {
-        numDV = new NumDistinctValueEstimator(numBitVectors);
+      protected void initNDVEstimator(String func, int numBitVectors) {
+        numDV = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(func,
+            numBitVectors);
       }
 
       protected abstract void update(Object p, PrimitiveObjectInspector inputOI);
@@ -457,7 +445,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         if (numDV != null) {
           ((Text) result[5]).set(numDV.serialize());
         }
-
         return result;
       }
 
@@ -465,11 +452,10 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         // Serialize the rest of the values in the AggBuffer
         serializeCommon(result);
 
-        // Serialize numDistinctValue Estimator
-        Text t = numDV.serialize();
-        ((Text) result[4]).set(t);
-        ((IntWritable) result[5]).set(numDV.getnumBitVectors());
-
+        if (numDV != null) {
+          // Serialize numDistinctValue Estimator
+          ((Text) result[4]).set(numDV.serialize());
+        }
         return result;
       }
 
@@ -495,30 +481,29 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       NumericStatsAgg myagg = (NumericStatsAgg) agg;
 
       if (myagg.numDV == null) {
-        int numVectors = parameters[1] == null ? 0 :
-            PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI);
-        if (numVectors > MAX_BIT_VECTORS) {
-          throw new HiveException("The maximum allowed value for number of bit vectors " +
-              " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors");
+        String func = null;
+        int numVectors = 0;
+        // func may be null when GBY op is closing.
+        // see mvn test -Dtest=TestMiniTezCliDriver -Dqfile=explainuser_3.q
+        // original behavior is to create FMSketch
+        func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString(
+            parameters[1], funcOI);
+        if (parameters.length == 3) {
+          numVectors = parameters[2] == null ? 0 : PrimitiveObjectInspectorUtils.getInt(
+              parameters[2], numVectorsOI);
+          if (numVectors > MAX_BIT_VECTORS) {
+            throw new HiveException("The maximum allowed value for number of bit vectors " + " is "
+                + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors");
+          }
         }
-        myagg.initNDVEstimator(numVectors);
+        myagg.initNDVEstimator(func, numVectors);
       }
 
-      //Update null counter if a null value is seen
+      // Update null counter if a null value is seen
       if (parameters[0] == null) {
         myagg.countNulls++;
       } else {
-        try {
-          myagg.update(parameters[0], inputOI);
-        } catch (NumberFormatException e) {
-          if (!warned) {
-            warned = true;
-            LOG.warn(getClass().getSimpleName() + " "
-                + StringUtils.stringifyException(e));
-            LOG.warn(getClass().getSimpleName()
-                + " ignoring similar exceptions.");
-          }
-        }
+        myagg.update(parameters[0], inputOI);
       }
     }
 
@@ -537,15 +522,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       if (partial != null) {
         NumericStatsAgg myagg = (NumericStatsAgg) agg;
 
-        if (myagg.numDV == null) {
-          Object partialValue = soi.getStructFieldData(partial, numBitVectorsField);
-          int numVectors = numBitVectorsFieldOI.get(partialValue);
-          if (numVectors <= 0) {
-            return;
-          }
-          myagg.initNDVEstimator(numVectors);
-        }
-
         // Update min if min is lesser than the smallest value seen so far
         Object minValue = soi.getStructFieldData(partial, minField);
         myagg.updateMin(minValue, minFieldOI);
@@ -561,9 +537,15 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         // Merge numDistinctValue Estimators
         Object numDistinct = soi.getStructFieldData(partial, ndvField);
         String v = ndvFieldOI.getPrimitiveJavaObject(numDistinct);
-        NumDistinctValueEstimator o =
-            new NumDistinctValueEstimator(v, myagg.numDV.getnumBitVectors());
-        myagg.numDV.mergeEstimators(o);
+        
+        if (v != null && v.length() != 0) {
+          if (myagg.numDV == null) {
+            myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(v);
+          } else {
+            myagg.numDV.mergeEstimators(NumDistinctValueEstimatorFactory
+                .getNumDistinctValueEstimator(v));
+          }
+        }
       }
     }
   }
@@ -713,6 +695,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
     /* Object Inspector corresponding to the input parameter.
      */
     private transient PrimitiveObjectInspector inputOI;
+    private transient PrimitiveObjectInspector funcOI;
     private transient PrimitiveObjectInspector numVectorsOI;
     private final static int MAX_BIT_VECTORS = 1024;
 
@@ -741,9 +724,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
     private transient StructField ndvField;
     private transient StringObjectInspector ndvFieldOI;
 
-    private transient StructField numBitVectorsField;
-    private transient IntObjectInspector numBitVectorsFieldOI;
-
     /* Output of final result of the aggregation
      */
     private transient Object[] result;
@@ -755,7 +735,10 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       // initialize input
       if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
         inputOI = (PrimitiveObjectInspector) parameters[0];
-        numVectorsOI = (PrimitiveObjectInspector) parameters[1];
+        funcOI = (PrimitiveObjectInspector) parameters[1];
+        if (parameters.length > 2) {
+          numVectorsOI = (PrimitiveObjectInspector) parameters[2];
+        }
       } else {
         soi = (StructObjectInspector) parameters[0];
 
@@ -774,9 +757,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         ndvField = soi.getStructFieldRef("bitvector");
         ndvFieldOI = (StringObjectInspector) ndvField.getFieldObjectInspector();
 
-        numBitVectorsField = soi.getStructFieldRef("numbitvectors");
-        numBitVectorsFieldOI = (IntObjectInspector)
-                                  numBitVectorsField.getFieldObjectInspector();
       }
 
       // initialize output
@@ -788,7 +768,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
         foi.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
-        foi.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
 
         List<String> fname = new ArrayList<String>();
         fname.add("columntype");
@@ -797,7 +776,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         fname.add("count");
         fname.add("countnulls");
         fname.add("bitvector");
-        fname.add("numbitvectors");
 
         partialResult = new Object[7];
         partialResult[0] = new Text();
@@ -806,7 +784,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         partialResult[3] = new LongWritable(0);
         partialResult[4] = new LongWritable(0);
         partialResult[5] = new Text();
-        partialResult[6] = new IntWritable(0);
 
         return ObjectInspectorFactory.getStandardStructObjectInspector(fname,
           foi);
@@ -847,15 +824,14 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       public long sumLength;             /* Sum of lengths of all values seen so far */
       public long count;                          /* Count of all values seen so far */
       public long countNulls;          /* Count of number of null values seen so far */
-      public StringNumDistinctValueEstimator numDV;      /* Distinct value estimator */
-      public int numBitVectors;
+      public NumDistinctValueEstimator numDV;      /* Distinct value estimator */
       public boolean firstItem;
       @Override
       public int estimate() {
         JavaDataModel model = JavaDataModel.get();
         return (int) (model.primitive1() * 2 + model.primitive2() * 4 +
                     model.lengthFor(columnType) +
-                    ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) :
+                    ((numDV == null) ? lengthFor(model, null) :
                                        numDV.lengthFor(model)));
 
       }
@@ -868,8 +844,9 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       return result;
     }
 
-    public void initNDVEstimator(StringStatsAgg aggBuffer, int numBitVectors) {
-      aggBuffer.numDV = new StringNumDistinctValueEstimator(numBitVectors);
+    public void initNDVEstimator(StringStatsAgg aggBuffer, String func, int numBitVectors) {
+      aggBuffer.numDV = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(func,
+          numBitVectors);
       aggBuffer.numDV.reset();
     }
 
@@ -890,83 +867,59 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
     public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
       Object p = parameters[0];
       StringStatsAgg myagg = (StringStatsAgg) agg;
-      boolean emptyTable = false;
-
-      if (parameters[1] == null) {
-        emptyTable = true;
-      }
 
       if (myagg.firstItem) {
         int numVectors = 0;
-        if (!emptyTable) {
-          numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI);
-        }
-
-        if (numVectors > MAX_BIT_VECTORS) {
-          throw new HiveException("The maximum allowed value for number of bit vectors " +
-            " is " + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors");
+        String func = parameters[1] == null ? "fm" : PrimitiveObjectInspectorUtils.getString(
+            parameters[1], funcOI);
+        if (parameters.length > 2) {
+          numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[2], numVectorsOI);
+          if (numVectors > MAX_BIT_VECTORS) {
+            throw new HiveException("The maximum allowed value for number of bit vectors " + " is "
+                + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors");
+          }
         }
 
-        initNDVEstimator(myagg, numVectors);
+        initNDVEstimator(myagg, func, numVectors);
         myagg.firstItem = false;
-        myagg.numBitVectors = numVectors;
       }
 
-      if (!emptyTable) {
-
-        // Update null counter if a null value is seen
-        if (p == null) {
-          myagg.countNulls++;
-        }
-        else {
-          try {
-
-            String v = PrimitiveObjectInspectorUtils.getString(p, inputOI);
-
-            // Update max length if new length is greater than the ones seen so far
-            int len = v.length();
-            if (len > myagg.maxLength) {
-              myagg.maxLength = len;
-            }
-
-            // Update sum length with the new length
-            myagg.sumLength += len;
-
-            // Increment count of values seen so far
-            myagg.count++;
-
-            // Add string value to NumDistinctValue Estimator
-            myagg.numDV.addToEstimator(v);
-
-          } catch (NumberFormatException e) {
-            if (!warned) {
-              warned = true;
-              LOG.warn(getClass().getSimpleName() + " "
-                  + StringUtils.stringifyException(e));
-              LOG.warn(getClass().getSimpleName()
-                  + " ignoring similar exceptions.");
-            }
-          }
+      // Update null counter if a null value is seen
+      String v = PrimitiveObjectInspectorUtils.getString(p, inputOI);
+      if (v == null) {
+        myagg.countNulls++;
+      } else {
+        // Update max length if new length is greater than the ones seen so
+        // far
+        int len = v.length();
+        if (len > myagg.maxLength) {
+          myagg.maxLength = len;
         }
+
+        // Update sum length with the new length
+        myagg.sumLength += len;
+
+        // Increment count of values seen so far
+        myagg.count++;
+
+        // Add string value to NumDistinctValue Estimator
+        myagg.numDV.addToEstimator(v);
       }
     }
 
     @Override
     public Object terminatePartial(AggregationBuffer agg) throws HiveException {
       StringStatsAgg myagg = (StringStatsAgg) agg;
-
-      // Serialize numDistinctValue Estimator
-      Text t = myagg.numDV.serialize();
-
       // Serialize the rest of the values in the AggBuffer
       ((Text) partialResult[0]).set(myagg.columnType);
       ((LongWritable) partialResult[1]).set(myagg.maxLength);
       ((LongWritable) partialResult[2]).set(myagg.sumLength);
       ((LongWritable) partialResult[3]).set(myagg.count);
       ((LongWritable) partialResult[4]).set(myagg.countNulls);
-      ((Text) partialResult[5]).set(t);
-      ((IntWritable) partialResult[6]).set(myagg.numBitVectors);
-
+      // Serialize numDistinctValue Estimator
+      if (myagg.numDV != null) {
+        ((Text) partialResult[5]).set(myagg.numDV.serialize());
+      }
       return partialResult;
     }
 
@@ -975,17 +928,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       if (partial != null) {
         StringStatsAgg myagg = (StringStatsAgg) agg;
 
-        if (myagg.firstItem) {
-          Object partialValue = soi.getStructFieldData(partial, numBitVectorsField);
-          int numVectors = numBitVectorsFieldOI.get(partialValue);
-          if (numVectors <= 0) {
-            return;
-          }
-          initNDVEstimator(myagg, numVectors);
-          myagg.firstItem = false;
-          myagg.numBitVectors = numVectors;
-        }
-
         // Update maxLength if length is greater than the largest value seen so far
         Object partialValue = soi.getStructFieldData(partial, maxLengthField);
         if (myagg.maxLength < maxLengthFieldOI.get(partialValue)) {
@@ -1007,8 +949,15 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         // Merge numDistinctValue Estimators
         partialValue = soi.getStructFieldData(partial, ndvField);
         String v = ndvFieldOI.getPrimitiveJavaObject(partialValue);
-        NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors);
-        myagg.numDV.mergeEstimators(o);
+        
+        if (v != null && v.length() != 0) {
+          if (myagg.numDV == null) {
+            myagg.numDV = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(v);
+          } else {
+            myagg.numDV.mergeEstimators(NumDistinctValueEstimatorFactory
+                .getNumDistinctValueEstimator(v));
+          }
+        }
       }
     }
 
@@ -1016,16 +965,12 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
     public Object terminate(AggregationBuffer agg) throws HiveException {
       StringStatsAgg myagg = (StringStatsAgg) agg;
 
-      long numDV = 0;
+      long numDV = myagg.numDV == null ? 0 : myagg.numDV.estimateNumDistinctValues();
       double avgLength = 0.0;
       long total = myagg.count + myagg.countNulls;
 
-      if (myagg.numBitVectors != 0) {
-        numDV = myagg.numDV.estimateNumDistinctValues();
-      }
-
       if (total != 0) {
-         avgLength = myagg.sumLength / (1.0 * total);
+        avgLength = myagg.sumLength / (1.0 * total);
       }
 
       // Serialize the result struct
@@ -1034,7 +979,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       ((DoubleWritable) result[2]).set(avgLength);
       ((LongWritable) result[3]).set(myagg.countNulls);
       ((LongWritable) result[4]).set(numDV);
-      if (myagg.numBitVectors != 0) {
+      if (myagg.numDV != null) {
         ((Text) result[5]).set(myagg.numDV.serialize());
       }
       return result;
@@ -1181,8 +1126,6 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       myagg.countNulls = 0;
     }
 
-    boolean warned = false;
-
     @Override
     public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
       Object p = parameters[0];
@@ -1197,32 +1140,21 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
         // Update null counter if a null value is seen
         if (p == null) {
           myagg.countNulls++;
-        }
-        else {
-          try {
-            BytesWritable v = PrimitiveObjectInspectorUtils.getBinary(p, inputOI);
-
-            // Update max length if new length is greater than the ones seen so far
-            int len = v.getLength();
-            if (len > myagg.maxLength) {
-              myagg.maxLength = len;
-            }
-
-            // Update sum length with the new length
-            myagg.sumLength += len;
-
-            // Increment count of values seen so far
-            myagg.count++;
-
-          } catch (NumberFormatException e) {
-            if (!warned) {
-              warned = true;
-              LOG.warn(getClass().getSimpleName() + " "
-                  + StringUtils.stringifyException(e));
-              LOG.warn(getClass().getSimpleName()
-                  + " ignoring similar exceptions.");
-            }
+        } else {
+          BytesWritable v = PrimitiveObjectInspectorUtils.getBinary(p, inputOI);
+
+          // Update max length if new length is greater than the ones seen so
+          // far
+          int len = v.getLength();
+          if (len > myagg.maxLength) {
+            myagg.maxLength = len;
           }
+
+          // Update sum length with the new length
+          myagg.sumLength += len;
+
+          // Increment count of values seen so far
+          myagg.count++;
         }
       }
     }
@@ -1425,4 +1357,25 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver {
       ((NumericStatsAgg)agg).reset("Date");
     }
   }
+  
+  @InterfaceAudience.LimitedPrivate(value = { "Hive" })
+  static int lengthFor(JavaDataModel model, Integer numVector) {
+    int length = model.object();
+    length += model.primitive1() * 2; // two int
+    length += model.primitive2(); // one double
+    length += model.lengthForRandom() * 2; // two Random
+
+    if (numVector == null) {
+      numVector = 16; // HiveConf hive.stats.ndv.error default produces 16
+                      // vectors
+    }
+
+    if (numVector > 0) {
+      length += model.array() * 3; // three array
+      length += model.primitive1() * numVector * 2; // two int array
+      length += (model.object() + model.array() + model.primitive1() + model.primitive2())
+          * numVector; // bitset array
+    }
+    return length;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java
deleted file mode 100644
index 1c197a0..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-public class LongNumDistinctValueEstimator extends NumDistinctValueEstimator {
-
-  public LongNumDistinctValueEstimator(int numBitVectors) {
-    super(numBitVectors);
-  }
-
-  public LongNumDistinctValueEstimator(String s, int numVectors) {
-    super(s, numVectors);
-  }
-
-   @Override
-   public void addToEstimator(long v) {
-    /* Update summary bitVector :
-     * Generate hash value of the long value and mod it by 2^bitVectorSize-1.
-     * In this implementation bitVectorSize is 31.
-     */
-     super.addToEstimator(v);
-  }
-
-  @Override
-  public void addToEstimatorPCSA(long v) {
-    super.addToEstimatorPCSA(v);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
deleted file mode 100644
index fa70f49..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java
+++ /dev/null
@@ -1,383 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-import java.util.Random;
-
-import javolution.util.FastBitSet;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.common.classification.InterfaceAudience;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.apache.hadoop.io.Text;
-
-public class NumDistinctValueEstimator {
-
-  static final Logger LOG = LoggerFactory.getLogger(NumDistinctValueEstimator.class.getName());
-
-  /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number.
-   * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1.
-   * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise
-   * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1
-   * thus introducing errors in the estimates.
-   */
-  private static final int BIT_VECTOR_SIZE = 31;
-  private final int numBitVectors;
-
-  // Refer to Flajolet-Martin'86 for the value of phi
-  private static final double PHI = 0.77351;
-
-  private final int[] a;
-  private final int[] b;
-  private final FastBitSet[] bitVector;
-
-  private final Random aValue;
-  private final Random bValue;
-
-  /* Create a new distinctValueEstimator
-   */
-  public NumDistinctValueEstimator(int numBitVectors) {
-    this.numBitVectors = numBitVectors;
-    bitVector = new FastBitSet[numBitVectors];
-    for (int i=0; i< numBitVectors; i++) {
-      bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
-    }
-
-    a = new int[numBitVectors];
-    b = new int[numBitVectors];
-
-    /* Use a large prime number as a seed to the random number generator.
-     * Java's random number generator uses the Linear Congruential Generator to generate random
-     * numbers using the following recurrence relation,
-     *
-     * X(n+1) = (a X(n) + c ) mod m
-     *
-     *  where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48
-     *  is not a prime number and hence the set of numbers from 0 to m don't form a finite field.
-     *  If these numbers don't come from a finite field any give X(n) and X(n+1) may not be pair
-     *  wise independent.
-     *
-     *  However, empirically passing in prime numbers as seeds seems to work better than when passing
-     *  composite numbers as seeds. Ideally Java's Random should pick m such that m is prime.
-     *
-     */
-    aValue = new Random(99397);
-    bValue = new Random(9876413);
-
-    for (int i = 0; i < numBitVectors; i++) {
-      int randVal;
-      /* a and b shouldn't be even; If a and b are even, then none of the values
-       * will set bit 0 thus introducing errors in the estimate. Both a and b can be even
-       * 25% of the times and as a result 25% of the bit vectors could be inaccurate. To avoid this
-       * always pick odd values for a and b.
-       */
-      do {
-        randVal = aValue.nextInt();
-      } while (randVal % 2 == 0);
-
-      a[i] = randVal;
-
-      do {
-        randVal = bValue.nextInt();
-      } while (randVal % 2 == 0);
-
-      b[i] = randVal;
-
-      if (a[i] < 0) {
-        a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1);
-      }
-
-      if (b[i] < 0) {
-        b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1);
-      }
-    }
-  }
-
-  public NumDistinctValueEstimator(String s, int numBitVectors) {
-    this.numBitVectors = numBitVectors;
-    FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors);
-    bitVector = new FastBitSet[numBitVectors];
-    for(int i=0; i <numBitVectors; i++) {
-       bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
-       bitVector[i].clear();
-       bitVector[i].or(bitVectorDeser[i]);
-    }
-
-    a = null;
-    b = null;
-
-    aValue = null;
-    bValue = null;
-  }
-
-  /**
-   * Resets a distinctValueEstimator object to its original state.
-   */
-  public void reset() {
-    for (int i=0; i< numBitVectors; i++) {
-      bitVector[i].clear();
-    }
-  }
-
-  public FastBitSet getBitVector(int index) {
-    return bitVector[index];
-  }
-
-  public int getnumBitVectors() {
-    return numBitVectors;
-  }
-
-  public int getBitVectorSize() {
-    return BIT_VECTOR_SIZE;
-  }
-
-  public void printNumDistinctValueEstimator() {
-    String t = new String();
-
-    LOG.debug("NumDistinctValueEstimator");
-    LOG.debug("Number of Vectors: {}", numBitVectors);
-    LOG.debug("Vector Size: {}", BIT_VECTOR_SIZE);
-
-    for (int i=0; i < numBitVectors; i++) {
-      t = t + bitVector[i].toString();
-    }
-
-    LOG.debug("Serialized Vectors: ");
-    LOG.debug(t);
-  }
-
-  /* Serializes a distinctValueEstimator object to Text for transport.
-   *
-   */
-  public Text serialize() {
-    String s = new String();
-    for(int i=0; i < numBitVectors; i++) {
-      s = s + (bitVector[i].toString());
-    }
-    return new Text(s);
-  }
-
-  /* Deserializes from string to FastBitSet; Creates a NumDistinctValueEstimator object and
-   * returns it.
-   */
-
-  private FastBitSet[] deserialize(String s, int numBitVectors) {
-    FastBitSet[] b = new FastBitSet[numBitVectors];
-    for (int j=0; j < numBitVectors; j++) {
-      b[j] = new FastBitSet(BIT_VECTOR_SIZE);
-      b[j].clear();
-    }
-
-    int vectorIndex =0;
-
-    /* Parse input string to obtain the indexes that are set in the bitvector.
-     * When a toString() is called on a FastBitSet object to serialize it, the serialization
-     * adds { and } to the beginning and end of the return String.
-     * Skip "{", "}", ",", " " in the input string.
-     */
-    for(int i=1; i < s.length()-1;) {
-      char c = s.charAt(i);
-      i = i + 1;
-
-      // Move on to the next bit vector
-      if (c == '}') {
-         vectorIndex = vectorIndex + 1;
-      }
-
-      // Encountered a numeric value; Extract out the entire number
-      if (c >= '0' && c <= '9') {
-        String t = new String();
-        t = t + c;
-        c = s.charAt(i);
-        i = i + 1;
-
-        while (c != ',' && c!= '}') {
-          t = t + c;
-          c = s.charAt(i);
-          i = i + 1;
-        }
-
-        int bitIndex = Integer.parseInt(t);
-        assert(bitIndex >= 0);
-        assert(vectorIndex < numBitVectors);
-        b[vectorIndex].set(bitIndex);
-        if (c == '}') {
-          vectorIndex =  vectorIndex + 1;
-        }
-      }
-    }
-    return b;
-  }
-
-  private int generateHash(long v, int hashNum) {
-    int mod = (1<<BIT_VECTOR_SIZE) - 1;
-    long tempHash = a[hashNum] * v  + b[hashNum];
-    tempHash %= mod;
-    int hash = (int) tempHash;
-
-    /* Hash function should map the long value to 0...2^L-1.
-     * Hence hash value has to be non-negative.
-     */
-    if (hash < 0) {
-      hash = hash + mod;
-    }
-    return hash;
-  }
-
-  private int generateHashForPCSA(long v) {
-    int mod = 1 << (BIT_VECTOR_SIZE - 1) - 1;
-    long tempHash = a[0] * v + b[0];
-    tempHash %= mod;
-    int hash = (int) tempHash;
-
-    /* Hash function should map the long value to 0...2^L-1.
-     * Hence hash value has to be non-negative.
-     */
-    if (hash < 0) {
-      hash = hash + mod + 1;
-    }
-    return hash;
-  }
-
-  public void addToEstimator(long v) {
-    /* Update summary bitVector :
-     * Generate hash value of the long value and mod it by 2^bitVectorSize-1.
-     * In this implementation bitVectorSize is 31.
-     */
-
-    for (int i = 0; i<numBitVectors; i++) {
-      int hash = generateHash(v,i);
-      int index;
-
-      // Find the index of the least significant bit that is 1
-      for (index=0; index<BIT_VECTOR_SIZE; index++) {
-        if (hash % 2 != 0) {
-          break;
-        }
-        hash = hash >> 1;
-      }
-
-      // Set bitvector[index] := 1
-      bitVector[i].set(index);
-    }
-  }
-
-  public void addToEstimatorPCSA(long v) {
-    int hash = generateHashForPCSA(v);
-    int rho = hash/numBitVectors;
-    int index;
-
-    // Find the index of the least significant bit that is 1
-    for (index=0; index<BIT_VECTOR_SIZE; index++) {
-      if (rho % 2 != 0) {
-        break;
-      }
-      rho = rho >> 1;
-    }
-
-    // Set bitvector[index] := 1
-    bitVector[hash%numBitVectors].set(index);
-  }
-
-  public void addToEstimator(double d) {
-    int v = new Double(d).hashCode();
-    addToEstimator(v);
-  }
-
-  public void addToEstimatorPCSA(double d) {
-    int v = new Double(d).hashCode();
-    addToEstimatorPCSA(v);
-  }
-
-  public void addToEstimator(HiveDecimal decimal) {
-    int v = decimal.hashCode();
-    addToEstimator(v);
-  }
-
-  public void addToEstimatorPCSA(HiveDecimal decimal) {
-    int v = decimal.hashCode();
-    addToEstimatorPCSA(v);
-  }
-
-  public void mergeEstimators(NumDistinctValueEstimator o) {
-    // Bitwise OR the bitvector with the bitvector in the agg buffer
-    for (int i=0; i<numBitVectors; i++) {
-      bitVector[i].or(o.getBitVector(i));
-    }
-  }
-
-  public long estimateNumDistinctValuesPCSA() {
-    double numDistinctValues = 0.0;
-    long S = 0;
-
-    for (int i=0; i < numBitVectors; i++) {
-      int index = 0;
-      while (bitVector[i].get(index) && index < BIT_VECTOR_SIZE) {
-        index = index + 1;
-      }
-      S = S + index;
-    }
-
-    numDistinctValues = ((numBitVectors/PHI) * Math.pow(2.0, S/numBitVectors));
-    return ((long)numDistinctValues);
-  }
-
-  /* We use the Flajolet-Martin estimator to estimate the number of distinct values.FM uses the
-   * location of the least significant zero as an estimate of log2(phi*ndvs).
-   */
-  public long estimateNumDistinctValues() {
-    int sumLeastSigZero = 0;
-    double avgLeastSigZero;
-    double numDistinctValues;
-
-    for (int i=0; i< numBitVectors; i++) {
-      int leastSigZero = bitVector[i].nextClearBit(0);
-      sumLeastSigZero += leastSigZero;
-    }
-
-    avgLeastSigZero =
-        sumLeastSigZero/(numBitVectors * 1.0) - (Math.log(PHI)/Math.log(2.0));
-    numDistinctValues = Math.pow(2.0, avgLeastSigZero);
-    return ((long)(numDistinctValues));
-  }
-
-  @InterfaceAudience.LimitedPrivate(value = { "Hive" })
-  static int lengthFor(JavaDataModel model, Integer numVector) {
-    int length = model.object();
-    length += model.primitive1() * 2;       // two int
-    length += model.primitive2();           // one double
-    length += model.lengthForRandom() * 2;  // two Random
-
-    if (numVector == null) {
-      numVector = 16; // HiveConf hive.stats.ndv.error default produces 16 vectors
-    }
-
-    if (numVector > 0) {
-      length += model.array() * 3;                    // three array
-      length += model.primitive1() * numVector * 2;   // two int array
-      length += (model.object() + model.array() + model.primitive1() +
-          model.primitive2()) * numVector;   // bitset array
-    }
-    return length;
-  }
-
-  public int lengthFor(JavaDataModel model) {
-    return lengthFor(model, getnumBitVectors());
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java
deleted file mode 100644
index 601901c..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-public class StringNumDistinctValueEstimator extends NumDistinctValueEstimator {
-
-  public StringNumDistinctValueEstimator(int numVectors) {
-    super(numVectors);
-  }
-
-  public StringNumDistinctValueEstimator(String s, int numVectors) {
-    super(s, numVectors);
-  }
-
-  public void addToEstimator(String s) {
-    int v = s.hashCode();
-    super.addToEstimator(v);
-  }
-
-  public void addToEstimatorPCSA(String s) {
-    int v = s.hashCode();
-    super.addToEstimatorPCSA(v);
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/char_udf1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/char_udf1.q b/ql/src/test/queries/clientpositive/char_udf1.q
index 39aa0e0..fa3a261 100644
--- a/ql/src/test/queries/clientpositive/char_udf1.q
+++ b/ql/src/test/queries/clientpositive/char_udf1.q
@@ -142,8 +142,8 @@ from char_udf_1 limit 1;
 
 -- Aggregate Functions
 select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from char_udf_1;
 
 select

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/compute_stats_date.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/compute_stats_date.q b/ql/src/test/queries/clientpositive/compute_stats_date.q
index 09128f6..bf47852 100644
--- a/ql/src/test/queries/clientpositive/compute_stats_date.q
+++ b/ql/src/test/queries/clientpositive/compute_stats_date.q
@@ -13,7 +13,7 @@ load data local inpath '../../data/files/flights_join.txt' overwrite into table
 select count(*) from tab_date;
 
 -- compute statistical summary of data
-select compute_stats(fl_date, 16) from tab_date;
+select compute_stats(fl_date, 'hll') from tab_date;
 
 explain
 analyze table tab_date compute statistics for columns fl_date;

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/compute_stats_decimal.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/compute_stats_decimal.q b/ql/src/test/queries/clientpositive/compute_stats_decimal.q
index 76e1468..2beafaf 100644
--- a/ql/src/test/queries/clientpositive/compute_stats_decimal.q
+++ b/ql/src/test/queries/clientpositive/compute_stats_decimal.q
@@ -8,4 +8,4 @@ LOAD DATA LOCAL INPATH "../../data/files/decimal.txt" INTO TABLE tab_decimal;
 select count(*) from tab_decimal;
 
 -- compute statistical summary of data
-select compute_stats(a, 18) from tab_decimal;
+select compute_stats(a, 'fm', 18) from tab_decimal;

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/compute_stats_double.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/compute_stats_double.q b/ql/src/test/queries/clientpositive/compute_stats_double.q
index 7a1e0f6..6bae064 100644
--- a/ql/src/test/queries/clientpositive/compute_stats_double.q
+++ b/ql/src/test/queries/clientpositive/compute_stats_double.q
@@ -6,4 +6,4 @@ LOAD DATA LOCAL INPATH "../../data/files/double.txt" INTO TABLE tab_double;
 select count(*) from tab_double;
 
 -- compute statistical summary of data
-select compute_stats(a, 16) from tab_double;
+select compute_stats(a, 'fm', 16) from tab_double;

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/compute_stats_long.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/compute_stats_long.q b/ql/src/test/queries/clientpositive/compute_stats_long.q
index 6a2070f..48f4ebb 100644
--- a/ql/src/test/queries/clientpositive/compute_stats_long.q
+++ b/ql/src/test/queries/clientpositive/compute_stats_long.q
@@ -6,4 +6,4 @@ LOAD DATA LOCAL INPATH "../../data/files/int.txt" INTO TABLE tab_int;
 select count(*) from tab_int;
 
 -- compute statistical summary of data
-select compute_stats(a, 16) from tab_int;
+select compute_stats(a, 'fm', 16) from tab_int;

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/compute_stats_string.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/compute_stats_string.q b/ql/src/test/queries/clientpositive/compute_stats_string.q
index 0023e7f..79a531e 100644
--- a/ql/src/test/queries/clientpositive/compute_stats_string.q
+++ b/ql/src/test/queries/clientpositive/compute_stats_string.q
@@ -6,4 +6,4 @@ LOAD DATA LOCAL INPATH "../../data/files/string.txt" INTO TABLE tab_string;
 select count(*) from tab_string;
 
 -- compute statistical summary of data
-select compute_stats(a, 16) from tab_string;
+select compute_stats(a, 'fm', 16) from tab_string;

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/hll.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/hll.q b/ql/src/test/queries/clientpositive/hll.q
new file mode 100644
index 0000000..edfdce8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/hll.q
@@ -0,0 +1,46 @@
+set hive.mapred.mode=nonstrict;
+
+create table i(key int);
+
+insert overwrite table i select key from src;
+
+explain analyze table i compute statistics for columns;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key double);
+
+insert overwrite table i select key from src;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key decimal);
+
+insert overwrite table i select key from src;
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+
+drop table i;
+
+create table i(key date);
+
+insert into i values ('2012-08-17');
+insert into i values ('2012-08-17');
+insert into i values ('2013-08-17');
+insert into i values ('2012-03-17');
+insert into i values ('2012-05-17');
+
+analyze table i compute statistics for columns;
+
+desc formatted i key;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q b/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q
index 8bbae39..d72fad5 100644
--- a/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q
+++ b/ql/src/test/queries/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q
@@ -7,7 +7,7 @@ set hive.groupby.skewindata=false;
 set mapred.reduce.tasks=31;
 
 
-select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -17,7 +17,7 @@ select
   var_samp(substr(src.value,5)) as d
  from src)subq;
 
-explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -27,7 +27,7 @@ select
   var_samp(substr(src.value,5)) as d
  from src)subq;
 
-select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -39,7 +39,7 @@ select
  
 set hive.optimize.reducededuplication=false;
 
-explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -49,7 +49,7 @@ select
   var_samp(substr(src.value,5)) as d
  from src)subq;
 
-select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/varchar_udf1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/varchar_udf1.q b/ql/src/test/queries/clientpositive/varchar_udf1.q
index 4d1f884..1039ed9 100644
--- a/ql/src/test/queries/clientpositive/varchar_udf1.q
+++ b/ql/src/test/queries/clientpositive/varchar_udf1.q
@@ -139,8 +139,8 @@ from varchar_udf_1 limit 1;
 
 -- Aggregate Functions
 select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from varchar_udf_1;
 
 select

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/queries/clientpositive/vector_udf1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_udf1.q b/ql/src/test/queries/clientpositive/vector_udf1.q
index 48d3e1e..c1d4372 100644
--- a/ql/src/test/queries/clientpositive/vector_udf1.q
+++ b/ql/src/test/queries/clientpositive/vector_udf1.q
@@ -351,8 +351,8 @@ select
 from varchar_udf_1;
 
 select
-  compute_stats(c2, 16),
-  compute_stats(c4, 16)
+  compute_stats(c2, 'fm', 16),
+  compute_stats(c4, 'fm', 16)
 from varchar_udf_1;
 
 explain vectorization detail

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
index 922822e..c0d4eee 100644
--- a/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_update_status.q.out
@@ -36,7 +36,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat_part_one
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-key                 	string              	                    	                    	0                   	14                  	1.72                	3                   	                    	                    	from deserializer   
+key                 	string              	                    	                    	0                   	16                  	1.72                	3                   	                    	                    	from deserializer   
 PREHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2')
 PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
 POSTHOOK: query: ALTER TABLE src_stat_part_one PARTITION(partitionId=1) UPDATE STATISTICS for column key SET ('numDVs'='11','avgColLen'='2.2')
@@ -88,7 +88,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@src_stat_part_two
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-key                 	string              	                    	                    	0                   	14                  	1.72                	3                   	                    	                    	from deserializer   
+key                 	string              	                    	                    	0                   	16                  	1.72                	3                   	                    	                    	from deserializer   
 PREHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40')
 PREHOOK: type: ALTERTABLE_UPDATEPARTSTATS
 POSTHOOK: query: ALTER TABLE src_stat_part_two PARTITION(px=1, py='a') UPDATE STATISTICS for column key SET ('numDVs'='30','maxColLen'='40')


Mime
View raw message