hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From na...@apache.org
Subject svn commit: r1476483 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql: exec/ udf/generic/
Date Sat, 27 Apr 2013 03:38:25 GMT
Author: navis
Date: Sat Apr 27 03:38:23 2013
New Revision: 1476483

URL: http://svn.apache.org/r1476483
Log:
HIVE-4068 Size of aggregation buffer which uses non-primitive type is not estimated correctly
(Navis)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java Sat Apr 27
03:38:23 2013
@@ -151,35 +151,13 @@ public class GroupByOperator extends Ope
   private List<FastBitSet> groupingSetsBitSet;
   transient private List<Object> newKeysGroupingSets;
 
-  /**
-   * This is used to store the position and field names for variable length
-   * fields.
-   **/
-  class varLenFields {
-    int aggrPos;
-    List<Field> fields;
-
-    varLenFields(int aggrPos, List<Field> fields) {
-      this.aggrPos = aggrPos;
-      this.fields = fields;
-    }
-
-    int getAggrPos() {
-      return aggrPos;
-    }
-
-    List<Field> getFields() {
-      return fields;
-    }
-  };
-
   // for these positions, some variable primitive type (String) is used, so size
   // cannot be estimated. sample it at runtime.
   transient List<Integer> keyPositionsSize;
 
   // for these positions, some variable primitive type (String) is used for the
   // aggregation classes
-  transient List<varLenFields> aggrPositions;
+  transient List<Field>[] aggrPositions;
 
   transient int fixedRowSize;
   transient long maxHashTblMemory;
@@ -383,7 +361,7 @@ public class GroupByOperator extends Ope
       aggregations = newAggregations();
       hashAggr = true;
       keyPositionsSize = new ArrayList<Integer>();
-      aggrPositions = new ArrayList<varLenFields>();
+      aggrPositions = new List[aggregations.length];
       groupbyMapAggrInterval = HiveConf.getIntVar(hconf,
           HiveConf.ConfVars.HIVEGROUPBYMAPINTERVAL);
 
@@ -523,21 +501,10 @@ public class GroupByOperator extends Ope
     }
 
     if (c.isInstance(new String()) || c.isInstance(new ByteArrayRef())) {
-      int idx = 0;
-      varLenFields v = null;
-      for (idx = 0; idx < aggrPositions.size(); idx++) {
-        v = aggrPositions.get(idx);
-        if (v.getAggrPos() == pos) {
-          break;
-        }
+      if (aggrPositions[pos] == null) {
+        aggrPositions[pos] = new ArrayList<Field>();
       }
-
-      if (idx == aggrPositions.size()) {
-        v = new varLenFields(pos, new ArrayList<Field>());
-        aggrPositions.add(v);
-      }
-
-      v.getFields().add(f);
+      aggrPositions[pos].add(f);
       return javaObjectOverHead;
     }
 
@@ -582,9 +549,11 @@ public class GroupByOperator extends Ope
     for (int i = 0; i < aggregationEvaluators.length; i++) {
 
       fixedRowSize += javaObjectOverHead;
-      Class<? extends AggregationBuffer> agg = aggregationEvaluators[i]
-          .getNewAggregationBuffer().getClass();
-      Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg);
+      AggregationBuffer agg = aggregationEvaluators[i].getNewAggregationBuffer();
+      if (GenericUDAFEvaluator.isEstimable(agg)) {
+        continue;
+      }
+      Field[] fArr = ObjectInspectorUtils.getDeclaredNonStaticFields(agg.getClass());
       for (Field f : fArr) {
         fixedRowSize += getSize(i, f.getType(), f);
       }
@@ -968,29 +937,15 @@ public class GroupByOperator extends Ope
         }
       }
 
-      AggregationBuffer[] aggs = null;
-      if (aggrPositions.size() > 0) {
-        KeyWrapper newKeyProber = newKeys.copyKey();
-        aggs = hashAggregations.get(newKeyProber);
-      }
-
-      for (varLenFields v : aggrPositions) {
-        int aggrPos = v.getAggrPos();
-        List<Field> fieldsVarLen = v.getFields();
-        AggregationBuffer agg = aggs[aggrPos];
-
-        try {
-          for (Field f : fieldsVarLen) {
-            Object o = f.get(agg);
-            if (o instanceof String){
-              totalVariableSize += ((String)o).length();
-            }
-            else if (o instanceof ByteArrayRef){
-              totalVariableSize += ((ByteArrayRef)o).getData().length;
-            }
-          }
-        } catch (IllegalAccessException e) {
-          assert false;
+      AggregationBuffer[] aggs = hashAggregations.get(newKeys);
+      for (int i = 0; i < aggs.length; i++) {
+        AggregationBuffer agg = aggs[i];
+        if (GenericUDAFEvaluator.isEstimable(agg)) {
+          totalVariableSize += ((GenericUDAFEvaluator.AbstractAggregationBuffer)agg).estimate();
+          continue;
+        }
+        if (aggrPositions[i] != null) {
+          totalVariableSize += estimateSize(agg, aggrPositions[i]);
         }
       }
 
@@ -1010,6 +965,24 @@ public class GroupByOperator extends Ope
     return false;
   }
 
+  private int estimateSize(AggregationBuffer agg, List<Field> fields) {
+    int length = 0;
+    for (Field f : fields) {
+      try {
+        Object o = f.get(agg);
+        if (o instanceof String){
+          length += ((String)o).length();
+        }
+        else if (o instanceof ByteArrayRef){
+          length += ((ByteArrayRef)o).getData().length;
+        }
+      } catch (Exception e) {
+        // continue.. null out the field?
+      }
+    }
+    return length;
+  }
+
   private void flush(boolean complete) throws HiveException {
 
     countAfterReport = 0;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java Sat
Apr 27 03:38:23 2013
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.UD
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -303,6 +304,14 @@ public class GenericUDAFAverage extends 
       }
     }
 
+    @AggregationType(estimable = true)
+    static class AverageAgg extends AbstractAggregationBuffer {
+      long count;
+      double sum;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 2; }
+    };
+
     @Override
     public void reset(AggregationBuffer aggregation) throws HiveException {
       doReset((AverageAggregationBuffer<TYPE>)aggregation);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFBridge.java Sat
Apr 27 03:38:23 2013
@@ -147,7 +147,7 @@ public class GenericUDAFBridge extends A
     }
 
     /** class for storing UDAFEvaluator value. */
-    static class UDAFAgg implements AggregationBuffer {
+    static class UDAFAgg extends AbstractAggregationBuffer {
       UDAFEvaluator ueObject;
 
       UDAFAgg(UDAFEvaluator ueObject) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCollectSet.java
Sat Apr 27 03:38:23 2013
@@ -99,7 +99,7 @@ public class GenericUDAFCollectSet exten
       }
     }
     
-    static class MkArrayAggregationBuffer implements AggregationBuffer {
+    static class MkArrayAggregationBuffer extends AbstractAggregationBuffer {
       Set<Object> container;
     }
     

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java
Sat Apr 27 03:38:23 2013
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -185,11 +186,17 @@ public class GenericUDAFComputeStats ext
             foi);
     }
 
-    public static class BooleanStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class BooleanStatsAgg extends AbstractAggregationBuffer {
       public String columnType;                        /* Datatype of column */
       public long countTrues;  /* Count of number of true values seen so far */
       public long countFalses; /* Count of number of false values seen so far */
       public long countNulls;  /* Count of number of null values seen so far */
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive2() * 3 + model.lengthFor(columnType);
+      }
     };
 
     @Override
@@ -426,7 +433,9 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class LongStatsAgg implements AggregationBuffer {
+
+    @AggregationType(estimable = true)
+    public static class LongStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public long min;                              /* Minimum value seen so far */
       public long max;                              /* Maximum value seen so far */
@@ -434,6 +443,12 @@ public class GenericUDAFComputeStats ext
       public LongNumDistinctValueEstimator numDV;    /* Distinct value estimator */
       public boolean firstItem;                     /* First item in the aggBuf? */
       public int numBitVectors;
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive1() * 2 + model.primitive2() * 3 +
+            model.lengthFor(columnType) + model.lengthFor(numDV);
+      }
     };
 
     @Override
@@ -738,7 +753,8 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class DoubleStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class DoubleStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public double min;                            /* Minimum value seen so far */
       public double max;                            /* Maximum value seen so far */
@@ -746,6 +762,12 @@ public class GenericUDAFComputeStats ext
       public DoubleNumDistinctValueEstimator numDV;  /* Distinct value estimator */
       public boolean firstItem;                     /* First item in the aggBuf? */
       public int numBitVectors;
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive1() * 2 + model.primitive2() * 3 +
+            model.lengthFor(columnType) + model.lengthFor(numDV);
+      }
     };
 
     @Override
@@ -1061,7 +1083,8 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class StringStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class StringStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public long maxLength;                           /* Maximum length seen so far */
       public long sumLength;             /* Sum of lengths of all values seen so far */
@@ -1070,6 +1093,12 @@ public class GenericUDAFComputeStats ext
       public StringNumDistinctValueEstimator numDV;      /* Distinct value estimator */
       public int numBitVectors;
       public boolean firstItem;
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive1() * 2 + model.primitive2() * 4 +
+            model.lengthFor(columnType) + model.lengthFor(numDV);
+      }
     };
 
     @Override
@@ -1377,12 +1406,18 @@ public class GenericUDAFComputeStats ext
       }
     }
 
-    public static class BinaryStatsAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    public static class BinaryStatsAgg extends AbstractAggregationBuffer {
       public String columnType;
       public long maxLength;                           /* Maximum length seen so far */
       public long sumLength;             /* Sum of lengths of all values seen so far */
       public long count;                          /* Count of all values seen so far */
       public long countNulls;          /* Count of number of null values seen so far */
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.primitive2() * 4 + model.lengthFor(columnType);
+      }
     };
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFContextNGrams.java
Sat Apr 27 03:38:23 2013
@@ -400,7 +400,7 @@ public class GenericUDAFContextNGrams im
 
 
     // Aggregation buffer methods. 
-    static class NGramAggBuf implements AggregationBuffer {
+    static class NGramAggBuf extends AbstractAggregationBuffer {
       ArrayList<String> context;
       NGramEstimator nge;
     };

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
Sat Apr 27 03:38:23 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -247,13 +248,16 @@ public class GenericUDAFCorrelation exte
       }
     }
 
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       long count; // number n of elements
       double xavg; // average of x elements
       double yavg; // average of y elements
       double xvar; // n times the variance of x elements
       double yvar; // n times the variance of y elements
       double covar; // n times the covariance
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 6; }
     };
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java Sat
Apr 27 03:38:23 2013
@@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@@ -99,8 +100,11 @@ public class GenericUDAFCount implements
     }
 
     /** class for storing count value. */
-    static class CountAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class CountAgg extends AbstractAggregationBuffer {
       long value;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2; }
     }
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
Sat Apr 27 03:38:23 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -38,7 +39,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Compute the covariance covar_pop(x, y), using the following one-pass method
@@ -224,11 +224,14 @@ public class GenericUDAFCovariance exten
       }
     }
 
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       long count; // number n of elements
       double xavg; // average of x elements
       double yavg; // average of y elements
       double covar; // n times the covariance
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 4; }
     };
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java
Sat Apr 27 03:38:23 2013
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.List;
 
 import javaewah.EWAHCompressedBitmap;
 
@@ -109,8 +108,13 @@ public class GenericUDAFEWAHBitmap exten
     }
 
     /** class for storing the current partial result aggregation */
-    static class BitmapAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class BitmapAgg extends AbstractAggregationBuffer {
       EWAHCompressedBitmap bitmap;
+      @Override
+      public int estimate() {
+        return bitmap.sizeInBytes();
+      }
     }
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java
Sat Apr 27 03:38:23 2013
@@ -41,6 +41,19 @@ import org.apache.hadoop.hive.serde2.obj
 @UDFType(deterministic = true)
 public abstract class GenericUDAFEvaluator implements Closeable {
 
+  public static @interface AggregationType {
+    boolean estimable() default false;
+  }
+
+  public static boolean isEstimable(AggregationBuffer buffer) {
+    if (buffer instanceof AbstractAggregationBuffer) {
+      Class<? extends AggregationBuffer> clazz = buffer.getClass();
+      AggregationType annotation = clazz.getAnnotation(AggregationType.class);
+      return annotation != null && annotation.estimable();
+    }
+    return false;
+  }
+
   /**
    * Mode.
    *
@@ -123,10 +136,21 @@ public abstract class GenericUDAFEvaluat
    * 
    * In the future, we may completely hide this class inside the Evaluator and
    * use integer numbers to identify which aggregation we are looking at.
+   *
+   * @deprecated use {@link AbstractAggregationBuffer} instead
    */
   public static interface AggregationBuffer {
   };
 
+  public static abstract class AbstractAggregationBuffer implements AggregationBuffer {
+    /**
+     * Estimate the size of memory which is occupied by aggregation buffer.
+     * Currently, hive assumes that primitives types occupies 16 byte and java object has
+     * 64 byte overhead for each. For map, each entry also has 64 byte overhead.
+     */
+    public int estimate() { return -1; }
+  }
+
   /**
    * Get a new aggregation object.
    */

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java
Sat Apr 27 03:38:23 2013
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.udf.ge
 
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -27,21 +26,16 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Computes an approximate histogram of a numerical column using a user-specified number
of bins.
@@ -235,8 +229,13 @@ public class GenericUDAFHistogramNumeric
 
 
     // Aggregation buffer definition and manipulation methods
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       NumericHistogram histogram; // the histogram object
+      @Override
+      public int estimate() {
+        return JavaDataModel.get().lengthFor(histogram);
+      }
     };
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMax.java Sat Apr
27 03:38:23 2013
@@ -71,7 +71,7 @@ public class GenericUDAFMax extends Abst
     }
 
     /** class for storing the current max value */
-    static class MaxAgg implements AggregationBuffer {
+    static class MaxAgg extends AbstractAggregationBuffer {
       Object o;
     }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFMin.java Sat Apr
27 03:38:23 2013
@@ -71,7 +71,7 @@ public class GenericUDAFMin extends Abst
     }
 
     /** class for storing the current max value */
-    static class MinAgg implements AggregationBuffer {
+    static class MinAgg extends AbstractAggregationBuffer {
       Object o;
     }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java
Sat Apr 27 03:38:23 2013
@@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -33,15 +34,9 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.util.StringUtils;
 
 /**
  * Computes an approximate percentile (quantile) from an approximate histogram, for very
@@ -353,9 +348,16 @@ public class GenericUDAFPercentileApprox
 
     // Aggregation buffer methods. We wrap GenericUDAFHistogramNumeric's aggregation buffer
     // inside our own, so that we can also store requested quantile values between calls
-    static class PercentileAggBuf implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class PercentileAggBuf extends AbstractAggregationBuffer {
       NumericHistogram histogram;   // histogram used for quantile approximation
       double[] quantiles;           // the quantiles requested
+      @Override
+      public int estimate() {
+        JavaDataModel model = JavaDataModel.get();
+        return model.lengthFor(histogram) +
+            model.array() + JavaDataModel.PRIMITIVES2 * quantiles.length;
+      }
     };
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java Sat Apr
27 03:38:23 2013
@@ -24,6 +24,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -96,7 +97,8 @@ public class GenericUDAFSum extends Abst
     }
 
     /** class for storing decimal sum value. */
-    static class SumHiveDecimalAgg implements AggregationBuffer {
+    @AggregationType(estimable = false) // hard to know exactly for decimals
+    static class SumHiveDecimalAgg extends AbstractAggregationBuffer {
       boolean empty;
       HiveDecimal sum;
     }
@@ -188,9 +190,12 @@ public class GenericUDAFSum extends Abst
     }
 
     /** class for storing double sum value. */
-    static class SumDoubleAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class SumDoubleAgg extends AbstractAggregationBuffer {
       boolean empty;
       double sum;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2;
}
     }
 
     @Override
@@ -270,9 +275,12 @@ public class GenericUDAFSum extends Abst
     }
 
     /** class for storing double sum value. */
-    static class SumLongAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class SumLongAgg extends AbstractAggregationBuffer {
       boolean empty;
       long sum;
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES1 + JavaDataModel.PRIMITIVES2;
}
     }
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
(original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
Sat Apr 27 03:38:23 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.De
 import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -171,10 +172,13 @@ public class GenericUDAFVariance extends
       }
     }
 
-    static class StdAgg implements AggregationBuffer {
+    @AggregationType(estimable = true)
+    static class StdAgg extends AbstractAggregationBuffer {
       long count; // number of elements
       double sum; // sum of elements
       double variance; // sum[x-avg^2] (this is actually n times the variance)
+      @Override
+      public int estimate() { return JavaDataModel.PRIMITIVES2 * 3; }
     };
 
     @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFnGrams.java Sat
Apr 27 03:38:23 2013
@@ -338,7 +338,7 @@ public class GenericUDAFnGrams implement
     }
 
     // Aggregation buffer methods. 
-    static class NGramAggBuf implements AggregationBuffer {
+    static class NGramAggBuf extends AbstractAggregationBuffer {
       NGramEstimator nge;
       int n;
     };

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java?rev=1476483&r1=1476482&r2=1476483&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumericHistogram.java Sat
Apr 27 03:38:23 2013
@@ -313,4 +313,8 @@ public class NumericHistogram {
 
     return result;
   }
+
+  public int getNumBins() {
+    return bins == null ? 0 : bins.size();
+  }
 }



Mime
View raw message