hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From heyongqi...@apache.org
Subject svn commit: r917160 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Date Sun, 28 Feb 2010 10:39:58 GMT
Author: heyongqiang
Date: Sun Feb 28 10:39:58 2010
New Revision: 917160

URL: http://svn.apache.org/viewvc?rev=917160&view=rev
Log:
HIVE-259. Add PERCENTILE aggregate function. (Jerome Boulon, Zheng via He Yongqiang)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=917160&r1=917159&r2=917160&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Sun Feb 28 10:39:58 2010
@@ -39,6 +39,9 @@
     HIVE-1193. ensure sorting properties for a table.
     (Namit via He Yongqiang)
 
+    HIVE-259. Add PERCENTILE aggregate function.
+    (Jerome Boulon, Zheng via He Yongqiang)
+
   IMPROVEMENTS
     HIVE-983. Function from_unixtime takes long.
     (Ning Zhang via zshao)

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=917160&r1=917159&r2=917160&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sun Feb 28 10:39:58 2010
@@ -37,6 +37,7 @@
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.UDAFPercentile;
 import org.apache.hadoop.hive.ql.udf.UDFAbs;
 import org.apache.hadoop.hive.ql.udf.UDFAcos;
 import org.apache.hadoop.hive.ql.udf.UDFAscii;
@@ -167,7 +168,6 @@
 
 /**
  * FunctionRegistry.
- *
  */
 public final class FunctionRegistry {
 
@@ -322,6 +322,8 @@
     registerGenericUDAF("var_pop", new GenericUDAFVariance());
     registerGenericUDAF("var_samp", new GenericUDAFVarianceSample());
 
+    registerUDAF("percentile", UDAFPercentile.class);
+    
     // Generic UDFs
     registerGenericUDF("array", GenericUDFArray.class);
     registerGenericUDF("map", GenericUDFMap.class);

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java?rev=917160&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java Sun Feb 28 10:39:58 2010
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDAF;
+import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * UDAF for calculating the percentile values.
+ * There are several definitions of percentile, and we take the method recommended by
+ * NIST.
+ * @see http://en.wikipedia.org/wiki/Percentile#Alternative_methods
+ */
+@Description(name = "percentile",
+    value = "_FUNC_(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1])."
+      + "pc can be a double or double array")
+public class UDAFPercentile extends UDAF {
+
+  /**
+   * A state class to store intermediate aggregation results.
+   */
+  public static class State {
+    private Map<LongWritable, LongWritable> counts;
+    private List<DoubleWritable> percentiles;
+  }
+
+  /**
+   * A comparator to sort the entries in order.
+   */
+  public static class MyComparator implements Comparator<Map.Entry<LongWritable, LongWritable>> {
+    @Override
+    public int compare(Map.Entry<LongWritable, LongWritable> o1,
+        Map.Entry<LongWritable, LongWritable> o2) {
+      return o1.getKey().compareTo(o2.getKey());
+    }
+  }
+
+  /**
+   * Increment the State object with o as the key, and i as the count.
+   */
+  private static void increment(State s, LongWritable o, long i) {
+    if (s.counts == null) {
+      s.counts = new HashMap<LongWritable, LongWritable>();
+    }
+    LongWritable count = s.counts.get(o);
+    if (count == null) {
+      // We have to create a new object, because the object o belongs
+      // to the code that creates it and may get its value changed.
+      LongWritable key = new LongWritable();
+      key.set(o.get());
+      s.counts.put(key, new LongWritable(i));
+    } else {
+      count.set(count.get() + i);
+    }
+  }
+
+  /**
+   * Get the percentile value.
+   */
+  private static double getPercentile(List<Map.Entry<LongWritable, LongWritable>> entriesList,
+      double position) {
+    // We may need to do linear interpolation to get the exact percentile
+    long lower = (long)Math.floor(position);
+    long higher = (long)Math.ceil(position);
+
+    // Linear search since this won't take much time from the total execution anyway
+    // lower has the range of [0 .. total-1]
+    // The first entry with accumulated count (lower+1) corresponds to the lower position.
+    int i = 0;
+    while (entriesList.get(i).getValue().get() < lower + 1) {
+      i++;
+    }
+
+    long lowerKey = entriesList.get(i).getKey().get();
+    if (higher == lower) {
+      // no interpolation needed because position does not have a fraction
+      return lowerKey;
+    }
+
+    if (entriesList.get(i).getValue().get() < higher + 1) {
+      i++;
+    }
+    long higherKey = entriesList.get(i).getKey().get();
+
+    if (higherKey == lowerKey) {
+      // no interpolation needed because lower position and higher position has the same key
+      return lowerKey;
+    }
+
+    // Linear interpolation to get the exact percentile
+    return (higher - position) * lowerKey + (position - lower) * higherKey;
+  }
+
+
+  /**
+   * The evaluator for percentile computation based on long.
+   */
+  public static class PercentileLongEvaluator implements UDAFEvaluator {
+
+    private State state;
+
+    public PercentileLongEvaluator() {
+      state = new State();
+    }
+
+    public void init() {
+      if (state.counts != null) {
+        state.counts.clear();
+      }
+    }
+
+    public boolean iterate(LongWritable o, double percentile) {
+      if (state.percentiles == null) {
+        state.percentiles = new ArrayList<DoubleWritable>(1);
+        state.percentiles.add(new DoubleWritable(percentile));
+      }
+      if (o != null) {
+        increment(state, o, 1);
+      }
+      return true;
+    }
+
+    public State terminatePartial() {
+      return state;
+    }
+
+    public boolean merge(State other) {
+      if (state.percentiles == null) {
+        state.percentiles = new ArrayList<DoubleWritable>(other.percentiles);
+      }
+      for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
+        increment(state, e.getKey(), e.getValue().get());
+      }
+      return true;
+    }
+
+    private DoubleWritable result;
+
+    public DoubleWritable terminate() {
+      // No input data.
+      if (state.counts == null) {
+        return null;
+      }
+
+      // Get all items into an array and sort them.
+      Set<Map.Entry<LongWritable, LongWritable>> entries = state.counts.entrySet();
+      List<Map.Entry<LongWritable, LongWritable>> entriesList =
+        new ArrayList<Map.Entry<LongWritable, LongWritable>>(entries);
+      Collections.sort(entriesList, new MyComparator());
+
+      // Accumulate the counts.
+      long total = 0;
+      for (int i = 0; i < entriesList.size(); i++) {
+        LongWritable count = entriesList.get(i).getValue();
+        total += count.get();
+        count.set(total);
+      }
+
+      // Initialize the result.
+      if (result == null) {
+        result = new DoubleWritable();
+      }
+
+      // maxPosition is the 1.0 percentile
+      long maxPosition = total - 1;
+      double position = maxPosition * state.percentiles.get(0).get();
+      result.set(getPercentile(entriesList, position));
+      return result;
+    }
+  }
+
+  /**
+   * The evaluator for percentile computation based on long for an array of percentiles.
+   */
+  public static class PercentileLongArrayEvaluator implements UDAFEvaluator {
+
+    private State state;
+
+    public PercentileLongArrayEvaluator() {
+      state = new State();
+    }
+
+    public void init() {
+      if (state.counts != null) {
+        state.counts.clear();
+      }
+    }
+
+    public boolean iterate(LongWritable o, List<DoubleWritable> percentiles) {
+      if (state.percentiles == null) {
+        state.percentiles = new ArrayList<DoubleWritable>(percentiles);
+      }
+      if (o != null) {
+        increment(state, o, 1);
+      }
+      return true;
+    }
+
+    public State terminatePartial() {
+      return state;
+    }
+
+    public boolean merge(State other) {
+      if (state.percentiles == null) {
+        state.percentiles = new ArrayList<DoubleWritable>(other.percentiles);
+      }
+      for (Map.Entry<LongWritable, LongWritable> e: other.counts.entrySet()) {
+        increment(state, e.getKey(), e.getValue().get());
+      }
+      return true;
+    }
+
+
+    private List<DoubleWritable> results;
+
+    public List<DoubleWritable> terminate() {
+      // No input data
+      if (state.counts == null) {
+        return null;
+      }
+
+      // Get all items into an array and sort them
+      Set<Map.Entry<LongWritable, LongWritable>> entries = state.counts.entrySet();
+      List<Map.Entry<LongWritable, LongWritable>> entriesList =
+        new ArrayList<Map.Entry<LongWritable, LongWritable>>(entries);
+      Collections.sort(entriesList, new MyComparator());
+
+      // accumulate the counts
+      long total = 0;
+      for (int i = 0; i < entriesList.size(); i++) {
+        LongWritable count = entriesList.get(i).getValue();
+        total += count.get();
+        count.set(total);
+      }
+
+      // maxPosition is the 1.0 percentile
+      long maxPosition = total - 1;
+
+      // Initialize the results
+      if (results == null) {
+        results = new ArrayList<DoubleWritable>();
+        for (int i = 0; i < state.percentiles.size(); i++) {
+          results.add(new DoubleWritable());
+        }
+      }
+      // Set the results
+      for (int i = 0; i < state.percentiles.size(); i++) {
+        double position = maxPosition * state.percentiles.get(i).get();
+        results.get(i).set(getPercentile(entriesList, position));
+      }
+      return results;
+    }
+  }
+
+}

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q?rev=917160&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q Sun Feb 28 10:39:58 2010
@@ -0,0 +1,51 @@
+DESCRIBE FUNCTION percentile;
+DESCRIBE FUNCTION EXTENDED percentile;
+
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = false;
+
+SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = false;
+
+SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+
+set hive.map.aggr = false;
+set hive.groupby.skewindata = true;
+
+SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;
+
+
+set hive.map.aggr = true;
+set hive.groupby.skewindata = true;
+
+SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10;

Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=917160&r1=917159&r2=917160&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Sun Feb 28 10:39:58 2010
@@ -82,6 +82,7 @@
 not
 or
 parse_url
+percentile
 pmod
 positive
 pow
@@ -154,6 +155,7 @@
 locate
 minute
 negative
+percentile
 positive
 regexp_replace
 reverse

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out?rev=917160&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out Sun Feb 28 10:39:58 2010
@@ -0,0 +1,290 @@
+PREHOOK: query: DESCRIBE FUNCTION percentile
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION percentile
+POSTHOOK: type: DESCFUNCTION
+percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile
+POSTHOOK: type: DESCFUNCTION
+percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-30-57_926_4752964414489468009/10000
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-30-57_926_4752964414489468009/10000
+0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
+1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
+2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
+3	30.0	35.0	37.0	[30.0,35.0,37.0,37.0]
+4	41.0	42.5	47.0	[41.0,42.5,46.849999999999994,47.0]
+5	51.0	54.0	58.0	[51.0,54.0,58.0,58.0]
+6	64.0	66.5	69.0	[64.0,66.5,68.9,69.0]
+7	70.0	73.0	78.0	[70.0,73.0,77.91000000000001,78.0]
+8	80.0	84.0	87.0	[80.0,84.0,86.92,87.0]
+9	90.0	95.0	98.0	[90.0,95.0,98.0,98.0]
+10	100.0	103.0	105.0	[100.0,103.0,104.94,105.0]
+11	111.0	117.0	119.0	[111.0,117.0,119.0,119.0]
+12	120.0	127.0	129.0	[120.0,127.0,129.0,129.0]
+13	131.0	137.0	138.0	[131.0,137.0,138.0,138.0]
+14	143.0	146.0	149.0	[143.0,146.0,149.0,149.0]
+15	150.0	154.0	158.0	[150.0,154.0,157.92999999999998,158.0]
+16	160.0	166.5	169.0	[160.0,166.5,169.0,169.0]
+17	170.0	175.0	179.0	[170.0,175.0,179.0,179.0]
+18	180.0	186.5	189.0	[180.0,186.5,188.86,189.0]
+19	190.0	194.5	199.0	[190.0,194.5,199.0,199.0]
+20	200.0	205.0	209.0	[200.0,205.0,209.0,209.0]
+21	213.0	216.5	219.0	[213.0,216.5,219.0,219.0]
+22	221.0	224.0	229.0	[221.0,224.0,229.0,229.0]
+23	230.0	234.0	239.0	[230.0,234.0,239.0,239.0]
+24	241.0	244.0	249.0	[241.0,244.0,248.94,249.0]
+25	252.0	256.0	258.0	[252.0,256.0,257.94,258.0]
+26	260.0	264.0	266.0	[260.0,264.0,265.95,266.0]
+27	272.0	275.0	278.0	[272.0,275.0,278.0,278.0]
+28	280.0	283.5	289.0	[280.0,283.5,288.87,289.0]
+29	291.0	297.0	298.0	[291.0,297.0,298.0,298.0]
+30	302.0	307.0	309.0	[302.0,307.0,309.0,309.0]
+31	310.0	316.0	318.0	[310.0,316.0,318.0,318.0]
+32	321.0	324.0	327.0	[321.0,324.0,327.0,327.0]
+33	331.0	333.0	339.0	[331.0,333.0,338.92,339.0]
+34	341.0	345.0	348.0	[341.0,345.0,348.0,348.0]
+35	351.0	353.0	356.0	[351.0,353.0,355.91,356.0]
+36	360.0	367.0	369.0	[360.0,367.0,369.0,369.0]
+37	373.0	376.0	379.0	[373.0,376.0,378.95,379.0]
+38	382.0	384.0	389.0	[382.0,384.0,388.82,389.0]
+39	392.0	396.0	399.0	[392.0,396.0,399.0,399.0]
+40	400.0	403.5	409.0	[400.0,403.5,409.0,409.0]
+41	411.0	415.5	419.0	[411.0,415.5,418.91,419.0]
+42	421.0	425.5	429.0	[421.0,425.5,429.0,429.0]
+43	430.0	435.0	439.0	[430.0,435.0,439.0,439.0]
+44	443.0	446.0	449.0	[443.0,446.0,448.96,449.0]
+45	452.0	455.0	459.0	[452.0,455.0,459.0,459.0]
+46	460.0	467.5	469.0	[460.0,467.5,469.0,469.0]
+47	470.0	477.0	479.0	[470.0,477.0,478.94,479.0]
+48	480.0	484.0	489.0	[480.0,484.0,489.0,489.0]
+49	490.0	494.5	498.0	[490.0,494.5,498.0,498.0]
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-02_222_1225909126411093990/10000
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-02_222_1225909126411093990/10000
+0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
+1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
+2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
+3	30.0	35.0	37.0	[30.0,35.0,37.0,37.0]
+4	41.0	42.5	47.0	[41.0,42.5,46.849999999999994,47.0]
+5	51.0	54.0	58.0	[51.0,54.0,58.0,58.0]
+6	64.0	66.5	69.0	[64.0,66.5,68.9,69.0]
+7	70.0	73.0	78.0	[70.0,73.0,77.91000000000001,78.0]
+8	80.0	84.0	87.0	[80.0,84.0,86.92,87.0]
+9	90.0	95.0	98.0	[90.0,95.0,98.0,98.0]
+10	100.0	103.0	105.0	[100.0,103.0,104.94,105.0]
+11	111.0	117.0	119.0	[111.0,117.0,119.0,119.0]
+12	120.0	127.0	129.0	[120.0,127.0,129.0,129.0]
+13	131.0	137.0	138.0	[131.0,137.0,138.0,138.0]
+14	143.0	146.0	149.0	[143.0,146.0,149.0,149.0]
+15	150.0	154.0	158.0	[150.0,154.0,157.92999999999998,158.0]
+16	160.0	166.5	169.0	[160.0,166.5,169.0,169.0]
+17	170.0	175.0	179.0	[170.0,175.0,179.0,179.0]
+18	180.0	186.5	189.0	[180.0,186.5,188.86,189.0]
+19	190.0	194.5	199.0	[190.0,194.5,199.0,199.0]
+20	200.0	205.0	209.0	[200.0,205.0,209.0,209.0]
+21	213.0	216.5	219.0	[213.0,216.5,219.0,219.0]
+22	221.0	224.0	229.0	[221.0,224.0,229.0,229.0]
+23	230.0	234.0	239.0	[230.0,234.0,239.0,239.0]
+24	241.0	244.0	249.0	[241.0,244.0,248.94,249.0]
+25	252.0	256.0	258.0	[252.0,256.0,257.94,258.0]
+26	260.0	264.0	266.0	[260.0,264.0,265.95,266.0]
+27	272.0	275.0	278.0	[272.0,275.0,278.0,278.0]
+28	280.0	283.5	289.0	[280.0,283.5,288.87,289.0]
+29	291.0	297.0	298.0	[291.0,297.0,298.0,298.0]
+30	302.0	307.0	309.0	[302.0,307.0,309.0,309.0]
+31	310.0	316.0	318.0	[310.0,316.0,318.0,318.0]
+32	321.0	324.0	327.0	[321.0,324.0,327.0,327.0]
+33	331.0	333.0	339.0	[331.0,333.0,338.92,339.0]
+34	341.0	345.0	348.0	[341.0,345.0,348.0,348.0]
+35	351.0	353.0	356.0	[351.0,353.0,355.91,356.0]
+36	360.0	367.0	369.0	[360.0,367.0,369.0,369.0]
+37	373.0	376.0	379.0	[373.0,376.0,378.95,379.0]
+38	382.0	384.0	389.0	[382.0,384.0,388.82,389.0]
+39	392.0	396.0	399.0	[392.0,396.0,399.0,399.0]
+40	400.0	403.5	409.0	[400.0,403.5,409.0,409.0]
+41	411.0	415.5	419.0	[411.0,415.5,418.91,419.0]
+42	421.0	425.5	429.0	[421.0,425.5,429.0,429.0]
+43	430.0	435.0	439.0	[430.0,435.0,439.0,439.0]
+44	443.0	446.0	449.0	[443.0,446.0,448.96,449.0]
+45	452.0	455.0	459.0	[452.0,455.0,459.0,459.0]
+46	460.0	467.5	469.0	[460.0,467.5,469.0,469.0]
+47	470.0	477.0	479.0	[470.0,477.0,478.94,479.0]
+48	480.0	484.0	489.0	[480.0,484.0,489.0,489.0]
+49	490.0	494.5	498.0	[490.0,494.5,498.0,498.0]
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-06_237_1106980062235474069/10000
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-06_237_1106980062235474069/10000
+0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
+1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
+2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
+3	30.0	35.0	37.0	[30.0,35.0,37.0,37.0]
+4	41.0	42.5	47.0	[41.0,42.5,46.849999999999994,47.0]
+5	51.0	54.0	58.0	[51.0,54.0,58.0,58.0]
+6	64.0	66.5	69.0	[64.0,66.5,68.9,69.0]
+7	70.0	73.0	78.0	[70.0,73.0,77.91000000000001,78.0]
+8	80.0	84.0	87.0	[80.0,84.0,86.92,87.0]
+9	90.0	95.0	98.0	[90.0,95.0,98.0,98.0]
+10	100.0	103.0	105.0	[100.0,103.0,104.94,105.0]
+11	111.0	117.0	119.0	[111.0,117.0,119.0,119.0]
+12	120.0	127.0	129.0	[120.0,127.0,129.0,129.0]
+13	131.0	137.0	138.0	[131.0,137.0,138.0,138.0]
+14	143.0	146.0	149.0	[143.0,146.0,149.0,149.0]
+15	150.0	154.0	158.0	[150.0,154.0,157.92999999999998,158.0]
+16	160.0	166.5	169.0	[160.0,166.5,169.0,169.0]
+17	170.0	175.0	179.0	[170.0,175.0,179.0,179.0]
+18	180.0	186.5	189.0	[180.0,186.5,188.86,189.0]
+19	190.0	194.5	199.0	[190.0,194.5,199.0,199.0]
+20	200.0	205.0	209.0	[200.0,205.0,209.0,209.0]
+21	213.0	216.5	219.0	[213.0,216.5,219.0,219.0]
+22	221.0	224.0	229.0	[221.0,224.0,229.0,229.0]
+23	230.0	234.0	239.0	[230.0,234.0,239.0,239.0]
+24	241.0	244.0	249.0	[241.0,244.0,248.94,249.0]
+25	252.0	256.0	258.0	[252.0,256.0,257.94,258.0]
+26	260.0	264.0	266.0	[260.0,264.0,265.95,266.0]
+27	272.0	275.0	278.0	[272.0,275.0,278.0,278.0]
+28	280.0	283.5	289.0	[280.0,283.5,288.87,289.0]
+29	291.0	297.0	298.0	[291.0,297.0,298.0,298.0]
+30	302.0	307.0	309.0	[302.0,307.0,309.0,309.0]
+31	310.0	316.0	318.0	[310.0,316.0,318.0,318.0]
+32	321.0	324.0	327.0	[321.0,324.0,327.0,327.0]
+33	331.0	333.0	339.0	[331.0,333.0,338.92,339.0]
+34	341.0	345.0	348.0	[341.0,345.0,348.0,348.0]
+35	351.0	353.0	356.0	[351.0,353.0,355.91,356.0]
+36	360.0	367.0	369.0	[360.0,367.0,369.0,369.0]
+37	373.0	376.0	379.0	[373.0,376.0,378.95,379.0]
+38	382.0	384.0	389.0	[382.0,384.0,388.82,389.0]
+39	392.0	396.0	399.0	[392.0,396.0,399.0,399.0]
+40	400.0	403.5	409.0	[400.0,403.5,409.0,409.0]
+41	411.0	415.5	419.0	[411.0,415.5,418.91,419.0]
+42	421.0	425.5	429.0	[421.0,425.5,429.0,429.0]
+43	430.0	435.0	439.0	[430.0,435.0,439.0,439.0]
+44	443.0	446.0	449.0	[443.0,446.0,448.96,449.0]
+45	452.0	455.0	459.0	[452.0,455.0,459.0,459.0]
+46	460.0	467.5	469.0	[460.0,467.5,469.0,469.0]
+47	470.0	477.0	479.0	[470.0,477.0,478.94,479.0]
+48	480.0	484.0	489.0	[480.0,484.0,489.0,489.0]
+49	490.0	494.5	498.0	[490.0,494.5,498.0,498.0]
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-13_357_1069357782802139112/10000
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-13_357_1069357782802139112/10000
+0	0.0	4.5	9.0	[0.0,4.5,8.91,9.0]
+1	10.0	15.0	19.0	[10.0,15.0,18.91,19.0]
+2	20.0	26.0	28.0	[20.0,26.0,27.939999999999998,28.0]
+3	30.0	35.0	37.0	[30.0,35.0,37.0,37.0]
+4	41.0	42.5	47.0	[41.0,42.5,46.849999999999994,47.0]
+5	51.0	54.0	58.0	[51.0,54.0,58.0,58.0]
+6	64.0	66.5	69.0	[64.0,66.5,68.9,69.0]
+7	70.0	73.0	78.0	[70.0,73.0,77.91000000000001,78.0]
+8	80.0	84.0	87.0	[80.0,84.0,86.92,87.0]
+9	90.0	95.0	98.0	[90.0,95.0,98.0,98.0]
+10	100.0	103.0	105.0	[100.0,103.0,104.94,105.0]
+11	111.0	117.0	119.0	[111.0,117.0,119.0,119.0]
+12	120.0	127.0	129.0	[120.0,127.0,129.0,129.0]
+13	131.0	137.0	138.0	[131.0,137.0,138.0,138.0]
+14	143.0	146.0	149.0	[143.0,146.0,149.0,149.0]
+15	150.0	154.0	158.0	[150.0,154.0,157.92999999999998,158.0]
+16	160.0	166.5	169.0	[160.0,166.5,169.0,169.0]
+17	170.0	175.0	179.0	[170.0,175.0,179.0,179.0]
+18	180.0	186.5	189.0	[180.0,186.5,188.86,189.0]
+19	190.0	194.5	199.0	[190.0,194.5,199.0,199.0]
+20	200.0	205.0	209.0	[200.0,205.0,209.0,209.0]
+21	213.0	216.5	219.0	[213.0,216.5,219.0,219.0]
+22	221.0	224.0	229.0	[221.0,224.0,229.0,229.0]
+23	230.0	234.0	239.0	[230.0,234.0,239.0,239.0]
+24	241.0	244.0	249.0	[241.0,244.0,248.94,249.0]
+25	252.0	256.0	258.0	[252.0,256.0,257.94,258.0]
+26	260.0	264.0	266.0	[260.0,264.0,265.95,266.0]
+27	272.0	275.0	278.0	[272.0,275.0,278.0,278.0]
+28	280.0	283.5	289.0	[280.0,283.5,288.87,289.0]
+29	291.0	297.0	298.0	[291.0,297.0,298.0,298.0]
+30	302.0	307.0	309.0	[302.0,307.0,309.0,309.0]
+31	310.0	316.0	318.0	[310.0,316.0,318.0,318.0]
+32	321.0	324.0	327.0	[321.0,324.0,327.0,327.0]
+33	331.0	333.0	339.0	[331.0,333.0,338.92,339.0]
+34	341.0	345.0	348.0	[341.0,345.0,348.0,348.0]
+35	351.0	353.0	356.0	[351.0,353.0,355.91,356.0]
+36	360.0	367.0	369.0	[360.0,367.0,369.0,369.0]
+37	373.0	376.0	379.0	[373.0,376.0,378.95,379.0]
+38	382.0	384.0	389.0	[382.0,384.0,388.82,389.0]
+39	392.0	396.0	399.0	[392.0,396.0,399.0,399.0]
+40	400.0	403.5	409.0	[400.0,403.5,409.0,409.0]
+41	411.0	415.5	419.0	[411.0,415.5,418.91,419.0]
+42	421.0	425.5	429.0	[421.0,425.5,429.0,429.0]
+43	430.0	435.0	439.0	[430.0,435.0,439.0,439.0]
+44	443.0	446.0	449.0	[443.0,446.0,448.96,449.0]
+45	452.0	455.0	459.0	[452.0,455.0,459.0,459.0]
+46	460.0	467.5	469.0	[460.0,467.5,469.0,469.0]
+47	470.0	477.0	479.0	[470.0,477.0,478.94,479.0]
+48	480.0	484.0	489.0	[480.0,484.0,489.0,489.0]
+49	490.0	494.5	498.0	[490.0,494.5,498.0,498.0]



Mime
View raw message