Return-Path: Delivered-To: apmail-hadoop-hive-commits-archive@minotaur.apache.org Received: (qmail 74841 invoked from network); 1 Mar 2010 08:27:12 -0000 Received: from unknown (HELO mail.apache.org) (140.211.11.3) by 140.211.11.9 with SMTP; 1 Mar 2010 08:27:12 -0000 Received: (qmail 55230 invoked by uid 500); 28 Feb 2010 10:40:31 -0000 Delivered-To: apmail-hadoop-hive-commits-archive@hadoop.apache.org Received: (qmail 55182 invoked by uid 500); 28 Feb 2010 10:40:31 -0000 Mailing-List: contact hive-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hadoop.apache.org Delivered-To: mailing list hive-commits@hadoop.apache.org Received: (qmail 55174 invoked by uid 99); 28 Feb 2010 10:40:31 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 28 Feb 2010 10:40:31 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 28 Feb 2010 10:40:20 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id C966A238890A; Sun, 28 Feb 2010 10:39:58 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r917160 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/udf/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ Date: Sun, 28 Feb 2010 10:39:58 -0000 To: hive-commits@hadoop.apache.org From: heyongqiang@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100228103958.C966A238890A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: heyongqiang Date: Sun Feb 28 10:39:58 2010 New Revision: 917160 URL: http://svn.apache.org/viewvc?rev=917160&view=rev Log: HIVE-259. Add PERCENTILE aggregate function.(Jerome Boulon, Zheng via He Yongqiang) Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out Modified: hadoop/hive/trunk/CHANGES.txt hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Modified: hadoop/hive/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=917160&r1=917159&r2=917160&view=diff ============================================================================== --- hadoop/hive/trunk/CHANGES.txt (original) +++ hadoop/hive/trunk/CHANGES.txt Sun Feb 28 10:39:58 2010 @@ -39,6 +39,9 @@ HIVE-1193. ensure sorting properties for a table. (Namit via He Yongqiang) + HIVE-259. Add PERCENTILE aggregate function. + (Jerome Boulon, Zheng via He Yongqiang) + IMPROVEMENTS HIVE-983. Function from_unixtime takes long. (Ning Zhang via zshao) Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java?rev=917160&r1=917159&r2=917160&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (original) +++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java Sun Feb 28 10:39:58 2010 @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.udf.UDAFPercentile; import org.apache.hadoop.hive.ql.udf.UDFAbs; import org.apache.hadoop.hive.ql.udf.UDFAcos; import org.apache.hadoop.hive.ql.udf.UDFAscii; @@ -167,7 +168,6 @@ /** * FunctionRegistry. - * */ public final class FunctionRegistry { @@ -322,6 +322,8 @@ registerGenericUDAF("var_pop", new GenericUDAFVariance()); registerGenericUDAF("var_samp", new GenericUDAFVarianceSample()); + registerUDAF("percentile", UDAFPercentile.class); + // Generic UDFs registerGenericUDF("array", GenericUDFArray.class); registerGenericUDF("map", GenericUDFMap.class); Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java?rev=917160&view=auto ============================================================================== --- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java (added) +++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFPercentile.java Sun Feb 28 10:39:58 2010 @@ -0,0 +1,282 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDAF; +import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.io.LongWritable; + +/** + * UDAF for calculating the percentile values. + * There are several definitions of percentile, and we take the method recommended by + * NIST. + * @see http://en.wikipedia.org/wiki/Percentile#Alternative_methods + */ +@Description(name = "percentile", + value = "_FUNC_(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1])." + + "pc can be a double or double array") +public class UDAFPercentile extends UDAF { + + /** + * A state class to store intermediate aggregation results. + */ + public static class State { + private Map counts; + private List percentiles; + } + + /** + * A comparator to sort the entries in order. + */ + public static class MyComparator implements Comparator> { + @Override + public int compare(Map.Entry o1, + Map.Entry o2) { + return o1.getKey().compareTo(o2.getKey()); + } + } + + /** + * Increment the State object with o as the key, and i as the count. + */ + private static void increment(State s, LongWritable o, long i) { + if (s.counts == null) { + s.counts = new HashMap(); + } + LongWritable count = s.counts.get(o); + if (count == null) { + // We have to create a new object, because the object o belongs + // to the code that creates it and may get its value changed. + LongWritable key = new LongWritable(); + key.set(o.get()); + s.counts.put(key, new LongWritable(i)); + } else { + count.set(count.get() + i); + } + } + + /** + * Get the percentile value. + */ + private static double getPercentile(List> entriesList, + double position) { + // We may need to do linear interpolation to get the exact percentile + long lower = (long)Math.floor(position); + long higher = (long)Math.ceil(position); + + // Linear search since this won't take much time from the total execution anyway + // lower has the range of [0 .. total-1] + // The first entry with accumulated count (lower+1) corresponds to the lower position. + int i = 0; + while (entriesList.get(i).getValue().get() < lower + 1) { + i++; + } + + long lowerKey = entriesList.get(i).getKey().get(); + if (higher == lower) { + // no interpolation needed because position does not have a fraction + return lowerKey; + } + + if (entriesList.get(i).getValue().get() < higher + 1) { + i++; + } + long higherKey = entriesList.get(i).getKey().get(); + + if (higherKey == lowerKey) { + // no interpolation needed because lower position and higher position has the same key + return lowerKey; + } + + // Linear interpolation to get the exact percentile + return (higher - position) * lowerKey + (position - lower) * higherKey; + } + + + /** + * The evaluator for percentile computation based on long. + */ + public static class PercentileLongEvaluator implements UDAFEvaluator { + + private State state; + + public PercentileLongEvaluator() { + state = new State(); + } + + public void init() { + if (state.counts != null) { + state.counts.clear(); + } + } + + public boolean iterate(LongWritable o, double percentile) { + if (state.percentiles == null) { + state.percentiles = new ArrayList(1); + state.percentiles.add(new DoubleWritable(percentile)); + } + if (o != null) { + increment(state, o, 1); + } + return true; + } + + public State terminatePartial() { + return state; + } + + public boolean merge(State other) { + if (state.percentiles == null) { + state.percentiles = new ArrayList(other.percentiles); + } + for (Map.Entry e: other.counts.entrySet()) { + increment(state, e.getKey(), e.getValue().get()); + } + return true; + } + + private DoubleWritable result; + + public DoubleWritable terminate() { + // No input data. + if (state.counts == null) { + return null; + } + + // Get all items into an array and sort them. + Set> entries = state.counts.entrySet(); + List> entriesList = + new ArrayList>(entries); + Collections.sort(entriesList, new MyComparator()); + + // Accumulate the counts. + long total = 0; + for (int i = 0; i < entriesList.size(); i++) { + LongWritable count = entriesList.get(i).getValue(); + total += count.get(); + count.set(total); + } + + // Initialize the result. + if (result == null) { + result = new DoubleWritable(); + } + + // maxPosition is the 1.0 percentile + long maxPosition = total - 1; + double position = maxPosition * state.percentiles.get(0).get(); + result.set(getPercentile(entriesList, position)); + return result; + } + } + + /** + * The evaluator for percentile computation based on long for an array of percentiles. + */ + public static class PercentileLongArrayEvaluator implements UDAFEvaluator { + + private State state; + + public PercentileLongArrayEvaluator() { + state = new State(); + } + + public void init() { + if (state.counts != null) { + state.counts.clear(); + } + } + + public boolean iterate(LongWritable o, List percentiles) { + if (state.percentiles == null) { + state.percentiles = new ArrayList(percentiles); + } + if (o != null) { + increment(state, o, 1); + } + return true; + } + + public State terminatePartial() { + return state; + } + + public boolean merge(State other) { + if (state.percentiles == null) { + state.percentiles = new ArrayList(other.percentiles); + } + for (Map.Entry e: other.counts.entrySet()) { + increment(state, e.getKey(), e.getValue().get()); + } + return true; + } + + + private List results; + + public List terminate() { + // No input data + if (state.counts == null) { + return null; + } + + // Get all items into an array and sort them + Set> entries = state.counts.entrySet(); + List> entriesList = + new ArrayList>(entries); + Collections.sort(entriesList, new MyComparator()); + + // accumulate the counts + long total = 0; + for (int i = 0; i < entriesList.size(); i++) { + LongWritable count = entriesList.get(i).getValue(); + total += count.get(); + count.set(total); + } + + // maxPosition is the 1.0 percentile + long maxPosition = total - 1; + + // Initialize the results + if (results == null) { + results = new ArrayList(); + for (int i = 0; i < state.percentiles.size(); i++) { + results.add(new DoubleWritable()); + } + } + // Set the results + for (int i = 0; i < state.percentiles.size(); i++) { + double position = maxPosition * state.percentiles.get(i).get(); + results.get(i).set(getPercentile(entriesList, position)); + } + return results; + } + } + +} Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q?rev=917160&view=auto ============================================================================== --- hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q (added) +++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/udf_percentile.q Sun Feb 28 10:39:58 2010 @@ -0,0 +1,51 @@ +DESCRIBE FUNCTION percentile; +DESCRIBE FUNCTION EXTENDED percentile; + + +set hive.map.aggr = false; +set hive.groupby.skewindata = false; + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = false; + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + + +set hive.map.aggr = false; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; + + +set hive.map.aggr = true; +set hive.groupby.skewindata = true; + +SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10; Modified: hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out?rev=917160&r1=917159&r2=917160&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out (original) +++ hadoop/hive/trunk/ql/src/test/results/clientpositive/show_functions.q.out Sun Feb 28 10:39:58 2010 @@ -82,6 +82,7 @@ not or parse_url +percentile pmod positive pow @@ -154,6 +155,7 @@ locate minute negative +percentile positive regexp_replace reverse Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out?rev=917160&view=auto ============================================================================== --- hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out (added) +++ hadoop/hive/trunk/ql/src/test/results/clientpositive/udf_percentile.q.out Sun Feb 28 10:39:58 2010 @@ -0,0 +1,290 @@ +PREHOOK: query: DESCRIBE FUNCTION percentile +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION percentile +POSTHOOK: type: DESCFUNCTION +percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array +PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile +POSTHOOK: type: DESCFUNCTION +percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-30-57_926_4752964414489468009/10000 +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-30-57_926_4752964414489468009/10000 +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-02_222_1225909126411093990/10000 +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-02_222_1225909126411093990/10000 +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-06_237_1106980062235474069/10000 +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-06_237_1106980062235474069/10000 +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0] +PREHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-13_357_1069357782802139112/10000 +POSTHOOK: query: SELECT CAST(key AS INT) DIV 10, + percentile(CAST(substr(value, 5) AS INT), 0.0), + percentile(CAST(substr(value, 5) AS INT), 0.5), + percentile(CAST(substr(value, 5) AS INT), 1.0), + percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0)) +FROM src +GROUP BY CAST(key AS INT) DIV 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/zshao/hadoop_hive_trunk/build/ql/scratchdir/hive_2010-02-28_01-31-13_357_1069357782802139112/10000 +0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0] +1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0] +2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0] +3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0] +4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0] +5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0] +6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0] +7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0] +8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0] +9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0] +10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0] +11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0] +12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0] +13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0] +14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0] +15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0] +16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0] +17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0] +18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0] +19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0] +20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0] +21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0] +22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0] +23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0] +24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0] +25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0] +26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0] +27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0] +28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0] +29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0] +30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0] +31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0] +32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0] +33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0] +34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0] +35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0] +36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0] +37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0] +38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0] +39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0] +40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0] +41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0] +42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0] +43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0] +44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0] +45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0] +46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0] +47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0] +48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0] +49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0]