Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 8DA33200CCB for ; Thu, 20 Jul 2017 12:16:32 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 8C24F16B02B; Thu, 20 Jul 2017 10:16:32 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 5F24516B02C for ; Thu, 20 Jul 2017 12:16:30 +0200 (CEST) Received: (qmail 95821 invoked by uid 500); 20 Jul 2017 10:16:29 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 95422 invoked by uid 99); 20 Jul 2017 10:16:28 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 20 Jul 2017 10:16:28 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 10851F3270; Thu, 20 Jul 2017 10:16:27 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mmccline@apache.org To: commits@hive.apache.org Date: Thu, 20 Jul 2017 10:16:59 -0000 Message-Id: <5b30a21c5ce24939969ef0c4948d2454@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [33/36] hive git commit: HIVE-16369: Vectorization: Support PTF (Part 1: No Custom Window Framing -- Default Only) (Matt McCline, reviewed by Ashutosh Chauhan) archived-at: Thu, 20 Jul 2017 10:16:32 -0000 http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing.q b/ql/src/test/queries/clientpositive/vector_windowing.q new file mode 100644 index 0000000..4bcd77e --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing.q @@ -0,0 +1,791 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +set hive.mapred.mode=nonstrict; +set mapred.reduce.tasks=4; +-- SORT_QUERY_RESULTS + +-- 1. testWindowing +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; + +-- 2. testGroupByWithPartitioning +explain vectorization detail +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +; +select p_mfgr, p_name, p_size, +min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name)as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +; + +-- 3. testGroupByHavingWithSWQ +explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; +select p_mfgr, p_name, p_size, min(p_retailprice), +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; + +-- 4. testCount +explain vectorization detail +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +; +select p_mfgr, p_name, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd +from part +; + +-- 5. testCountWithWindowingUDAF +explain vectorization detail +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +; +select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +; + +-- 6. testCountInSubQ +explain vectorization detail +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1; +select sub1.r, sub1.dr, sub1.cd, sub1.s1, sub1.deltaSz +from (select p_mfgr, p_name, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +count(p_size) over(distribute by p_mfgr sort by p_name) as cd, +p_retailprice, round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +) sub1; + +-- 7. testJoinWithWindowingAndPTF +explain vectorization detail +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; +select abc.p_mfgr, abc.p_name, +rank() over(distribute by abc.p_mfgr sort by abc.p_name) as r, +dense_rank() over(distribute by abc.p_mfgr sort by abc.p_name) as dr, +abc.p_retailprice, round(sum(abc.p_retailprice) over (distribute by abc.p_mfgr sort by abc.p_name rows between unbounded preceding and current row),2) as s1, +abc.p_size, abc.p_size - lag(abc.p_size,1,abc.p_size) over(distribute by abc.p_mfgr sort by abc.p_name) as deltaSz +from noop(on part +partition by p_mfgr +order by p_name +) abc join part p1 on abc.p_partkey = p1.p_partkey +; + +-- 8. testMixedCaseAlias +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name, p_size desc) as R +from part +; + +-- 9. testHavingWithWindowingNoGBY +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; + +-- 10. testHavingWithWindowingCondRankNoGBY +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s1 +from part +; + +-- 11. testFirstLast +explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 12. testFirstLastWithWhere +explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2, +first_value(p_size) over w1 as f, +last_value(p_size, false) over w1 as l +from part +where p_mfgr = 'Manufacturer#3' +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 13. testSumWindow +explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over (distribute by p_mfgr sort by p_name rows between current row and current row) as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 14. testNoSortClause +explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, dense_rank() over(distribute by p_mfgr sort by p_name) as dr +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 15. testExpressions +explain vectorization detail +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +percent_rank() over(distribute by p_mfgr sort by p_name) as pr, +ntile(3) over(distribute by p_mfgr sort by p_name) as nt, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +avg(p_size) over(distribute by p_mfgr sort by p_name) as avg, +stddev(p_size) over(distribute by p_mfgr sort by p_name) as st, +first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv, +last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 16. testMultipleWindows +explain vectorization detail +select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, + rank() over(distribute by p_mfgr sort by p_name) as r, + dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +sum(p_size) over (distribute by p_mfgr sort by p_name range between unbounded preceding and current row) as s1, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row) as s2, +first_value(p_size) over w1 as fv1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 17. testCountStar +explain vectorization detail +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name ) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fvW1 +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 18. testUDAFs +explain vectorization detail +select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) over w1 as mi, +max(p_retailprice) over w1 as ma, +round(avg(p_retailprice) over w1,2) as ag +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 19. testUDAFsWithGBY +explain vectorization detail +select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, p_retailprice, +round(sum(p_retailprice) over w1,2) as s, +min(p_retailprice) as mi , +max(p_retailprice) as ma , +round(avg(p_retailprice) over w1,2) as ag +from part +group by p_mfgr,p_name, p_size, p_retailprice +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 20. testSTATs +explain vectorization detail +select p_mfgr,p_name, p_size, +stddev(p_retailprice) over w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +stddev(p_retailprice) over w1 as sdev, +stddev_pop(p_retailprice) over w1 as sdev_pop, +collect_set(p_size) over w1 as uniq_size, +variance(p_retailprice) over w1 as var, +round(corr(p_size, p_retailprice) over w1,5) as cor, +covar_pop(p_size, p_retailprice) over w1 as covarp +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 21. testDISTs +explain vectorization detail +select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +histogram_numeric(p_retailprice, 5) over w1 as hist, +percentile(p_partkey, 0.5) over w1 as per, +row_number() over(distribute by p_mfgr sort by p_mfgr, p_name) as rn +from part +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +-- 22. testViewAsTableInputWithWindowing +explain vectorization detail +create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand; +create view IF NOT EXISTS mfgr_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice),2) as s +from part +group by p_mfgr, p_brand; + +explain vectorization detail +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand; +select * +from ( +select p_mfgr, p_brand, s, +round(sum(s) over w1 , 2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_mfgr ) +) sq +order by p_mfgr, p_brand; + +select p_mfgr, p_brand, s, +round(sum(s) over w1 ,2) as s1 +from mfgr_price_view +window w1 as (distribute by p_mfgr sort by p_brand rows between 2 preceding and current row); + +-- 23. testCreateViewWithWindowingQuery +explain vectorization detail +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row); +create view IF NOT EXISTS mfgr_brand_price_view as +select p_mfgr, p_brand, +round(sum(p_retailprice) over w1,2) as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and current row); + +explain vectorization detail +select * from mfgr_brand_price_view; +select * from mfgr_brand_price_view; + +-- 24. testLateralViews +explain vectorization detail +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row); +select p_mfgr, p_name, +lv_col, p_size, sum(p_size) over w1 as s +from (select p_mfgr, p_name, p_size, array(1,2,3) arr from part) p +lateral view explode(arr) part_lv as lv_col +window w1 as (distribute by p_mfgr sort by p_size, lv_col rows between 2 preceding and current row); + +-- 25. testMultipleInserts3SWQs +CREATE TABLE part_1( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +s DOUBLE); + +CREATE TABLE part_2( +p_mfgr STRING, +p_name STRING, +p_size INT, +r INT, +dr INT, +cud INT, +s2 DOUBLE, +fv1 INT); + +CREATE TABLE part_3( +p_mfgr STRING, +p_name STRING, +p_size INT, +c INT, +ca INT, +fv INT); + +explain vectorization detail +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); +from part +INSERT OVERWRITE TABLE part_1 +select p_mfgr, p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name ) as r, +dense_rank() over(distribute by p_mfgr sort by p_name ) as dr, +round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between unbounded preceding and current row),2) as s +INSERT OVERWRITE TABLE part_2 +select p_mfgr,p_name, p_size, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +cume_dist() over(distribute by p_mfgr sort by p_name) as cud, +round(sum(p_size) over (distribute by p_mfgr sort by p_size range between 5 preceding and current row),1) as s2, +first_value(p_size) over w1 as fv1 +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following) +INSERT OVERWRITE TABLE part_3 +select p_mfgr,p_name, p_size, +count(*) over(distribute by p_mfgr sort by p_name) as c, +count(p_size) over(distribute by p_mfgr sort by p_name) as ca, +first_value(p_size) over w1 as fv +window w1 as (distribute by p_mfgr sort by p_mfgr, p_name rows between 2 preceding and 2 following); + +select * from part_1; + +select * from part_2; + +select * from part_3; + +-- 26. testGroupByHavingWithSWQAndAlias +explain vectorization detail +select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; +select p_mfgr, p_name, p_size, min(p_retailprice) as mi, +rank() over(distribute by p_mfgr sort by p_name) as r, +dense_rank() over(distribute by p_mfgr sort by p_name) as dr, +p_size, p_size - lag(p_size,1,p_size) over(distribute by p_mfgr sort by p_name) as deltaSz +from part +group by p_mfgr, p_name, p_size +having p_size > 0 +; + +-- 27. testMultipleRangeWindows +explain vectorization detail +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following); +select p_mfgr,p_name, p_size, +sum(p_size) over (distribute by p_mfgr sort by p_size range between 10 preceding and current row) as s2, +sum(p_size) over (distribute by p_mfgr sort by p_size range between current row and 10 following ) as s1 +from part +window w1 as (rows between 2 preceding and 2 following); + +-- 28. testPartOrderInUDAFInvoke +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part; +select p_mfgr, p_name, p_size, +sum(p_size) over (partition by p_mfgr order by p_name rows between 2 preceding and 2 following) as s +from part; + +-- 29. testPartOrderInWdwDef +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (partition by p_mfgr order by p_name rows between 2 preceding and 2 following); + +-- 30. testDefaultPartitioningSpecRules +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s, +sum(p_size) over w2 as s2 +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following), + w2 as (partition by p_mfgr order by p_name); + +-- 31. testWindowCrossReference +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1; +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as w1; + + +-- 32. testWindowInheritance +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2 +from part +window w1 as (partition by p_mfgr order by p_name range between 2 preceding and 2 following), + w2 as (w1 rows between unbounded preceding and current row); + + +-- 33. testWindowForwardReference +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over w3 as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); + + +-- 34. testWindowDefinitionPropagation +explain vectorization detail +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); +select p_mfgr, p_name, p_size, +sum(p_size) over w1 as s1, +sum(p_size) over w2 as s2, +sum(p_size) over (w3 rows between 2 preceding and 2 following) as s3 +from part +window w1 as (distribute by p_mfgr sort by p_name range between 2 preceding and 2 following), + w2 as w3, + w3 as (distribute by p_mfgr sort by p_name range between unbounded preceding and current row); + +-- 35. testDistinctWithWindowing +explain vectorization detail +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +select DISTINCT p_mfgr, p_name, p_size, +sum(p_size) over w1 as s +from part +window w1 as (distribute by p_mfgr sort by p_name rows between 2 preceding and 2 following); + +-- 36. testRankWithPartitioning +explain vectorization detail +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part; +select p_mfgr, p_name, p_size, +rank() over (partition by p_mfgr order by p_name ) as r +from part; + +-- 37. testPartitioningVariousForms +explain vectorization detail +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part; +select p_mfgr, +round(sum(p_retailprice) over (partition by p_mfgr order by p_mfgr),2) as s1, +min(p_retailprice) over (partition by p_mfgr) as s2, +max(p_retailprice) over (distribute by p_mfgr sort by p_mfgr) as s3, +round(avg(p_retailprice) over (distribute by p_mfgr),2) as s4, +count(p_retailprice) over (cluster by p_mfgr ) as s5 +from part; + +-- 38. testPartitioningVariousForms2 +explain vectorization detail +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part; +select p_mfgr, p_name, p_size, +round(sum(p_retailprice) over (partition by p_mfgr, p_name order by p_mfgr, p_name rows between unbounded preceding and current row),2) as s1, +min(p_retailprice) over (distribute by p_mfgr, p_name sort by p_mfgr, p_name rows between unbounded preceding and current row) as s2, +max(p_retailprice) over (partition by p_mfgr, p_name order by p_name) as s3 +from part; + +-- 39. testUDFOnOrderCols +explain vectorization detail +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part; +select p_mfgr, p_type, substr(p_type, 2) as short_ptype, +rank() over (partition by p_mfgr order by substr(p_type, 2)) as r +from part; + +-- 40. testNoBetweenForRows +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part ; +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows unbounded preceding),2) as s1 + from part ; + +-- 41. testNoBetweenForRange +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part ; + +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range unbounded preceding),2) as s1 + from part ; + +-- 42. testUnboundedFollowingForRows +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part ; +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_name rows between current row and unbounded following),2) as s1 + from part ; + +-- 43. testUnboundedFollowingForRange +explain vectorization detail +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part ; +select p_mfgr, p_name, p_size, + round(sum(p_retailprice) over (distribute by p_mfgr sort by p_size range between current row and unbounded following),2) as s1 + from part ; + +-- 44. testOverNoPartitionSingleAggregate +explain vectorization detail +select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name; +select p_name, p_retailprice, +round(avg(p_retailprice) over(),2) +from part +order by p_name; + +-- 45. empty partition test +explain vectorization detail +select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +; +select p_mfgr, + sum(p_size) over (partition by p_mfgr order by p_size rows between unbounded preceding and current row) +from part +where p_mfgr = 'Manufacturer#6' +; + +-- 46. window sz is same as partition sz +explain vectorization detail +select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1'; +select p_retailprice, round(avg(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2), +round(sum(p_retailprice) over (partition by p_mfgr order by p_name rows between current row and 6 following),2) +from part +where p_mfgr='Manufacturer#1'; + +-- 47. empty partition +explain vectorization detail +select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1'; +select sum(p_size) over (partition by p_mfgr ) +from part where p_mfgr = 'm1'; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_expressions.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_expressions.q b/ql/src/test/queries/clientpositive/vector_windowing_expressions.q new file mode 100644 index 0000000..7d8c5d5 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_expressions.q @@ -0,0 +1,94 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +; +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) = round(sum(lag(p_retailprice,1,0.0)) over w1 + last_value(p_retailprice) over w1 , 2), +max(p_retailprice) over w1 - min(p_retailprice) over w1 = last_value(p_retailprice) over w1 - first_value(p_retailprice) over w1 +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +; + +explain vectorization detail +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +; +select p_mfgr, p_retailprice, p_size, +rank() over (distribute by p_mfgr sort by p_retailprice) as r, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) as s2, +sum(p_retailprice) over (distribute by p_mfgr sort by p_retailprice rows between unbounded preceding and current row) -5 as s1 +from part +; + +explain vectorization detail +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100; +select s, si, f, si - lead(f, 3) over (partition by t order by bo,s,si,f desc) from over10k limit 100; +explain vectorization detail +select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100; +select s, i, i - lead(i, 3, 0) over (partition by si order by i,s) from over10k limit 100; +explain vectorization detail +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100; +select s, si, d, si - lag(d, 3) over (partition by b order by si,s,d) from over10k limit 100; +explain vectorization detail +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100; +select s, lag(s, 3, 'fred') over (partition by f order by b) from over10k limit 100; + +explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part; +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr, p_type order by p_mfgr) from part; + +explain vectorization detail +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part; +select p_mfgr, avg(p_retailprice) over(partition by p_mfgr order by p_type,p_mfgr rows between unbounded preceding and current row) from part; + +-- multi table insert test +create table t1 (a1 int, b1 string); +create table t2 (a1 int, b1 string); +explain vectorization detail +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ; +from (select sum(i) over (partition by ts order by i), s from over10k) tt insert overwrite table t1 select * insert overwrite table t2 select * ; +select * from t1 limit 3; +select * from t2 limit 3; + +explain vectorization detail +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11; +select p_mfgr, p_retailprice, p_size, +round(sum(p_retailprice) over w1 , 2) + 50.0 = round(sum(lag(p_retailprice,1,50.0)) over w1 + (last_value(p_retailprice) over w1),2) +from part +window w1 as (distribute by p_mfgr sort by p_retailprice) +limit 11; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_gby.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_gby.q b/ql/src/test/queries/clientpositive/vector_windowing_gby.q new file mode 100644 index 0000000..c7e9e7c --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_gby.q @@ -0,0 +1,21 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +set hive.mapred.mode=nonstrict; + +explain vectorization detail + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +; + + select rank() over (order by return_ratio) as return_rank from + (select sum(wr.cint)/sum(ws.c_int) as return_ratio + from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 + group by ws.c_boolean ) in_web +; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_gby2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_gby2.q b/ql/src/test/queries/clientpositive/vector_windowing_gby2.q new file mode 100644 index 0000000..2f51bcb --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_gby2.q @@ -0,0 +1,48 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +set hive.mapred.mode=nonstrict; + +explain vectorization detail +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key; + +select rank() over (order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by ws.key; + +explain vectorization detail +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int); + +select avg(cast(ws.key as int)) over (partition by min(ws.value) order by sum(ws.c_int)) as return_rank +from cbo_t3 ws +group by cast(ws.key as int); + +explain vectorization detail +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value; + +select rank () over(partition by key order by sum(c_int - c_float) desc) , +dense_rank () over(partition by lower(value) order by sum(c_float/c_int) asc), +percent_rank () over(partition by max(c_int) order by sum((c_float/c_int) - c_int) asc) +from cbo_t3 +group by key, value; + +explain vectorization detail +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean; + +select rank() over (order by sum(wr.cint)/sum(ws.c_int)) as return_rank +from cbo_t3 ws join alltypesorc wr on ws.value = wr.cstring1 +group by ws.c_boolean; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_multipartitioning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_multipartitioning.q b/ql/src/test/queries/clientpositive/vector_windowing_multipartitioning.q new file mode 100644 index 0000000..cdd6e03 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_multipartitioning.q @@ -0,0 +1,73 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k; +select s, rank() over (partition by s order by si), sum(b) over (partition by s order by si) from over10k; + +explain vectorization detail +select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck'; +select s, +rank() over (partition by s order by `dec` desc), +sum(b) over (partition by s order by ts desc) +from over10k +where s = 'tom allen' or s = 'bob steinbeck'; + +explain vectorization detail +select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' ; +select s, sum(i) over (partition by s), sum(f) over (partition by si) from over10k where s = 'tom allen' or s = 'bob steinbeck' ; + +explain vectorization detail +select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck'; +select s, rank() over (partition by s order by bo), rank() over (partition by si order by bin desc) from over10k +where s = 'tom allen' or s = 'bob steinbeck'; + +explain vectorization detail +select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s = 'bob steinbeck'; +select s, sum(f) over (partition by i), row_number() over (order by f) from over10k where s = 'tom allen' or s = 'bob steinbeck'; + +explain vectorization detail +select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order by f) +; +select s, rank() over w1, +rank() over w2 +from over10k +where s = 'tom allen' or s = 'bob steinbeck' +window +w1 as (partition by s order by `dec`), +w2 as (partition by si order by f) +; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_navfn.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_navfn.q b/ql/src/test/queries/clientpositive/vector_windowing_navfn.q index 9acbe97..22011cf 100644 --- a/ql/src/test/queries/clientpositive/vector_windowing_navfn.q +++ b/ql/src/test/queries/clientpositive/vector_windowing_navfn.q @@ -1,6 +1,8 @@ set hive.explain.user=false; set hive.cli.print.header=true; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; set hive.fetch.task.conversion=none; drop table over10k; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_order_null.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_order_null.q b/ql/src/test/queries/clientpositive/vector_windowing_order_null.q new file mode 100644 index 0000000..35d260d --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_order_null.q @@ -0,0 +1,58 @@ +set hive.explain.user=false; +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; +load data local inpath '../../data/files/over4_null' into table over10k; + +explain vectorization detail +select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10; +select i, s, b, sum(b) over (partition by i order by s nulls last,b rows unbounded preceding) from over10k limit 10; + +explain vectorization detail +select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10; +select d, s, f, sum(f) over (partition by d order by s,f desc nulls first rows unbounded preceding) from over10k limit 10; + +explain vectorization detail +select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10; +select ts, s, f, sum(f) over (partition by ts order by f asc nulls first range between current row and unbounded following) from over10k limit 10; + +explain vectorization detail +select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10; +select t, s, d, avg(d) over (partition by t order by s,d desc nulls first rows between 5 preceding and 5 following) from over10k limit 10; + +explain vectorization detail +select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3; +select ts, s, sum(i) over(partition by ts order by s nulls last) from over10k limit 10 offset 3; + +explain vectorization detail +select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5; +select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k limit 5; + +explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5; +select s, i, round(avg(d) over (partition by s order by i desc nulls last) / 10.0 , 3) from over10k limit 5; + +explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5; +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),3) from over10k window w1 as (partition by s order by i nulls last) limit 5; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_range_multiorder.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_range_multiorder.q b/ql/src/test/queries/clientpositive/vector_windowing_range_multiorder.q new file mode 100644 index 0000000..694431c --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_range_multiorder.q @@ -0,0 +1,68 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select first_value(t) over ( partition by si order by i, b ) from over10k limit 100; +select first_value(t) over ( partition by si order by i, b ) from over10k limit 100; + +explain vectorization detail +select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100; +select last_value(i) over (partition by si, bo order by i, f desc range current row) from over10k limit 100; + +explain vectorization detail +select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100; +select row_number() over (partition by si, bo order by i, f desc range between unbounded preceding and unbounded following) from over10k limit 100; + +explain vectorization detail +select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k; +select s, si, i, avg(i) over (partition by s range between unbounded preceding and current row) from over10k; + +explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; +select s, si, i, avg(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; +select s, si, i, min(i) over (partition by s order by si, i range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100; +select s, si, i, avg(i) over (partition by s order by si, i desc range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100; +select si, bo, i, f, max(i) over (partition by si, bo order by i, f desc range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100; +select bo, rank() over (partition by i order by bo nulls first, b nulls last range between unbounded preceding and unbounded following) from over10k limit 100; + +explain vectorization detail +select CAST(s as CHAR(12)), rank() over (partition by i order by CAST(s as CHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; +select CAST(s as CHAR(12)), rank() over (partition by i order by CAST(s as CHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; + +explain vectorization detail +select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; +select CAST(s as VARCHAR(12)), rank() over (partition by i order by CAST(s as VARCHAR(12)) nulls last range between unbounded preceding and unbounded following) from over10k limit 100; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_rank.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_rank.q b/ql/src/test/queries/clientpositive/vector_windowing_rank.q new file mode 100644 index 0000000..9f36330 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_rank.q @@ -0,0 +1,117 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select s, rank() over (partition by f order by t) from over10k limit 100; +select s, rank() over (partition by f order by t) from over10k limit 100; + +explain vectorization detail +select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100; +select s, dense_rank() over (partition by ts order by i,s desc) from over10k limit 100; + +explain vectorization detail +select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100; +select s, cume_dist() over (partition by bo order by b,s) from over10k limit 100; + +explain vectorization detail +select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100; +select s, percent_rank() over (partition by `dec` order by f) from over10k limit 100; + +-- If following tests fail, look for the comments in class PTFPPD::process() + +explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10; +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where rnk = 1 limit 10; + +explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10; +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + ) joined + ) ranked +where `dec` = 89.5 limit 10; + +explain vectorization detail +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10; +select ts, `dec`, rnk +from + (select ts, `dec`, + rank() over (partition by ts order by `dec`) as rnk + from + (select other.ts, other.`dec` + from over10k other + join over10k on (other.b = over10k.b) + where other.t < 10 + ) joined + ) ranked +where rnk = 1 limit 10; + http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_streaming.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_streaming.q b/ql/src/test/queries/clientpositive/vector_windowing_streaming.q new file mode 100644 index 0000000..424261a --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_streaming.q @@ -0,0 +1,85 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal(4,2), + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +set hive.limit.pushdown.memory.usage=.8; + +-- part tests +explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +; + +explain vectorization detail +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4; + +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 4; + +select * +from ( select p_mfgr, rank() over(partition by p_mfgr order by p_name) r from part) a +where r < 2; + +-- over10k tests +explain vectorization detail +select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5; + +select * +from (select t, f, rank() over(partition by t order by f) r from over10k) a +where r < 6 and t < 5; + +select * +from (select t, f, row_number() over(partition by t order by f) r from over10k) a +where r < 8 and t < 0; + +set hive.vectorized.execution.enabled=false; +set hive.limit.pushdown.memory.usage=0.8; + +explain vectorization detail +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; + +drop table if exists sB; +create table sB ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; + +select * from sB +where ctinyint is null; + +set hive.vectorized.execution.enabled=true; +set hive.limit.pushdown.memory.usage=0.8; +drop table if exists sD; + +explain vectorization detail +create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; +create table sD ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE as +select * from (select ctinyint, cdouble, rank() over(partition by ctinyint order by cdouble) r from alltypesorc) a where r < 5; + +select * from sD +where ctinyint is null; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_windowspec.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_windowspec.q b/ql/src/test/queries/clientpositive/vector_windowing_windowspec.q new file mode 100644 index 0000000..f2836c6 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_windowspec.q @@ -0,0 +1,70 @@ +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table over10k; + +create table over10k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + `dec` decimal, + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/over10k' into table over10k; + +explain vectorization detail +select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100; +select s, sum(b) over (partition by i order by s,b rows unbounded preceding) from over10k limit 100; + +explain vectorization detail +select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100; +select s, sum(f) over (partition by d order by s,f rows unbounded preceding) from over10k limit 100; + +explain vectorization detail +select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100; +select s, sum(f) over (partition by ts order by f range between current row and unbounded following) from over10k limit 100; + +explain vectorization detail +select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100; +select s, avg(f) over (partition by ts order by s,f rows between current row and 5 following) from over10k limit 100; + +explain vectorization detail +select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100; +select s, avg(d) over (partition by t order by s,d desc rows between 5 preceding and 5 following) from over10k limit 100; + +explain vectorization detail +select s, sum(i) over(partition by ts order by s) from over10k limit 100; +select s, sum(i) over(partition by ts order by s) from over10k limit 100; + +explain vectorization detail +select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100; +select f, sum(f) over (partition by ts order by f range between unbounded preceding and current row) from over10k limit 100; + +explain vectorization detail +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100; +select f, sum(f) over (partition by ts order by f rows between 2 preceding and 1 preceding) from over10k limit 100; + +explain vectorization detail +select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7; +select s, i, round(avg(d) over (partition by s order by i) / 10.0 , 2) from over10k limit 7; + +explain vectorization detail +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7; +select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i) limit 7; + +set hive.cbo.enable=false; +-- HIVE-9228 +explain vectorization detail +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7; +select s, i from ( select s, i, round((avg(d) over w1 + 10.0) - (avg(d) over w1 - 10.0),2) from over10k window w1 as (partition by s order by i)) X limit 7; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vector_windowing_windowspec4.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vector_windowing_windowspec4.q b/ql/src/test/queries/clientpositive/vector_windowing_windowspec4.q new file mode 100644 index 0000000..a787a43 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_windowing_windowspec4.q @@ -0,0 +1,37 @@ +--Test small dataset with larger windowing + +set hive.cli.print.header=true; +SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; +set hive.fetch.task.conversion=none; + +drop table if exists smalltable_windowing; + +create table smalltable_windowing( + i int, + type string); +insert into smalltable_windowing values(3, 'a'), (1, 'a'), (2, 'a'); + +explain vectorization detail +select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing; + +select type, i, +max(i) over (partition by type order by i rows between 1 preceding and 7 following), +min(i) over (partition by type order by i rows between 1 preceding and 7 following), +first_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +last_value(i) over (partition by type order by i rows between 1 preceding and 7 following), +avg(i) over (partition by type order by i rows between 1 preceding and 7 following), +sum(i) over (partition by type order by i rows between 1 preceding and 7 following), +collect_set(i) over (partition by type order by i rows between 1 preceding and 7 following), +count(i) over (partition by type order by i rows between 1 preceding and 7 following) +from smalltable_windowing; http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/queries/clientpositive/vectorized_ptf.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/vectorized_ptf.q b/ql/src/test/queries/clientpositive/vectorized_ptf.q index dbc7ca6..7f5a055 100644 --- a/ql/src/test/queries/clientpositive/vectorized_ptf.q +++ b/ql/src/test/queries/clientpositive/vectorized_ptf.q @@ -1,4 +1,5 @@ SET hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.ptf.enabled=true; set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/correlationoptimizer12.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out index 23443ee..ee9a6e7 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer12.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer12.q.out @@ -57,7 +57,7 @@ STAGE PLANS: arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), count_window_0 (type: bigint) @@ -142,7 +142,7 @@ STAGE PLANS: arguments: _col1 name: count window function: GenericUDAFCountEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), count_window_0 (type: bigint) http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/ctas_colname.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/ctas_colname.q.out b/ql/src/test/results/clientpositive/ctas_colname.q.out index b0cab7e..8d61c9d 100644 --- a/ql/src/test/results/clientpositive/ctas_colname.q.out +++ b/ql/src/test/results/clientpositive/ctas_colname.q.out @@ -190,7 +190,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -354,7 +354,7 @@ STAGE PLANS: arguments: _col0, 1 name: lead window function: GenericUDAFLeadEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/distinct_windowing.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/distinct_windowing.q.out b/ql/src/test/results/clientpositive/distinct_windowing.q.out index 1605a62..197687a 100644 --- a/ql/src/test/results/clientpositive/distinct_windowing.q.out +++ b/ql/src/test/results/clientpositive/distinct_windowing.q.out @@ -91,7 +91,7 @@ STAGE PLANS: arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: first_value_window_0 (type: tinyint) @@ -208,7 +208,7 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int) @@ -330,13 +330,13 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: first_value_window_1 arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out index aac939f..85d0777 100644 --- a/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out +++ b/ql/src/test/results/clientpositive/distinct_windowing_no_cbo.q.out @@ -91,7 +91,7 @@ STAGE PLANS: arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: first_value_window_0 (type: tinyint) @@ -208,7 +208,7 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int) @@ -330,13 +330,13 @@ STAGE PLANS: arguments: _col2 name: last_value window function: GenericUDAFLastValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT window function definition alias: first_value_window_1 arguments: _col0 name: first_value window function: GenericUDAFFirstValueEvaluator - window frame: PRECEDING(MAX)~CURRENT + window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 84795 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: last_value_window_0 (type: int), first_value_window_1 (type: tinyint) @@ -540,7 +540,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 21198 Data size: 169584 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -680,7 +680,7 @@ STAGE PLANS: arguments: _col2 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 127193 Data size: 1017544 Basic stats: COMPLETE Column stats: NONE Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/groupby_grouping_window.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_grouping_window.q.out b/ql/src/test/results/clientpositive/groupby_grouping_window.q.out index 4fc36ed..32135e4 100644 --- a/ql/src/test/results/clientpositive/groupby_grouping_window.q.out +++ b/ql/src/test/results/clientpositive/groupby_grouping_window.q.out @@ -110,7 +110,7 @@ STAGE PLANS: arguments: _col3 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE Select Operator http://git-wip-us.apache.org/repos/asf/hive/blob/a0df0ace/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out b/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out index 1bfde2f..d3b85f8 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_resolution.q.out @@ -720,7 +720,7 @@ STAGE PLANS: arguments: _col1 name: rank window function: GenericUDAFRankEvaluator - window frame: PRECEDING(MAX)~FOLLOWING(MAX) + window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true Statistics: Num rows: 83 Data size: 7885 Basic stats: COMPLETE Column stats: COMPLETE Select Operator