From: sershe@apache.org
To: commits@hive.apache.org
Date: Mon, 19 Sep 2016 22:41:56 -0000
Subject: [10/34] hive git commit: HIVE-14251: Union All of different types resolves to incorrect data (Aihua Xu, reviewed by Mohit Sabharwal)

HIVE-14251: Union All of different types resolves to incorrect data (Aihua Xu, reviewed by Mohit Sabharwal)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/76fe9e78
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/76fe9e78
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/76fe9e78

Branch: refs/heads/hive-14535
Commit: 76fe9e783643a59c3e92c1c47d4a39ad744771cc
Parents: 0b62e6f
Author: Aihua Xu
Authored: Fri Jul 22 13:26:18 2016 -0400
Committer: Aihua Xu
Committed: Wed Sep 14 14:50:33 2016 -0400

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  33 ++-
 .../hive/ql/exec/TestFunctionRegistry.java      |  15 +-
 .../clientpositive/alter_partition_change_col.q |   2 +-
 .../clientpositive/alter_table_cascade.q        |   4 +-
 .../queries/clientpositive/groupby_sort_1_23.q  |   4 +-
 .../clientpositive/groupby_sort_skew_1_23.q     |   8 +-
 ql/src/test/queries/clientpositive/union32.q    |   8 +-
 ql/src/test/queries/clientpositive/union33.q    |  10 +-
 ql/src/test/queries/clientpositive/union36.q    |   4 +-
 .../queries/clientpositive/unionDistinct_1.q    |  16 +--
 .../queries/clientpositive/union_date_trim.q    |   2 +-
 ql/src/test/queries/clientpositive/union_null.q |   8 +-
 .../queries/clientpositive/union_remove_12.q    |   4 +-
 .../queries/clientpositive/union_remove_13.q    |   4 +-
 .../queries/clientpositive/union_remove_14.q    |   4 +-
 .../queries/clientpositive/union_type_chk.q     |   7 -
 .../clientpositive/unionall_join_nullconstant.q |   2 +-
 .../alter_partition_change_col.q.out            |   4 +-
 .../clientpositive/alter_table_cascade.q.out    |   8 +-
 .../clientpositive/groupby_sort_1_23.q.out      | 122 ++++++++--------
 .../clientpositive/groupby_sort_skew_1_23.q.out | 132 ++++++++---------
 .../clientpositive/llap/unionDistinct_1.q.out   |  70 +++++----
 .../clientpositive/llap/union_type_chk.q.out    |  30 ----
 .../spark/groupby_sort_1_23.q.out               | 134 ++++++++---------
 .../spark/groupby_sort_skew_1_23.q.out          | 144 +++++++++----------
 .../results/clientpositive/spark/union32.q.out  | 130 ++++++++---------
 .../results/clientpositive/spark/union33.q.out  |  16 +--
 .../clientpositive/spark/union_date_trim.q.out  |   4 +-
 .../clientpositive/spark/union_null.q.out       |  12 +-
 .../clientpositive/spark/union_remove_12.q.out  |  48 +++----
 .../clientpositive/spark/union_remove_13.q.out  |  52 +++----
 .../clientpositive/spark/union_remove_14.q.out  |  48 +++----
 .../test/results/clientpositive/union32.q.out   | 130 ++++++++---------
 .../test/results/clientpositive/union33.q.out   |  16 +--
 .../test/results/clientpositive/union36.q.out   |  28 ++--
 .../clientpositive/unionDistinct_1.q.out        |  70 +++++----
 .../clientpositive/union_date_trim.q.out        |   4 +-
 .../results/clientpositive/union_null.q.out     |  12 +-
 .../clientpositive/union_remove_12.q.out        |   8 +-
 .../clientpositive/union_remove_13.q.out        |  26 ++--
 .../clientpositive/union_remove_14.q.out        |   8 +-
 .../results/clientpositive/union_type_chk.q.out |  30 ----
 .../unionall_join_nullconstant.q.out            |   4 +-
 .../hive/serde2/typeinfo/TypeInfoUtils.java     |   3 +
 44 files changed, 660 insertions(+), 768 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 4710b8f..a854f9f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -697,7 +697,9 @@ public final class FunctionRegistry {
   }
 
   /**
-   * Find a common class for union-all operator
+   * Find a common type for union-all operator. Only the common types for the same
+   * type group will resolve to a common type. No implicit conversion across different
+   * type groups will be done.
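+   *
+   * For example, int and double resolve to double (both in NUMERIC_GROUP), and
+   * date and timestamp resolve to timestamp (both in DATE_GROUP), while string
+   * and int resolve to null because they belong to different type groups
+   * (an illustrative summary of the rules below, not wording from this commit).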
    */
   public static TypeInfo getCommonClassForUnionAll(TypeInfo a, TypeInfo b) {
     if (a.equals(b)) {
@@ -716,26 +718,21 @@
     PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
     PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
 
-    // handle string types properly
-    if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) {
-      return getTypeInfoForPrimitiveCategory(
-          (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING);
+    if (pgA != pgB) {
+      return null;
     }
 
-    if (TypeInfoUtils.implicitConvertible(a, b)) {
-      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB);
-    }
-    if (TypeInfoUtils.implicitConvertible(b, a)) {
-      return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA);
-    }
-    for (PrimitiveCategory t : TypeInfoUtils.numericTypeList) {
-      if (TypeInfoUtils.implicitConvertible(pcA, t)
-          && TypeInfoUtils.implicitConvertible(pcB, t)) {
-        return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t);
-      }
+    switch(pgA) {
+    case STRING_GROUP:
+      return getTypeInfoForPrimitiveCategory(
+          (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING);
+    case NUMERIC_GROUP:
+      return TypeInfoUtils.implicitConvertible(a, b) ? b : a;
+    case DATE_GROUP:
+      return TypeInfoFactory.timestampTypeInfo;
+    default:
+      return null;
     }
-
-    return null;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
index 59ecd1e..d2d5a1b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java
@@ -327,14 +327,12 @@ public class TestFunctionRegistry extends TestCase {
   }
 
   public void testCommonClassUnionAll() {
+    unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.intTypeInfo,
+        TypeInfoFactory.doubleTypeInfo);
     unionAll(TypeInfoFactory.intTypeInfo, TypeInfoFactory.decimalTypeInfo,
         TypeInfoFactory.decimalTypeInfo);
-    unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo,
-        TypeInfoFactory.stringTypeInfo);
     unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.decimalTypeInfo,
         TypeInfoFactory.doubleTypeInfo);
-    unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo,
-        TypeInfoFactory.stringTypeInfo);
 
     unionAll(varchar5, varchar10, varchar10);
     unionAll(varchar10, varchar5, varchar10);
@@ -346,8 +344,13 @@
     unionAll(char10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
     unionAll(TypeInfoFactory.stringTypeInfo, char10, TypeInfoFactory.stringTypeInfo);
 
-    // common class for char/varchar is string?
- comparison(char10, varchar5, TypeInfoFactory.stringTypeInfo); + unionAll(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.dateTypeInfo, + TypeInfoFactory.timestampTypeInfo); + + // Invalid cases + unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, null); + unionAll(TypeInfoFactory.doubleTypeInfo, varchar10, null); + } public void testGetTypeInfoForPrimitiveCategory() { http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/alter_partition_change_col.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/alter_partition_change_col.q b/ql/src/test/queries/clientpositive/alter_partition_change_col.q index 360f4d2..23de3d7 100644 --- a/ql/src/test/queries/clientpositive/alter_partition_change_col.q +++ b/ql/src/test/queries/clientpositive/alter_partition_change_col.q @@ -12,7 +12,7 @@ create table alter_partition_change_col1 (c1 string, c2 string) partitioned by ( insert overwrite table alter_partition_change_col1 partition (p1, p2) select c1, c2, 'abc', '123' from alter_partition_change_col0 union all - select c1, c2, null, '123' from alter_partition_change_col0; + select c1, c2, cast(null as string), '123' from alter_partition_change_col0; show partitions alter_partition_change_col1; select * from alter_partition_change_col1 where p1='abc'; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/alter_table_cascade.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/alter_table_cascade.q b/ql/src/test/queries/clientpositive/alter_table_cascade.q index acca4e8..288fe4a 100644 --- a/ql/src/test/queries/clientpositive/alter_table_cascade.q +++ b/ql/src/test/queries/clientpositive/alter_table_cascade.q @@ -15,7 +15,7 @@ create table alter_table_cascade (c1 string) partitioned by (p1 string, p2 strin insert overwrite table alter_table_cascade partition (p1, p2) select c1, 'abc', '123' from alter_table_src union all - select c1, null, '123' from alter_table_src; + select c1, cast(null as string), '123' from alter_table_src; show partitions alter_table_cascade; describe alter_table_cascade; @@ -92,7 +92,7 @@ create table alter_table_restrict (c1 string) partitioned by (p1 string, p2 stri insert overwrite table alter_table_restrict partition (p1, p2) select c1, 'abc', '123' from alter_table_src union all - select c1, null, '123' from alter_table_src; + select c1, cast(null as string), '123' from alter_table_src; show partitions alter_table_restrict; describe alter_table_restrict; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/groupby_sort_1_23.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q index 67fdd23..f0a00fb 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_1_23.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_1_23.q @@ -134,14 +134,14 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1; INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY 
key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1; SELECT * FROM outputTbl1; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q index 39b9420..38384dc 100644 --- a/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q +++ b/ql/src/test/queries/clientpositive/groupby_sort_skew_1_23.q @@ -100,12 +100,12 @@ SELECT * FROM outputTbl3; -- group by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key; INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key; @@ -135,14 +135,14 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1; INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1; SELECT * FROM outputTbl1; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union32.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union32.q b/ql/src/test/queries/clientpositive/union32.q index b18d484..4d9de6a 100644 --- a/ql/src/test/queries/clientpositive/union32.q +++ b/ql/src/test/queries/clientpositive/union32.q @@ -51,13 +51,13 @@ SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a ; SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION ALL SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a ; @@ -67,11 +67,11 @@ EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION ALL -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a ; SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION ALL -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = 
b.key) a ; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union33.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union33.q b/ql/src/test/queries/clientpositive/union33.q index 017442e..994060a 100644 --- a/ql/src/test/queries/clientpositive/union33.q +++ b/ql/src/test/queries/clientpositive/union33.q @@ -11,7 +11,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION ALL - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a; @@ -20,7 +20,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION ALL - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a; @@ -28,7 +28,7 @@ SELECT COUNT(*) FROM test_src; EXPLAIN INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION ALL SELECT key, value FROM src @@ -37,7 +37,7 @@ UNION ALL INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION ALL SELECT key, value FROM src @@ -45,4 +45,4 @@ UNION ALL )a; SELECT COUNT(*) FROM test_src; - \ No newline at end of file + http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union36.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union36.q b/ql/src/test/queries/clientpositive/union36.q index b79ff0f..f050e1a 100644 --- a/ql/src/test/queries/clientpositive/union36.q +++ b/ql/src/test/queries/clientpositive/union36.q @@ -2,9 +2,9 @@ set hive.mapred.mode=nonstrict; set hive.cbo.enable=false; -- SORT_QUERY_RESULTS -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u; +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast('100000000' as decimal(10,0)) x from (select * from src limit 2) s3)u; -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u; +select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as decimal(10,0)) x from (select * from src limit 2) s3)u; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/unionDistinct_1.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/unionDistinct_1.q b/ql/src/test/queries/clientpositive/unionDistinct_1.q index 5c52d9b..9792267 100644 --- a/ql/src/test/queries/clientpositive/unionDistinct_1.q +++ b/ql/src/test/queries/clientpositive/unionDistinct_1.q @@ -911,13 +911,13 @@ SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON 
a.key = b.key) a -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a ; SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a ; @@ -927,13 +927,13 @@ EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a ; SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a ; -- union33.q @@ -950,7 +950,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a; @@ -959,7 +959,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a; @@ -967,7 +967,7 @@ SELECT COUNT(*) FROM test_src; EXPLAIN INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src @@ -976,7 +976,7 @@ UNION DISTINCT INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_date_trim.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_date_trim.q b/ql/src/test/queries/clientpositive/union_date_trim.q index 6842e56..51f5997 100644 --- a/ql/src/test/queries/clientpositive/union_date_trim.q +++ b/ql/src/test/queries/clientpositive/union_date_trim.q @@ -4,4 +4,4 @@ insert into table testDate select 1, '2014-04-07' from src where key=100 limit 1 insert into table testDate select 2, '2014-04-08' from src where key=100 limit 1; insert into table testDate select 3, '2014-04-09' from src where key=100 limit 1; --- without the fix following query will throw HiveException: Incompatible types for union operator -insert into table testDate select id, tm from (select id, dt as tm from testDate where id = 1 union all select id, dt as tm from testDate where id = 2 union all select id, trim(Cast (dt as string)) as tm from testDate where id = 3 ) a; +insert into table testDate select id, tm from (select id, dt as tm from testDate where id = 1 union all select id, dt as tm from 
testDate where id = 2 union all select id, cast(trim(Cast (dt as string)) as date) as tm from testDate where id = 3 ) a; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_null.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_null.q b/ql/src/test/queries/clientpositive/union_null.q index 23da07a..45448b4 100644 --- a/ql/src/test/queries/clientpositive/union_null.q +++ b/ql/src/test/queries/clientpositive/union_null.q @@ -1,10 +1,10 @@ -- SORT_BEFORE_DIFF -- HIVE-2901 -select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a; -set hive.cbo.returnpath.hiveop=true; -select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select NULL as x from src limit 5)b )a; +select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a; +set hive.cbo.returnpath.hiveop=true; +select x from (select * from (select value as x from src order by x limit 5)a union all select * from (select cast(NULL as string) as x from src limit 5)b )a; set hive.cbo.returnpath.hiveop=false; -- HIVE-4837 -select * from (select * from (select null as N from src1 group by key)a UNION ALL select * from (select null as N from src1 group by key)b ) a; +select * from (select * from (select cast(null as string) as N from src1 group by key)a UNION ALL select * from (select cast(null as string) as N from src1 group by key)b ) a; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_remove_12.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_remove_12.q b/ql/src/test/queries/clientpositive/union_remove_12.q index b665666..6bfb991 100644 --- a/ql/src/test/queries/clientpositive/union_remove_12.q +++ b/ql/src/test/queries/clientpositive/union_remove_12.q @@ -34,7 +34,7 @@ SELECT * FROM ( select key, 1 as `values` from inputTbl1 union all -select a.key as key, b.val as `values` +select a.key as key, cast(b.val as bigint) as `values` FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; @@ -43,7 +43,7 @@ SELECT * FROM ( select key, 1 as `values` from inputTbl1 union all -select a.key as key, b.val as `values` +select a.key as key, cast(b.val as bigint) as `values` FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_remove_13.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_remove_13.q b/ql/src/test/queries/clientpositive/union_remove_13.q index 11077fd..4d59b6b 100644 --- a/ql/src/test/queries/clientpositive/union_remove_13.q +++ b/ql/src/test/queries/clientpositive/union_remove_13.q @@ -34,7 +34,7 @@ SELECT * FROM ( select key, count(1) as `values` from inputTbl1 group by key union all -select a.key as key, b.val as `values` +select a.key as key, cast(b.val as bigint) as `values` FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; @@ -43,7 +43,7 @@ SELECT * FROM ( select key, count(1) as `values` from inputTbl1 group by key union all -select a.key as key, b.val as `values` +select a.key as key, cast(b.val as bigint) as `values` FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; 
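
These union_remove changes all follow the same pattern: a string column (b.val) unioned with a numeric column no longer resolves to a common type implicitly, so the queries now cast it to bigint themselves. Below is a minimal sketch of the resolution rules behind that, written against the patched FunctionRegistry above; the demo class and its main() harness are illustrative, not part of this commit:

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class UnionAllTypeResolutionSketch {
  public static void main(String[] args) {
    // Same NUMERIC_GROUP: resolves to the side the other implicitly
    // converts into, so int UNION ALL bigint -> bigint.
    System.out.println(FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.intTypeInfo, TypeInfoFactory.longTypeInfo));

    // Same DATE_GROUP: resolves to timestamp, so
    // date UNION ALL timestamp -> timestamp.
    System.out.println(FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo));

    // Different groups: no common type (returns null), which is why these
    // tests now spell out cast(b.val as bigint).
    System.out.println(FunctionRegistry.getCommonClassForUnionAll(
        TypeInfoFactory.stringTypeInfo, TypeInfoFactory.longTypeInfo));
  }
}

With the switch statement added in this commit, the three calls print bigint, timestamp, and null.
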
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_remove_14.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_remove_14.q b/ql/src/test/queries/clientpositive/union_remove_14.q index b559b35..3ffb985 100644 --- a/ql/src/test/queries/clientpositive/union_remove_14.q +++ b/ql/src/test/queries/clientpositive/union_remove_14.q @@ -35,7 +35,7 @@ SELECT * FROM ( select key, 1 as `values` from inputTbl1 union all -select a.key as key, b.val as `values` +select a.key as key, cast(b.val as bigint) as `values` FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; @@ -44,7 +44,7 @@ SELECT * FROM ( select key, 1 as `values` from inputTbl1 union all -select a.key as key, b.val as `values` +select a.key as key, cast(b.val as bigint) as `values` FROM inputTbl1 a join inputTbl1 b on a.key=b.key )c; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/union_type_chk.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/union_type_chk.q b/ql/src/test/queries/clientpositive/union_type_chk.q deleted file mode 100644 index ff2e7cf..0000000 --- a/ql/src/test/queries/clientpositive/union_type_chk.q +++ /dev/null @@ -1,7 +0,0 @@ -set hive.mapred.mode=nonstrict; -set hive.cbo.enable=false; - --- SORT_QUERY_RESULTS -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u; - -select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u; http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q b/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q index 4f0ffa6..6d6fa66 100644 --- a/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q +++ b/ql/src/test/queries/clientpositive/unionall_join_nullconstant.q @@ -21,7 +21,7 @@ CREATE TABLE table_b2 CREATE VIEW a_view AS SELECT substring(a1.composite_key, 1, locate('|',a1.composite_key) - 1) AS autoname, -NULL AS col1 +cast(NULL as string) AS col1 FROM table_a1 a1 FULL OUTER JOIN table_a2 a2 ON a1.composite_key = a2.composite_key http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/alter_partition_change_col.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/alter_partition_change_col.q.out b/ql/src/test/results/clientpositive/alter_partition_change_col.q.out index 23febee..fff987c 100644 --- a/ql/src/test/results/clientpositive/alter_partition_change_col.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_change_col.q.out @@ -29,14 +29,14 @@ POSTHOOK: Output: default@alter_partition_change_col1 PREHOOK: query: insert overwrite table alter_partition_change_col1 partition (p1, p2) select c1, c2, 'abc', '123' from alter_partition_change_col0 union all - select c1, c2, null, '123' from 
alter_partition_change_col0 + select c1, c2, cast(null as string), '123' from alter_partition_change_col0 PREHOOK: type: QUERY PREHOOK: Input: default@alter_partition_change_col0 PREHOOK: Output: default@alter_partition_change_col1 POSTHOOK: query: insert overwrite table alter_partition_change_col1 partition (p1, p2) select c1, c2, 'abc', '123' from alter_partition_change_col0 union all - select c1, c2, null, '123' from alter_partition_change_col0 + select c1, c2, cast(null as string), '123' from alter_partition_change_col0 POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_partition_change_col0 POSTHOOK: Output: default@alter_partition_change_col1@p1=__HIVE_DEFAULT_PARTITION__/p2=123 http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/alter_table_cascade.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/alter_table_cascade.q.out b/ql/src/test/results/clientpositive/alter_table_cascade.q.out index 1d8204c..5b9c9ee 100644 --- a/ql/src/test/results/clientpositive/alter_table_cascade.q.out +++ b/ql/src/test/results/clientpositive/alter_table_cascade.q.out @@ -37,14 +37,14 @@ POSTHOOK: Output: default@alter_table_cascade PREHOOK: query: insert overwrite table alter_table_cascade partition (p1, p2) select c1, 'abc', '123' from alter_table_src union all - select c1, null, '123' from alter_table_src + select c1, cast(null as string), '123' from alter_table_src PREHOOK: type: QUERY PREHOOK: Input: default@alter_table_src PREHOOK: Output: default@alter_table_cascade POSTHOOK: query: insert overwrite table alter_table_cascade partition (p1, p2) select c1, 'abc', '123' from alter_table_src union all - select c1, null, '123' from alter_table_src + select c1, cast(null as string), '123' from alter_table_src POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_table_src POSTHOOK: Output: default@alter_table_cascade@p1=__HIVE_DEFAULT_PARTITION__/p2=123 @@ -902,14 +902,14 @@ POSTHOOK: Output: default@alter_table_restrict PREHOOK: query: insert overwrite table alter_table_restrict partition (p1, p2) select c1, 'abc', '123' from alter_table_src union all - select c1, null, '123' from alter_table_src + select c1, cast(null as string), '123' from alter_table_src PREHOOK: type: QUERY PREHOOK: Input: default@alter_table_src PREHOOK: Output: default@alter_table_restrict POSTHOOK: query: insert overwrite table alter_table_restrict partition (p1, p2) select c1, 'abc', '123' from alter_table_src union all - select c1, null, '123' from alter_table_src + select c1, cast(null as string), '123' from alter_table_src POSTHOOK: type: QUERY POSTHOOK: Input: default@alter_table_src POSTHOOK: Output: default@alter_table_restrict@p1=__HIVE_DEFAULT_PARTITION__/p2=123 http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 81fe0d9..e70f912 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -2791,7 +2791,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 
PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by @@ -2800,7 +2800,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -2902,23 +2902,27 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Select Operator + expressions: UDFToString(_col0) (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -2937,47 +2941,43 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - 
serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false TableScan GatherStats: false Union @@ -3027,7 +3027,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types double,bigint + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3036,7 +3036,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types double,bigint + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3310,7 +3310,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -3319,7 +3319,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 5cf0ea2..fc52984 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -2330,7 +2330,7 @@ PREHOOK: query: -- it should not matter what follows the group by -- group by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY @@ -2340,7 +2340,7 @@ POSTHOOK: query: -- it should not matter what follows the group by -- group by 
followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY @@ -2514,7 +2514,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + expressions: UDFToInteger(UDFToString(_col0)) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2581,14 +2581,14 @@ STAGE PLANS: #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 -SELECT key + key, sum(cnt) from +SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY @@ -3055,7 +3055,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by @@ -3064,7 +3064,7 @@ INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -3232,23 +3232,27 @@ STAGE PLANS: mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + Select Operator + expressions: UDFToString(_col0) (type: string), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1 - columns.types double,bigint - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Map Reduce @@ -3267,47 +3271,43 @@ STAGE PLANS: mode: final 
outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Union + Union + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 1 - numRows 10 - rawDataSize 30 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + columns key,cnt + columns.comments + columns.types int:int +#### A masked pattern was here #### + name default.outputtbl1 + numFiles 1 + numRows 10 + rawDataSize 30 + serialization.ddl struct outputtbl1 { i32 key, i32 cnt} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 40 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false TableScan GatherStats: false Union @@ -3357,7 +3357,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types double,bigint + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3366,7 +3366,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 - columns.types double,bigint + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -3640,7 +3640,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( 
SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 @@ -3649,7 +3649,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL -SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key +SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index 52af8fd..624d886 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -13532,14 +13532,14 @@ POSTHOOK: Input: default@t2 PREHOOK: query: -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a PREHOOK: type: QUERY POSTHOOK: query: -- Test union with join on the left selecting multiple columns EXPLAIN SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS STRING) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY @@ -13580,7 +13580,7 @@ STAGE PLANS: 1 Map 4 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(UDFToDouble(_col1)) (type: string) + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToString(CAST( _col1 AS varchar(20))) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -13660,7 +13660,7 @@ STAGE PLANS: ListSink PREHOOK: query: SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a PREHOOK: type: QUERY @@ -13668,7 +13668,7 @@ PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM -(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key +(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS CHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key UNION DISTINCT SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a POSTHOOK: type: QUERY @@ -13676,30 +13676,24 @@ POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### 0.0 0 -0.0 0.0 2.0 2 -2.0 2.0 
4.0 4 -4.0 4.0 5.0 5 -5.0 5.0 8.0 8 -8.0 8.0 9.0 9 -9.0 9.0 PREHOOK: query: -- Test union with join on the right selecting multiple columns EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a PREHOOK: type: QUERY POSTHOOK: query: -- Test union with join on the right selecting multiple columns EXPLAIN SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -13721,18 +13715,18 @@ STAGE PLANS: alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(key) (type: double), UDFToDouble(key) (type: double) + expressions: UDFToDouble(key) (type: double), key (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: double), _col1 (type: double) + keys: _col0 (type: double), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs @@ -13759,18 +13753,18 @@ STAGE PLANS: 1 Map 5 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToDouble(UDFToLong(_col0)) (type: double), UDFToDouble(_col1) (type: double) + expressions: UDFToDouble(UDFToLong(_col0)) (type: double), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: double), _col1 (type: double) + keys: _col0 (type: double), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: double) + key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: double) + Map-reduce partition columns: _col0 (type: double), _col1 (type: string) Statistics: Num rows: 21 Data size: 147 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs @@ -13797,7 +13791,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: double), KEY._col1 (type: double) + keys: KEY._col0 (type: double), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -13820,7 +13814,7 @@ STAGE PLANS: 
PREHOOK: query: SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 @@ -13828,17 +13822,17 @@ PREHOOK: Input: default@t2 POSTHOOK: query: SELECT * FROM (SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2 UNION DISTINCT -SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a +SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS VARCHAR(20)) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -0.0 0.0 -2.0 2.0 -4.0 4.0 -5.0 5.0 -8.0 8.0 -9.0 9.0 +0.0 0 +2.0 2 +4.0 4 +5.0 5 +8.0 8 +9.0 9 PREHOOK: query: -- union33.q -- SORT_BEFORE_DIFF @@ -13868,7 +13862,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a PREHOOK: type: QUERY @@ -13877,7 +13871,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a POSTHOOK: type: QUERY @@ -14007,7 +14001,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a PREHOOK: type: QUERY @@ -14018,7 +14012,7 @@ SELECT key, value FROM ( SELECT key, value FROM src WHERE key = 0 UNION DISTINCT - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key )a POSTHOOK: type: QUERY @@ -14037,7 +14031,7 @@ POSTHOOK: Input: default@test_src 310 PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src @@ -14046,7 +14040,7 @@ UNION DISTINCT PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src @@ -14176,7 +14170,7 @@ STAGE PLANS: PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src @@ -14187,7 +14181,7 @@ PREHOOK: Input: default@src PREHOOK: Output: default@test_src POSTHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( - SELECT key, COUNT(*) AS value FROM src + SELECT key, cast(COUNT(*) as string) AS value FROM src GROUP BY key UNION DISTINCT SELECT key, value FROM src http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/llap/union_type_chk.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/union_type_chk.q.out b/ql/src/test/results/clientpositive/llap/union_type_chk.q.out deleted file mode 100644 
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/llap/union_type_chk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_type_chk.q.out b/ql/src/test/results/clientpositive/llap/union_type_chk.q.out
deleted file mode 100644
index 1eb0182..0000000
--- a/ql/src/test/results/clientpositive/llap/union_type_chk.q.out
+++ /dev/null
@@ -1,30 +0,0 @@
-PREHOOK: query: -- SORT_QUERY_RESULTS
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: -- SORT_QUERY_RESULTS
-select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select '100000000' x from (select * from src limit 2) s3)u
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-0.4999999900000002
-0.4999999900000002
-4.999999900000002E-9
-4.999999900000002E-9
-4.999999900000002E-9
-4.999999900000002E-9
-PREHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u
-PREHOOK: type: QUERY
-PREHOOK: Input: default@src
-#### A masked pattern was here ####
-POSTHOOK: query: select (x/sum(x) over()) as y from(select cast(1 as decimal(10,0)) as x from (select * from src limit 2)s1 union all select cast(1 as decimal(10,0)) x from (select * from src limit 2) s2 union all select cast (null as string) x from (select * from src limit 2) s3)u
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@src
-#### A masked pattern was here ####
-0.25
-0.25
-0.25
-0.25
-NULL
-NULL
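Note (not part of the patch): union_type_chk.q is removed because it depended on the old cross-group coercion. It unioned two DECIMAL(10,0) branches with a STRING branch ('100000000') and let Hive resolve all three to DOUBLE. Under the new rule that union is only valid once the string branch is cast into the numeric group explicitly; a sketch of the equivalent query, where the DECIMAL target type is an illustrative choice and not taken from the patch:

    select (x/sum(x) over()) as y
    from (select cast(1 as decimal(10,0)) as x from (select * from src limit 2) s1
          union all
          select cast(1 as decimal(10,0)) as x from (select * from src limit 2) s2
          union all
          -- an explicit cast replaces the old implicit string-to-double conversion
          select cast('100000000' as decimal(10,0)) as x from (select * from src limit 2) s3) u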
http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
index 408c1b9..c6a7982 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out
@@ -1941,7 +1941,7 @@ INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
 ) subq1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by
@@ -1950,7 +1950,7 @@ INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
 ) subq1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -1982,44 +1982,40 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
+                    expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
-                      outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 1
 #### A masked pattern was here ####
-                        NumFilesPerFileSink: 1
-                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          properties:
+                            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                            bucket_count -1
+                            columns key,cnt
+                            columns.comments
+                            columns.types int:int
 #### A masked pattern was here ####
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            properties:
-                              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
-                              bucket_count -1
-                              columns key,cnt
-                              columns.comments
-                              columns.types int:int
-#### A masked pattern was here ####
-                              name default.outputtbl1
-                              numFiles 4
-                              numRows 10
-                              rawDataSize 30
-                              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
-                              serialization.format 1
-                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                              totalSize 40
+                            name default.outputtbl1
+                            numFiles 4
+                            numRows 10
+                            rawDataSize 30
+                            serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            totalSize 40
 #### A masked pattern was here ####
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.outputtbl1
-                      TotalFiles: 1
-                      GatherStats: true
-                      MultiFileSpray: false
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+                    TotalFiles: 1
+                    GatherStats: true
+                    MultiFileSpray: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2161,40 +2157,44 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+                    expressions: UDFToString(_col0) (type: string), _col1 (type: bigint)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 1
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
 #### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          properties:
-                            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
-                            bucket_count -1
-                            columns key,cnt
-                            columns.comments
-                            columns.types int:int
-#### A masked pattern was here ####
-                            name default.outputtbl1
-                            numFiles 4
-                            numRows 10
-                            rawDataSize 30
-                            serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            totalSize 40
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                              bucket_count -1
+                              columns key,cnt
+                              columns.comments
+                              columns.types int:int
+#### A masked pattern was here ####
+                              name default.outputtbl1
+                              numFiles 4
+                              numRows 10
+                              rawDataSize 30
+                              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                              totalSize 40
+#### A masked pattern was here ####
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl1
+                      TotalFiles: 1
+                      GatherStats: true
+                      MultiFileSpray: false
   Stage: Stage-0
     Move Operator
@@ -2231,7 +2231,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
@@ -2240,7 +2240,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1

http://git-wip-us.apache.org/repos/asf/hive/blob/76fe9e78/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
index 6325889..a438124 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out
@@ -1467,7 +1467,7 @@ PREHOOK: query: -- it should not matter what follows the group by
 -- group by followed by another group by
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
 group by key + key
 PREHOOK: type: QUERY
@@ -1477,7 +1477,7 @@ POSTHOOK: query: -- it should not matter what follows the group by
 -- group by followed by another group by
 EXPLAIN EXTENDED
 INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
 group by key + key
 POSTHOOK: type: QUERY
@@ -1610,7 +1610,7 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+                    expressions: UDFToInteger(UDFToString(_col0)) (type: int), UDFToInteger(_col1) (type: int)
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
@@ -1677,14 +1677,14 @@ STAGE PLANS:
 #### A masked pattern was here ####
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
 group by key + key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Output: default@outputtbl1
 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
-SELECT key + key, sum(cnt) from
+SELECT cast(key + key as string), sum(cnt) from
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
 group by key + key
 POSTHOOK: type: QUERY
@@ -2017,7 +2017,7 @@ INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
 ) subq1
 PREHOOK: type: QUERY
 POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by
@@ -2026,7 +2026,7 @@ INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) FROM T1 GROUP BY key + key
 ) subq1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
@@ -2059,44 +2059,40 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint)
+                    expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
-                      outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      GlobalTableId: 1
+#### A masked pattern was here ####
+                      NumFilesPerFileSink: 1
                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 1
 #### A masked pattern was here ####
-                        NumFilesPerFileSink: 1
-                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.TextInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                          properties:
+                            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                            bucket_count -1
+                            columns key,cnt
+                            columns.comments
+                            columns.types int:int
 #### A masked pattern was here ####
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            properties:
-                              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
-                              bucket_count -1
-                              columns key,cnt
-                              columns.comments
-                              columns.types int:int
-#### A masked pattern was here ####
-                              name default.outputtbl1
-                              numFiles 4
-                              numRows 10
-                              rawDataSize 30
-                              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
-                              serialization.format 1
-                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                              totalSize 40
+                            name default.outputtbl1
+                            numFiles 4
+                            numRows 10
+                            rawDataSize 30
+                            serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+                            serialization.format 1
+                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            totalSize 40
 #### A masked pattern was here ####
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.outputtbl1
-                      TotalFiles: 1
-                      GatherStats: true
-                      MultiFileSpray: false
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.outputtbl1
+                    TotalFiles: 1
+                    GatherStats: true
+                    MultiFileSpray: false
             Path -> Alias:
 #### A masked pattern was here ####
             Path -> Partition:
@@ -2256,40 +2252,44 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+                    expressions: UDFToString(_col0) (type: string), _col1 (type: bigint)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 1
-#### A masked pattern was here ####
-                      NumFilesPerFileSink: 1
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
+                      outputColumnNames: _col0, _col1
                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
 #### A masked pattern was here ####
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          properties:
-                            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
-                            bucket_count -1
-                            columns key,cnt
-                            columns.comments
-                            columns.types int:int
-#### A masked pattern was here ####
-                            name default.outputtbl1
-                            numFiles 4
-                            numRows 10
-                            rawDataSize 30
-                            serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
-                            serialization.format 1
-                            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            totalSize 40
+                        NumFilesPerFileSink: 1
+                        Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            properties:
+                              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                              bucket_count -1
+                              columns key,cnt
+                              columns.comments
+                              columns.types int:int
+#### A masked pattern was here ####
+                              name default.outputtbl1
+                              numFiles 4
+                              numRows 10
+                              rawDataSize 30
+                              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
+                              serialization.format 1
+                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                              totalSize 40
+#### A masked pattern was here ####
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                          name: default.outputtbl1
+                      TotalFiles: 1
+                      GatherStats: true
+                      MultiFileSpray: false
  Stage: Stage-0
    Move Operator
@@ -2326,7 +2326,7 @@ PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
@@ -2335,7 +2335,7 @@ POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT * FROM (
 SELECT key, count(1) as cnt FROM T1 GROUP BY key
 UNION ALL
-SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key
+SELECT cast(key + key as string) as key, count(1) as cnt FROM T1 GROUP BY key + key
 ) subq1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
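Note (not part of the patch): the groupby_sort and groupby_sort_skew updates are the same pattern on the writer side. key is a string column in these tests, so key + key is arithmetic and yields a DOUBLE; that branch no longer unifies implicitly with the STRING branch of the UNION ALL, hence the explicit cast in the updated queries:

    SELECT key, count(1) FROM T1 GROUP BY key              -- key: string
    UNION ALL
    SELECT cast(key + key as string) as key, count(1)
    FROM T1 GROUP BY key + key                             -- double cast back to string

The plan hunks show the consequence: the old UDFToDouble(_col0) conversion disappears, UDFToString(_col0) appears on the arithmetic branch, and the UDFToInteger conversions needed for the int-typed output table are layered on top in a second Select Operator.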