Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 60141200C8E for ; Thu, 25 May 2017 01:51:45 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 5E951160BD0; Wed, 24 May 2017 23:51:45 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 916E7160BDA for ; Thu, 25 May 2017 01:51:43 +0200 (CEST) Received: (qmail 56475 invoked by uid 500); 24 May 2017 23:51:37 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 54676 invoked by uid 99); 24 May 2017 23:51:36 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 24 May 2017 23:51:36 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 3E193E943C; Wed, 24 May 2017 23:51:36 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: weiz@apache.org To: commits@hive.apache.org Date: Wed, 24 May 2017 23:52:06 -0000 Message-Id: In-Reply-To: <398328dec060429180949c9c22cb3e43@git.apache.org> References: <398328dec060429180949c9c22cb3e43@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [32/54] [abbrv] hive git commit: HIVE-16113 PartitionPruner::removeNonPartCols needs to handle AND/OR cases (Gopal V reviewed by Ashutosh C) archived-at: Wed, 24 May 2017 23:51:45 -0000 HIVE-16113 PartitionPruner::removeNonPartCols needs to handle AND/OR cases (Gopal V reviewed by Ashutosh C) Project: 
http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5f4eaa9b Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5f4eaa9b Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5f4eaa9b Branch: refs/heads/hive-14535 Commit: 5f4eaa9b13e7beec8bb16fea94fec386e2bc1e00 Parents: 7429f5f Author: Remus Rusanu Authored: Sat May 20 12:05:29 2017 -0700 Committer: Remus Rusanu Committed: Sat May 20 12:05:29 2017 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 2 + .../hive/ql/optimizer/ppr/PartitionPruner.java | 23 +- .../queries/clientpositive/partition_pruning.q | 15 + .../clientpositive/llap/partition_pruning.q.out | 387 ++++++++++++++++++ .../clientpositive/partition_pruning.q.out | 396 +++++++++++++++++++ .../clientpositive/tez/explainanalyze_3.q.out | 8 +- .../clientpositive/tez/explainuser_3.q.out | 8 +- 7 files changed, 828 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/5f4eaa9b/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 1d9296a..e23ef63 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -214,6 +214,7 @@ minillaplocal.shared.query.files=alter_merge_2_orc.q,\ orc_ppd_schema_evol_2a.q,\ orc_ppd_schema_evol_2b.q,\ parallel.q,\ + partition_pruning.q,\ ptf.q,\ ptf_matchpath.q,\ ptf_streaming.q,\ @@ -539,6 +540,7 @@ minillaplocal.query.files=acid_globallimit.q,\ order_null.q,\ partition_multilevels.q,\ partition_shared_scan.q,\ + partition_pruning.q,\ ptf.q,\ ptf_streaming.q,\ quotedid_smb.q,\ 
http://git-wip-us.apache.org/repos/asf/hive/blob/5f4eaa9b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java index 2acfef7..6624865 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java @@ -66,6 +66,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; /** * The transformation step that does partition pruning. @@ -377,7 +379,7 @@ public class PartitionPruner extends Transform { // list or struct fields. return new ExprNodeConstantDesc(expr.getTypeInfo(), null); } - if (expr instanceof ExprNodeColumnDesc) { + else if (expr instanceof ExprNodeColumnDesc) { String column = ((ExprNodeColumnDesc) expr).getColumn(); if (!partCols.contains(column)) { // Column doesn't appear to be a partition column for the table. @@ -385,10 +387,25 @@ public class PartitionPruner extends Transform { } referred.add(column); } - if (expr instanceof ExprNodeGenericFuncDesc) { + else if (expr instanceof ExprNodeGenericFuncDesc) { List children = expr.getChildren(); for (int i = 0; i < children.size(); ++i) { - children.set(i, removeNonPartCols(children.get(i), partCols, referred)); + ExprNodeDesc other = removeNonPartCols(children.get(i), partCols, referred); + if (ExprNodeDescUtils.isNullConstant(other)) { + if (FunctionRegistry.isOpAnd(expr)) { + // partcol=... AND nonpartcol=... is replaced with partcol=... AND TRUE + // which will be folded to partcol=... 
+ // This cannot be done also for OR + Preconditions.checkArgument(expr.getTypeInfo().accept(TypeInfoFactory.booleanTypeInfo)); + other = new ExprNodeConstantDesc(expr.getTypeInfo(), true); + } else { + // Functions like NVL, COALESCE, CASE can change a + // NULL introduced by a nonpart column removal into a non-null + // and cause overaggressive pruning, missing data (incorrect result) + return new ExprNodeConstantDesc(expr.getTypeInfo(), null); + } + } + children.set(i, other); } } return expr; http://git-wip-us.apache.org/repos/asf/hive/blob/5f4eaa9b/ql/src/test/queries/clientpositive/partition_pruning.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/partition_pruning.q b/ql/src/test/queries/clientpositive/partition_pruning.q new file mode 100644 index 0000000..f5cab3f --- /dev/null +++ b/ql/src/test/queries/clientpositive/partition_pruning.q @@ -0,0 +1,15 @@ +create table daysales (customer int) partitioned by (dt string); + +insert into daysales partition(dt='2001-01-01') values(1); +insert into daysales partition(dt='2001-01-03') values(3); + +select * from daysales where nvl(dt='2001-01-01' and customer=1, false); +select * from daysales where nvl(dt='2001-01-02' and customer=1, false); +select * from daysales where nvl(dt='2001-01-01' and customer=1, true); +select * from daysales where (dt='2001-01-01' and customer=1); +select * from daysales where (dt='2001-01-01' or customer=3); +select * from daysales where (dt='2001-01-03' or customer=100); + +explain extended select * from daysales where nvl(dt='2001-01-01' and customer=1, false); +explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false); +explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false); http://git-wip-us.apache.org/repos/asf/hive/blob/5f4eaa9b/ql/src/test/results/clientpositive/llap/partition_pruning.q.out
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/llap/partition_pruning.q.out b/ql/src/test/results/clientpositive/llap/partition_pruning.q.out new file mode 100644 index 0000000..c525ee7 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/partition_pruning.q.out @@ -0,0 +1,387 @@ +PREHOOK: query: create table daysales (customer int) partitioned by (dt string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@daysales +POSTHOOK: query: create table daysales (customer int) partitioned by (dt string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@daysales +PREHOOK: query: insert into daysales partition(dt='2001-01-01') values(1) +PREHOOK: type: QUERY +PREHOOK: Output: default@daysales@dt=2001-01-01 +POSTHOOK: query: insert into daysales partition(dt='2001-01-01') values(1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@daysales@dt=2001-01-01 +POSTHOOK: Lineage: daysales PARTITION(dt=2001-01-01).customer EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into daysales partition(dt='2001-01-03') values(3) +PREHOOK: type: QUERY +PREHOOK: Output: default@daysales@dt=2001-01-03 +POSTHOOK: query: insert into daysales partition(dt='2001-01-03') values(3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@daysales@dt=2001-01-03 +POSTHOOK: Lineage: daysales PARTITION(dt=2001-01-03).customer EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, false) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, false) 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +1 2001-01-01 +PREHOOK: query: select * from daysales where nvl(dt='2001-01-02' and customer=1, false) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where nvl(dt='2001-01-02' and customer=1, false) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +#### A masked pattern was here #### +PREHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, true) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +1 2001-01-01 +PREHOOK: query: select * from daysales where (dt='2001-01-01' and customer=1) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where (dt='2001-01-01' and customer=1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +1 2001-01-01 +PREHOOK: query: select * from daysales where (dt='2001-01-01' or customer=3) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +PREHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where (dt='2001-01-01' or customer=3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +POSTHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### 
+1 2001-01-01 +3 2001-01-03 +PREHOOK: query: select * from daysales where (dt='2001-01-03' or customer=100) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +PREHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where (dt='2001-01-03' or customer=100) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +POSTHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### +3 2001-01-03 +PREHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' and customer=1, false) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' and customer=1, false) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-01 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A 
masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Processor Tree: + TableScan + alias: daysales + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: NVL((customer = 1),false) (type: boolean) + Select Operator + expressions: customer (type: int), dt (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-01 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-03 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Processor Tree: + TableScan + alias: daysales + GatherStats: 
false + Filter Operator + isSamplingPred: false + predicate: NVL(((dt = '2001-01-01') or (customer = 3)),false) (type: boolean) + Select Operator + expressions: customer (type: int), dt (type: string) + outputColumnNames: _col0, _col1 + ListSink + +PREHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-01 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-03 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Processor Tree: + TableScan + alias: daysales + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: NVL(((dt = '2001-01-01') or (customer = 3)),false) (type: boolean) + Select Operator + expressions: customer (type: int), dt (type: string) + outputColumnNames: _col0, _col1 + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/5f4eaa9b/ql/src/test/results/clientpositive/partition_pruning.q.out 
---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/partition_pruning.q.out b/ql/src/test/results/clientpositive/partition_pruning.q.out new file mode 100644 index 0000000..4614108 --- /dev/null +++ b/ql/src/test/results/clientpositive/partition_pruning.q.out @@ -0,0 +1,396 @@ +PREHOOK: query: create table daysales (customer int) partitioned by (dt string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@daysales +POSTHOOK: query: create table daysales (customer int) partitioned by (dt string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@daysales +PREHOOK: query: insert into daysales partition(dt='2001-01-01') values(1) +PREHOOK: type: QUERY +PREHOOK: Output: default@daysales@dt=2001-01-01 +POSTHOOK: query: insert into daysales partition(dt='2001-01-01') values(1) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@daysales@dt=2001-01-01 +POSTHOOK: Lineage: daysales PARTITION(dt=2001-01-01).customer EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into daysales partition(dt='2001-01-03') values(3) +PREHOOK: type: QUERY +PREHOOK: Output: default@daysales@dt=2001-01-03 +POSTHOOK: query: insert into daysales partition(dt='2001-01-03') values(3) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@daysales@dt=2001-01-03 +POSTHOOK: Lineage: daysales PARTITION(dt=2001-01-03).customer EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, false) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, false) +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +1 2001-01-01 +PREHOOK: query: select * from daysales where nvl(dt='2001-01-02' and customer=1, false) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where nvl(dt='2001-01-02' and customer=1, false) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +#### A masked pattern was here #### +PREHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, true) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where nvl(dt='2001-01-01' and customer=1, true) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +1 2001-01-01 +PREHOOK: query: select * from daysales where (dt='2001-01-01' and customer=1) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where (dt='2001-01-01' and customer=1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +#### A masked pattern was here #### +1 2001-01-01 +PREHOOK: query: select * from daysales where (dt='2001-01-01' or customer=3) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +PREHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where (dt='2001-01-01' or customer=3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +POSTHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### +1 2001-01-01 +3 
2001-01-03 +PREHOOK: query: select * from daysales where (dt='2001-01-03' or customer=100) +PREHOOK: type: QUERY +PREHOOK: Input: default@daysales +PREHOOK: Input: default@daysales@dt=2001-01-01 +PREHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### +POSTHOOK: query: select * from daysales where (dt='2001-01-03' or customer=100) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@daysales +POSTHOOK: Input: default@daysales@dt=2001-01-01 +POSTHOOK: Input: default@daysales@dt=2001-01-03 +#### A masked pattern was here #### +3 2001-01-03 +PREHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' and customer=1, false) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' and customer=1, false) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-01 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was 
here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Processor Tree: + TableScan + alias: daysales + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: NVL((customer = 1),false) (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: customer (type: int), dt (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-01 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here 
#### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-03 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Processor Tree: + TableScan + alias: daysales + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: NVL(((dt = '2001-01-01') or (customer = 3)),false) (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: customer (type: int), dt (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select * from daysales where nvl(dt='2001-01-01' or customer=3, false) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Partition Description: + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-01 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: default.daysales + Partition + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + dt 2001-01-03 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + numFiles 1 + numRows 1 + partition_columns dt + partition_columns.types string + rawDataSize 1 + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 2 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns customer + columns.comments + columns.types int +#### A masked pattern was here #### + name default.daysales + partition_columns dt + partition_columns.types string + serialization.ddl struct daysales { i32 customer} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.daysales + name: 
default.daysales + Processor Tree: + TableScan + alias: daysales + Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: NVL(((dt = '2001-01-01') or (customer = 3)),false) (type: boolean) + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: customer (type: int), dt (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE + ListSink + http://git-wip-us.apache.org/repos/asf/hive/blob/5f4eaa9b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 087f916..e5c8d6c 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -827,7 +827,7 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 2 <- Map 1 (CUSTOM_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE) Stage-0 Fetch Operator @@ -838,9 +838,9 @@ Stage-0 Select Operator [SEL_9] (rows=550/480 width=18) Output:["_col0","_col1","_col2"] Map Join Operator [MAPJOIN_25] (rows=550/480 width=18) - BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] - <-Map 1 [CUSTOM_EDGE] - MULTICAST [RS_6] + Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] + <-Map 1 [BROADCAST_EDGE] + BROADCAST [RS_6] PartitionCols:_col0 Select Operator [SEL_2] (rows=242/242 width=18) Output:["_col0","_col1"] http://git-wip-us.apache.org/repos/asf/hive/blob/5f4eaa9b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index adcff44..65c9114 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -655,7 +655,7 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 2 <- Map 1 (CUSTOM_EDGE) +Map 2 <- Map 1 (BROADCAST_EDGE) Stage-0 Fetch Operator @@ -666,9 +666,9 @@ Stage-0 Select Operator [SEL_33] (rows=550 width=18) Output:["_col0","_col1","_col2"] Map Join Operator [MAPJOIN_32] (rows=550 width=18) - BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] - <-Map 1 [CUSTOM_EDGE] vectorized - MULTICAST [RS_29] + Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] + <-Map 1 [BROADCAST_EDGE] vectorized + BROADCAST [RS_29] PartitionCols:_col0 Select Operator [SEL_28] (rows=242 width=18) Output:["_col0","_col1"]