Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 118E4200B38 for ; Fri, 8 Jul 2016 22:12:26 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 0E83B160A77; Fri, 8 Jul 2016 20:12:26 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id E1021160A36 for ; Fri, 8 Jul 2016 22:12:24 +0200 (CEST) Received: (qmail 27459 invoked by uid 500); 8 Jul 2016 20:12:24 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 27427 invoked by uid 99); 8 Jul 2016 20:12:23 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 08 Jul 2016 20:12:23 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 0F784DFFF8; Fri, 8 Jul 2016 20:12:22 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jcamacho@apache.org To: commits@hive.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-14147: Hive PPD might remove predicates when they are defined as a simple expr e.g. WHERE 'a' (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Date: Fri, 8 Jul 2016 20:12:22 +0000 (UTC) archived-at: Fri, 08 Jul 2016 20:12:26 -0000 Repository: hive Updated Branches: refs/heads/branch-1 d2b6bdf94 -> 67fecf3f7 HIVE-14147: Hive PPD might remove predicates when they are defined as a simple expr e.g. WHERE 'a' (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/67fecf3f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/67fecf3f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/67fecf3f Branch: refs/heads/branch-1 Commit: 67fecf3f76b8a93529525653288d1d2655c4ac24 Parents: d2b6bdf Author: Jesus Camacho Rodriguez Authored: Fri Jul 8 20:45:57 2016 +0100 Committer: Jesus Camacho Rodriguez Committed: Fri Jul 8 21:12:12 2016 +0100 ---------------------------------------------------------------------- data/files/small_csv.csv | 32 ++ .../hadoop/hive/ql/ppd/ExprWalkerInfo.java | 12 + .../hive/ql/ppd/ExprWalkerProcFactory.java | 5 +- .../hadoop/hive/ql/ppd/OpProcFactory.java | 2 + .../clientpositive/filter_cond_pushdown2.q | 71 ++++ .../clientpositive/filter_cond_pushdown2.q.out | 344 +++++++++++++++++++ 6 files changed, 465 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/67fecf3f/data/files/small_csv.csv ---------------------------------------------------------------------- diff --git a/data/files/small_csv.csv b/data/files/small_csv.csv new file mode 100644 index 0000000..ba424c0 --- /dev/null +++ b/data/files/small_csv.csv @@ -0,0 +1,32 @@ +00117,b95f91b8-a2a0-4d28-85ab-54ec0a80535d,FALSE,TRUE,FALSE,FALSE,FALSE +194483,ced84e01-c225-4a4f-b666-75ad4f62fe20,FALSE,FALSE,TRUE,TRUE,TRUE +269414,98451f7e-c82d-4fab-964d-ea49d4c112fe,TRUE,FALSE,FALSE,TRUE,TRUE +591165,a4124e14-cb38-42bb-a01c-41c67dbf96cd,FALSE,FALSE,TRUE,FALSE,FALSE +239413,5d61b10e-ff2a-42f9-b53d-633624de361f,TRUE,TRUE,FALSE,FALSE,TRUE +210078,7264397c-68e9-4a58-9f65-88132bdf6e19,FALSE,TRUE,FALSE,TRUE,FALSE +453386,a8451345-1d3e-4119-9bdd-29a53ac3c6e9,TRUE,FALSE,FALSE,FALSE,FALSE +252028,0e1b95c7-8b1b-40ce-a0a3-ca7264c85c36,TRUE,FALSE,FALSE,FALSE,TRUE +34488,40b8125c-9281-46c4-9d5b-cf224602e518,FALSE,FALSE,FALSE,TRUE,TRUE +634211,92a674cc-a56d-4653-b63f-a054cc2fd357,FALSE,FALSE,FALSE,TRUE,TRUE +300632,39dad068-50f8-49cd-8ee9-fbfa08440212,TRUE,TRUE,TRUE,TRUE,FALSE +807477,4c736a2c-efc9-4bb2-be67-4ef661cfa5bc,TRUE,FALSE,FALSE,FALSE,TRUE +750354,9e6a04c4-2432-4321-903c-b6ac5355b8cc,TRUE,FALSE,FALSE,TRUE,FALSE +244589,85054796-31c5-4f8c-a921-c216be9c6c4f,FALSE,TRUE,FALSE,FALSE,TRUE +474061,8521a204-3288-48ba-8c12-c8399acf05b6,TRUE,TRUE,TRUE,FALSE,FALSE +537646,35241560-d282-4807-9ecb-d1e6e6d74b61,TRUE,FALSE,TRUE,FALSE,FALSE +715496,53fbfa25-0571-4bf4-a4cc-721bdca030f1,TRUE,TRUE,FALSE,FALSE,TRUE +301813,98e711fa-80c7-44b4-9140-684cae60e79f,TRUE,FALSE,TRUE,FALSE,TRUE +438598,6124cb91-2bf8-4d18-bc1a-aadbf6b8d543,TRUE,FALSE,TRUE,TRUE,TRUE +409611,6cc3ee6f-53e0-4867-8ebc-f846241c813d,TRUE,TRUE,FALSE,TRUE,TRUE +575004,646fcfc5-72d4-41ab-8595-74cfaee7eaae,TRUE,TRUE,TRUE,TRUE,TRUE +613055,e775fd20-67e3-40a6-ac0b-b7182a89acd0,FALSE,FALSE,FALSE,TRUE,TRUE +95304,951e4ac8-ac20-4d12-ac31-d3a1b205e06a,FALSE,TRUE,TRUE,TRUE,FALSE +440611,a29e1e6f-a419-46e1-b85f-af8b607c77e1,FALSE,FALSE,FALSE,TRUE,FALSE +198916,b8a955a1-3c75-4428-9af2-b081e1209f3a,FALSE,TRUE,TRUE,FALSE,TRUE +676296,702ad9a0-7aa4-4c32-968b-c9c61623d71a,TRUE,TRUE,TRUE,TRUE,FALSE +117344,ed9bf09e-b3dd-44ad-8d54-b8f9372fe002,FALSE,TRUE,TRUE,FALSE,FALSE +486264,ffff7b94-0e5d-4f01-a008-ddb21674bd03,FALSE,FALSE,FALSE,TRUE,FALSE +216051,da7bdb1c-688f-49e5-9a49-0ec65c2f2d08,TRUE,FALSE,TRUE,TRUE,TRUE +804209,bea55182-b650-4c4f-9983-568745f7e96a,TRUE,TRUE,FALSE,TRUE,FALSE +31651,a33cca26-60e6-4744-a1c6-589e4aeb012b,TRUE,TRUE,TRUE,FALSE,TRUE +324048,c645867d-6e80-4ac4-a375-9103340a327d,FALSE,TRUE,TRUE,TRUE,FALSE http://git-wip-us.apache.org/repos/asf/hive/blob/67fecf3f/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java index fca671c..e3fb8fa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerInfo.java @@ -212,6 +212,18 @@ public class ExprWalkerInfo implements NodeProcessorCtx { return false; } + public boolean hasNonFinalCandidates() { + if (nonFinalPreds == null || nonFinalPreds.isEmpty()) { + return false; + } + for (List exprs : nonFinalPreds.values()) { + if (!exprs.isEmpty()) { + return true; + } + } + return false; + } + /** * Adds the specified expr as a non-final candidate * http://git-wip-us.apache.org/repos/asf/hive/blob/67fecf3f/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java index fbd6739..e067ff3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java @@ -47,6 +47,8 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.ppd.ExprWalkerInfo.ExprInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Expression factory for predicate pushdown processing. Each processor @@ -372,7 +374,8 @@ public final class ExprWalkerProcFactory { ExprInfo exprInfo = ctx.getExprInfo(expr); if (exprInfo != null && exprInfo.isCandidate) { - ctx.addFinalCandidate(exprInfo.alias, expr); + ctx.addFinalCandidate(exprInfo.alias, exprInfo.convertedExpr != null ? + exprInfo.convertedExpr : expr); return; } else if (!FunctionRegistry.isOpAnd(expr) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEPPDREMOVEDUPLICATEFILTERS)) { http://git-wip-us.apache.org/repos/asf/hive/blob/67fecf3f/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java index dbd021b..9988dca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/OpProcFactory.java @@ -386,6 +386,8 @@ public final class OpProcFactory { TableScanOperator tsOp = (TableScanOperator) nd; mergeWithChildrenPred(tsOp, owi, null, null); ExprWalkerInfo pushDownPreds = owi.getPrunedPreds(tsOp); + // nonFinalCandidates predicates should be empty + assert pushDownPreds == null || !pushDownPreds.hasNonFinalCandidates(); return createFilter(tsOp, pushDownPreds, owi); } http://git-wip-us.apache.org/repos/asf/hive/blob/67fecf3f/ql/src/test/queries/clientpositive/filter_cond_pushdown2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/filter_cond_pushdown2.q b/ql/src/test/queries/clientpositive/filter_cond_pushdown2.q new file mode 100644 index 0000000..4234834 --- /dev/null +++ b/ql/src/test/queries/clientpositive/filter_cond_pushdown2.q @@ -0,0 +1,71 @@ +set hive.cbo.enable=false; + +drop table if exists users_table; +CREATE TABLE users_table( + `field_1` int, + `field_2` string, + `field_3` boolean, + `field_4` boolean, + `field_5` boolean, + `field_6` boolean, + `field_7` boolean) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +STORED AS TEXTFILE; +load data local inpath '../../data/files/small_csv.csv' into table users_table; + +explain +with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1; + +with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1; http://git-wip-us.apache.org/repos/asf/hive/blob/67fecf3f/ql/src/test/results/clientpositive/filter_cond_pushdown2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/filter_cond_pushdown2.q.out b/ql/src/test/results/clientpositive/filter_cond_pushdown2.q.out new file mode 100644 index 0000000..13e17e8 --- /dev/null +++ b/ql/src/test/results/clientpositive/filter_cond_pushdown2.q.out @@ -0,0 +1,344 @@ +PREHOOK: query: drop table if exists users_table +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists users_table +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE users_table( + `field_1` int, + `field_2` string, + `field_3` boolean, + `field_4` boolean, + `field_5` boolean, + `field_6` boolean, + `field_7` boolean) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@users_table +POSTHOOK: query: CREATE TABLE users_table( + `field_1` int, + `field_2` string, + `field_3` boolean, + `field_4` boolean, + `field_5` boolean, + `field_6` boolean, + `field_7` boolean) +ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@users_table +PREHOOK: query: load data local inpath '../../data/files/small_csv.csv' into table users_table +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@users_table +POSTHOOK: query: load data local inpath '../../data/files/small_csv.csv' into table users_table +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@users_table +PREHOOK: query: explain +with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-0 depends on stages: Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: users_table + Statistics: Num rows: 21 Data size: 2280 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: field_3 (type: boolean) + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: field_1 (type: int), field_2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 542 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 5 Data size: 542 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 5 Data size: 542 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int), _col1 (type: string) + 1 _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 5 Data size: 596 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), sum(if(_col3 is not null, 1, 0)) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 596 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 596 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 238 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 2 Data size: 238 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 238 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 238 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: users_table + Statistics: Num rows: 21 Data size: 2280 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: field_4 (type: boolean) + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: field_1 (type: int), field_2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 10 Data size: 1085 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 542 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@users_table +#### A masked pattern was here #### +POSTHOOK: query: with all_hits as ( +select * from users_table +), +all_exposed_users as ( +select distinct +field_1, +field_2 +from all_hits +where field_3 +), +interacted as ( +select distinct +field_1, +field_2 +from all_hits +where field_4 +) +select +all_exposed_users.field_1, +count(*) as nr_exposed, +sum(if(interacted.field_2 is not null, 1, 0)) as nr_interacted +from all_exposed_users +left outer join interacted +on all_exposed_users.field_1 = interacted.field_1 +and all_exposed_users.field_2 = interacted.field_2 +group by all_exposed_users.field_1 +order by all_exposed_users.field_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@users_table +#### A masked pattern was here #### +31651 1 1 +216051 1 0 +239413 1 1 +252028 1 0 +269414 1 0 +300632 1 1 +301813 1 0 +409611 1 1 +438598 1 0 +453386 1 0 +474061 1 1 +537646 1 0 +575004 1 1 +676296 1 1 +715496 1 1 +750354 1 0 +804209 1 1 +807477 1 0