Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id CCA5DDCAA for ; Mon, 20 May 2013 23:37:06 +0000 (UTC) Received: (qmail 61348 invoked by uid 500); 20 May 2013 23:37:07 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 61266 invoked by uid 500); 20 May 2013 23:37:06 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 61254 invoked by uid 99); 20 May 2013 23:37:06 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 20 May 2013 23:37:06 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 20 May 2013 23:37:00 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 93C502388906; Mon, 20 May 2013 23:36:38 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1484623 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/optimizer/ java/org/apache/hadoop/hive/ql/optimizer/physical/ test/queries/clientpositive/ test/results/clientpositive/ Date: Mon, 20 May 2013 23:36:38 -0000 To: commits@hive.apache.org From: omalley@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130520233638.93C502388906@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: omalley Date: Mon May 20 23:36:37 2013 New Revision: 1484623 URL: http://svn.apache.org/r1484623 Log: HIVE-4521 Auto join conversion fails in certain cases (Gunther Hagleitner via omalley) Added: hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java?rev=1484623&r1=1484622&r2=1484623&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractBucketJoinProc.java Mon May 20 23:36:37 2013 @@ -303,7 +303,7 @@ abstract public class AbstractBucketJoin // The number of files for the table should be same as number of buckets. int bucketCount = p.getBucketCount(); - if (fileNames.size() != bucketCount) { + if (fileNames.size() != 0 && fileNames.size() != bucketCount) { String msg = "The number of buckets for table " + tbl.getTableName() + " partition " + p.getName() + " is " + p.getBucketCount() + ", whereas the number of files is " + fileNames.size(); @@ -333,7 +333,7 @@ abstract public class AbstractBucketJoin Integer num = new Integer(tbl.getNumBuckets()); // The number of files for the table should be same as number of buckets. - if (fileNames.size() != num) { + if (fileNames.size() != 0 && fileNames.size() != num) { String msg = "The number of buckets for table " + tbl.getTableName() + " is " + tbl.getNumBuckets() + ", whereas the number of files is " + fileNames.size(); Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java?rev=1484623&r1=1484622&r2=1484623&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.java Mon May 20 23:36:37 2013 @@ -48,7 +48,7 @@ public class AvgPartitionSizeBasedBigTab public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp) throws SemanticException { int bigTablePos = 0; - long maxSize = 0; + long maxSize = -1; int numPartitionsCurrentBigTable = 0; // number of partitions for the chosen big table HiveConf conf = parseCtx.getConf(); @@ -79,7 +79,7 @@ public class AvgPartitionSizeBasedBigTab for (Partition part : partsList.getNotDeniedPartns()) { totalSize += getSize(conf, part); } - averageSize = totalSize/numPartitions; + averageSize = numPartitions == 0 ? 0 : totalSize/numPartitions; } if (averageSize > maxSize) { Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java?rev=1484623&r1=1484622&r2=1484623&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/TableSizeBasedBigTableSelectorForAutoSMJ.java Mon May 20 23:36:37 2013 @@ -41,7 +41,7 @@ implements BigTableSelectorForAutoSMJ { public int getBigTablePosition(ParseContext parseCtx, JoinOperator joinOp) throws SemanticException { int bigTablePos = 0; - long maxSize = 0; + long maxSize = -1; HiveConf conf = parseCtx.getConf(); try { Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java?rev=1484623&r1=1484622&r2=1484623&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/CommonJoinTaskDispatcher.java Mon May 20 23:36:37 2013 @@ -466,7 +466,7 @@ public class CommonJoinTaskDispatcher ex HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); boolean bigTableFound = false; - long largestBigTableCandidateSize = 0; + long largestBigTableCandidateSize = -1; long sumTableSizes = 0; for (String alias : aliasToWork.keySet()) { int tablePosition = getPosition(currWork, joinOp, alias); Added: hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q?rev=1484623&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/auto_join32.q Mon May 20 23:36:37 2013 @@ -0,0 +1,78 @@ +set hive.auto.convert.join=true; + +-- empty tables +create table studenttab10k (name string, age int, gpa double); +create table votertab10k (name string, age int, registration string, contributions float); + +explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name; + +set hive.optimize.bucketmapjoin=true; +set hive.optimize.bucketmapjoin.sortedmerge=true; +set hive.auto.convert.sortmerge.join=true; + +-- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets; +create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets; + +explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb; +load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb; +load data local inpath '../data/files/empty1.txt' into table votertab10k_smb; +load data local inpath '../data/files/empty2.txt' into table votertab10k_smb; + +explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name; + +-- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets; +create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets; + +load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo'); +load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo'); +load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo'); +load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo'); + +explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name; + +select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name; + +drop table studenttab10k; +drop table votertab10k; +drop table studenttab10k_smb; +drop table votertab10k_smb; +drop table studenttab10k_part; +drop table votertab10k_part; \ No newline at end of file Added: hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out?rev=1484623&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/auto_join32.q.out Mon May 20 23:36:37 2013 @@ -0,0 +1,633 @@ +PREHOOK: query: -- empty tables +create table studenttab10k (name string, age int, gpa double) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- empty tables +create table studenttab10k (name string, age int, gpa double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table votertab10k (name string, age int, registration string, contributions float) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@votertab10k +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k) s) (TOK_TABREF (TOK_TABNAME votertab10k) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-4 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + s + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + s + TableScan + alias: s + HashTable Sink Operator + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + Position of Big Table: 1 + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + v + TableScan + alias: v + Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col7 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: string + outputColumnNames: _col0, _col7 + Group By Operator + aggregations: + expr: count(DISTINCT _col7) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k +PREHOOK: Input: default@votertab10k +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k s join votertab10k v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Input: default@votertab10k +#### A masked pattern was here #### +PREHOOK: query: -- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- smb +create table studenttab10k_smb (name string, age int, gpa double) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table votertab10k_smb (name string, age int, registration string, contributions float) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + s + TableScan + alias: s + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col7 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: string + outputColumnNames: _col0, _col7 + Group By Operator + aggregations: + expr: count(DISTINCT _col7) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_smb +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_smb) s) (TOK_TABREF (TOK_TABNAME votertab10k_smb) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + s + TableScan + alias: s + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col7 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col7 + type: string + outputColumnNames: _col0, _col7 + Group By Operator + aggregations: + expr: count(DISTINCT _col7) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_smb s join votertab10k_smb v +on (s.name = v.name) +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Input: default@votertab10k_smb +#### A masked pattern was here #### +PREHOOK: query: -- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- smb + partitions +create table studenttab10k_part (name string, age int, gpa double) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table votertab10k_part (name string, age int, registration string, contributions float) partitioned by (p string) clustered by (name) sorted by (name) into 2 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@votertab10k_part +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@studenttab10k_part@p=foo +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table studenttab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@studenttab10k_part@p=foo +PREHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: load data local inpath '../data/files/empty1.txt' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo') +PREHOOK: type: LOAD +PREHOOK: Output: default@votertab10k_part@p=foo +POSTHOOK: query: load data local inpath '../data/files/empty2.txt' into table votertab10k_part partition (p='foo') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@votertab10k_part@p=foo +PREHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +POSTHOOK: query: explain select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME studenttab10k_part) s) (TOK_TABREF (TOK_TABNAME votertab10k_part) v) (= (. (TOK_TABLE_OR_COL s) name) (. (TOK_TABLE_OR_COL v) name)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s) name)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL registration)))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL s) p) 'bar') (= (. (TOK_TABLE_OR_COL v) p) 'bar'))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s) name)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + s + TableScan + alias: s + Filter Operator + predicate: + expr: (p = 'bar') + type: boolean + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {name} + 1 {registration} + handleSkewJoin: false + keys: + 0 [Column[name]] + 1 [Column[name]] + outputColumnNames: _col0, _col8 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col8 + type: string + outputColumnNames: _col0, _col8 + Group By Operator + aggregations: + expr: count(DISTINCT _col8) + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col8 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +PREHOOK: type: QUERY +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +POSTHOOK: query: select s.name, count(distinct registration) +from studenttab10k_part s join votertab10k_part v +on (s.name = v.name) +where s.p = 'bar' +and v.p = 'bar' +group by s.name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Input: default@votertab10k_part +#### A masked pattern was here #### +PREHOOK: query: drop table studenttab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k +PREHOOK: Output: default@studenttab10k +POSTHOOK: query: drop table studenttab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k +POSTHOOK: Output: default@studenttab10k +PREHOOK: query: drop table votertab10k +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k +PREHOOK: Output: default@votertab10k +POSTHOOK: query: drop table votertab10k +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k +POSTHOOK: Output: default@votertab10k +PREHOOK: query: drop table studenttab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_smb +PREHOOK: Output: default@studenttab10k_smb +POSTHOOK: query: drop table studenttab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_smb +POSTHOOK: Output: default@studenttab10k_smb +PREHOOK: query: drop table votertab10k_smb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_smb +PREHOOK: Output: default@votertab10k_smb +POSTHOOK: query: drop table votertab10k_smb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_smb +POSTHOOK: Output: default@votertab10k_smb +PREHOOK: query: drop table studenttab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@studenttab10k_part +PREHOOK: Output: default@studenttab10k_part +POSTHOOK: query: drop table studenttab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@studenttab10k_part +POSTHOOK: Output: default@studenttab10k_part +PREHOOK: query: drop table votertab10k_part +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@votertab10k_part +PREHOOK: Output: default@votertab10k_part +POSTHOOK: query: drop table votertab10k_part +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@votertab10k_part +POSTHOOK: Output: default@votertab10k_part