Return-Path: Delivered-To: apmail-hadoop-hive-commits-archive@minotaur.apache.org Received: (qmail 36693 invoked from network); 10 Nov 2009 21:32:40 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 10 Nov 2009 21:32:40 -0000 Received: (qmail 70984 invoked by uid 500); 10 Nov 2009 21:32:40 -0000 Delivered-To: apmail-hadoop-hive-commits-archive@hadoop.apache.org Received: (qmail 70963 invoked by uid 500); 10 Nov 2009 21:32:40 -0000 Mailing-List: contact hive-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hadoop.apache.org Delivered-To: mailing list hive-commits@hadoop.apache.org Received: (qmail 70933 invoked by uid 99); 10 Nov 2009 21:32:36 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 10 Nov 2009 21:32:36 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 10 Nov 2009 21:32:33 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id A093323888DC; Tue, 10 Nov 2009 21:32:11 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r834678 - in /hadoop/hive/trunk: CHANGES.txt ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java ql/src/test/queries/clientpositive/join38.q ql/src/test/results/clientpositive/join38.q.out Date: Tue, 10 Nov 2009 21:32:11 -0000 To: hive-commits@hadoop.apache.org From: namit@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20091110213211.A093323888DC@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: namit Date: Tue Nov 10 21:32:10 2009 New Revision: 834678 URL: http://svn.apache.org/viewvc?rev=834678&view=rev Log: HIVE-921 MapJoin schema reordering (Ning Zhang via namit) Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out Modified: hadoop/hive/trunk/CHANGES.txt hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java Modified: hadoop/hive/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=834678&r1=834677&r2=834678&view=diff ============================================================================== --- hadoop/hive/trunk/CHANGES.txt (original) +++ hadoop/hive/trunk/CHANGES.txt Tue Nov 10 21:32:10 2009 @@ -252,6 +252,8 @@ HIVE-804 Support deletion of partitions based on a prefix partition spefication (Zheng Shao via namit) + HIVE-921 MapJoin schema reordering (Ning Zhang via namit) + Release 0.4.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java?rev=834678&r1=834677&r2=834678&view=diff ============================================================================== --- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java (original) +++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinFactory.java Tue Nov 10 21:32:10 2009 @@ -199,8 +199,8 @@ Task mjTask = TaskFactory.get(mjPlan, parseCtx.getConf()); tableDesc tt_desc = - PlanUtils.getIntermediateFileTableDesc(PlanUtils.sortFieldSchemas( - PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol"))); + PlanUtils.getIntermediateFileTableDesc( + PlanUtils.getFieldSchemasFromRowSchema(mapJoin.getSchema(), "temporarycol")); // generate the temporary file Context baseCtx = parseCtx.getContext(); @@ -239,7 +239,7 @@ return null; } } - + /** * MapJoin followed by MapJoin */ Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q?rev=834678&view=auto ============================================================================== --- hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q (added) +++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/join38.q Tue Nov 10 21:32:10 2009 @@ -0,0 +1,20 @@ +drop table tmp; + +create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 string,col7 string,col8 string,col9 string,col10 string,col11 string); + +insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100; + +select * from tmp; + +explain +FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5; + +FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5; + +drop table tmp; Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out?rev=834678&view=auto ============================================================================== --- hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out (added) +++ hadoop/hive/trunk/ql/src/test/results/clientpositive/join38.q.out Tue Nov 10 21:32:10 2009 @@ -0,0 +1,207 @@ +PREHOOK: query: drop table tmp +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tmp +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 string,col7 string,col8 string,col9 string,col10 string,col11 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table tmp(col0 string, col1 string,col2 string,col3 string,col4 string,col5 string,col6 string,col7 string,col8 string,col9 string,col10 string,col11 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tmp +PREHOOK: query: insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tmp +POSTHOOK: query: insert overwrite table tmp select key, cast(key + 1 as int), key +2, key+3, key+4, cast(key+5 as int), key+6, key+7, key+8, key+9, key+10, cast(key+11 as int) from src where key = 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tmp +PREHOOK: query: select * from tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp +PREHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1628655450/10000 +POSTHOOK: query: select * from tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp +POSTHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1628655450/10000 +100 101 102.0 103.0 104.0 105 106.0 107.0 108.0 109.0 110.0 111 +100 101 102.0 103.0 104.0 105 106.0 107.0 108.0 109.0 110.0 111 +PREHOOK: query: explain +FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +PREHOOK: type: QUERY +POSTHOOK: query: explain +FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src a) (TOK_TABREF tmp b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) col11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) col5)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) col11) 111)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) col5)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Filter Operator + predicate: + expr: (col11 = 111) + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 {col5} {col11} + keys: + 0 [Column[key]] + 1 [Column[col11]] + outputColumnNames: _col1, _col7, _col13 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 {col5} {col11} + keys: + 0 [Column[key]] + 1 [Column[col11]] + outputColumnNames: _col1, _col7, _col13 + Position of Big Table: 1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/2083729567/10002 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col7 + type: string + expr: _col13 + type: string + outputColumnNames: _col1, _col7, _col13 + Filter Operator + predicate: + expr: (_col13 = 111) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col7 + type: string + outputColumnNames: _col1, _col7 + Group By Operator + aggregations: + expr: count(1) + keys: + expr: _col1 + type: string + expr: _col7 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +PREHOOK: type: QUERY +PREHOOK: Input: default@tmp +PREHOOK: Input: default@src +PREHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1273936990/10000 +POSTHOOK: query: FROM src a JOIN tmp b ON (a.key = b.col11) +SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count +where b.col11 = 111 +group by a.value, b.col5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tmp +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/data/users/nzhang/work/876/apache-hive/build/ql/tmp/1273936990/10000 +val_111 105 2 +PREHOOK: query: drop table tmp +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tmp +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: default@tmp