hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1498150 - in /hive/trunk: build-common.xml ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java ql/src/test/queries/clientpositive/leftsemijoin_mr.q ql/src/test/results/clientpositive/leftsemijoin_mr.q.out
Date Sun, 30 Jun 2013 16:44:58 GMT
Author: hashutosh
Date: Sun Jun 30 16:44:58 2013
New Revision: 1498150

URL: http://svn.apache.org/r1498150
Log:
HIVE-4781 : LEFT SEMI JOIN generates wrong results when the number of rows belonging to a
single key of the right table exceeds hive.join.emit.interval (Yin Huai via Ashutosh Chauhan)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/leftsemijoin_mr.q
    hive/trunk/ql/src/test/results/clientpositive/leftsemijoin_mr.q.out
Modified:
    hive/trunk/build-common.xml
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java

Modified: hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hive/trunk/build-common.xml?rev=1498150&r1=1498149&r2=1498150&view=diff
==============================================================================
--- hive/trunk/build-common.xml (original)
+++ hive/trunk/build-common.xml Sun Jun 30 16:44:58 2013
@@ -59,7 +59,7 @@
   <property name="test.output" value="true"/>
   <property name="test.junit.output.format" value="xml"/>
   <property name="test.junit.output.usefile" value="true"/>
-  <property name="minimr.query.files" value="list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,schemeAuthority.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q"/>
+  <property name="minimr.query.files" value="list_bucket_dml_10.q,input16_cc.q,scriptfile1.q,bucket4.q,bucketmapjoin6.q,disable_merge_for_bucketing.q,reduce_deduplicate.q,smb_mapjoin_8.q,join1.q,groupby2.q,bucketizedhiveinputformat.q,bucketmapjoin7.q,optrstat_groupby.q,bucket_num_reducers.q,bucket5.q,load_fs2.q,bucket_num_reducers2.q,infer_bucket_sort_merge.q,infer_bucket_sort_reducers_power_two.q,infer_bucket_sort_dyn_part.q,infer_bucket_sort_bucketed_table.q,infer_bucket_sort_map_operators.q,infer_bucket_sort_num_buckets.q,leftsemijoin_mr.q,schemeAuthority.q,truncate_column_buckets.q,remote_script.q,,load_hdfs_file_with_space_in_the_name.q,parallel_orderby.q"/>
   <property name="minimr.query.negative.files" value="cluster_tasklog_retrieval.q,minimr_broken_pipe.q,mapreduce_stack_trace.q,mapreduce_stack_trace_turnoff.q,mapreduce_stack_trace_hadoop20.q,mapreduce_stack_trace_turnoff_hadoop20.q" />
   <property name="test.silent" value="true"/>
   <property name="hadoopVersion" value="${hadoop.version.ant-internal}"/>

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java?rev=1498150&r1=1498149&r2=1498150&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java Sun Jun 30 16:44:58 2013
@@ -93,7 +93,8 @@ public class JoinOperator extends Common
           .toString());
       List keyObject = (List) soi.getStructFieldData(row, sf);
       // Are we consuming too much memory
-      if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0)) {
+      if (alias == numAliases - 1 && !(handleSkewJoin && skewJoinKeyContext.currBigKeyTag >= 0) &&
+          !hasLeftSemiJoin) {
         if (sz == joinEmitInterval) {
           // The input is sorted by alias, so if we are already in the last join
           // operand,

Added: hive/trunk/ql/src/test/queries/clientpositive/leftsemijoin_mr.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/leftsemijoin_mr.q?rev=1498150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/leftsemijoin_mr.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/leftsemijoin_mr.q Sun Jun 30 16:44:58 2013
@@ -0,0 +1,20 @@
+CREATE TABLE T1(key INT);
+LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1;
+CREATE TABLE T2(key INT);
+LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2;
+
+-- Run this query using TestMinimrCliDriver
+
+SELECT * FROM T1;
+SELECT * FROM T2;
+
+set hive.auto.convert.join=false;
+set mapred.reduce.tasks=2;
+
+set hive.join.emit.interval=100;
+
+SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key);
+
+set hive.join.emit.interval=1;
+
+SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key);

Added: hive/trunk/ql/src/test/results/clientpositive/leftsemijoin_mr.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/leftsemijoin_mr.q.out?rev=1498150&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/leftsemijoin_mr.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/leftsemijoin_mr.q.out Sun Jun 30 16:44:58 2013
@@ -0,0 +1,88 @@
+PREHOOK: query: CREATE TABLE T1(key INT)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: CREATE TABLE T2(key INT)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T2(key INT)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/leftsemijoin_mr_t2.txt' INTO TABLE T2
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t2
+PREHOOK: query: -- Run this query using TestMinimrCliDriver
+
+SELECT * FROM T1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: -- Run this query using TestMinimrCliDriver
+
+SELECT * FROM T1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+1
+1
+PREHOOK: query: SELECT * FROM T2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM T2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+1
+PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+1
+1
+PREHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT T1.key FROM T1 LEFT SEMI JOIN (SELECT key FROM T2 SORT BY key) tmp ON (T1.key=tmp.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+#### A masked pattern was here ####
+1
+1



Mime
View raw message