hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "LuGuangMing (Jira)" <j...@apache.org>
Subject [jira] [Updated] (HIVE-22198) Execute union-all with child Joins in parallel
Date Fri, 20 Sep 2019 06:43:00 GMT

     [ https://issues.apache.org/jira/browse/HIVE-22198?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

LuGuangMing updated HIVE-22198:
-------------------------------
    Description: 
set parallel is true, set skewjoin is false, set auto convert join is false. Run a union all;
there is no error message, but some result data is missing. For details, check the attachment.

create table tab1(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;
 create table tab2(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;
 create table tab3(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;
 create table tab4(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;

insert into tab1 values(1,'abc'),(2,'bcd'),(3,'cde');
 insert into tab2 values(1,'abc'),(2,'bcd'),(3,'cde');
 insert into tab3 values(1,'abc'),(2,'bcd'),(3,'cde');
 insert into tab4 values(1,'abc'),(2,'bcd'),(3,'cde');

set hive.auto.convert.join=false;
 set hive.optimize.skewjoin=true;
 set hive.exec.parallel=true;

SELECT sum(1) as a 
 FROM tab1 t1 
 INNER JOIN tab2 t2 
 ON t1.com = t2.com
 UNION ALL
 SELECT sum(1) as a 
 FROM tab3 t3 
 INNER JOIN tab4 t4 
 ON t3.com = t4.com;

create table test_parallel stored as orcfile as 
 SELECT sum(1) as a 
 FROM tab1 t1 
 INNER JOIN tab2 t2 
 ON t1.com = t2.com
 UNION ALL
 SELECT sum(1) as a 
 FROM tab3 t3 
 INNER JOIN tab4 t4 
 ON t3.com = t4.com;

select * from test_parallel;

The result should contain two rows, but only one is returned.

  was:
set parallel is true, set skewjoin is false, set auto convert join is false. Run a union all;
there is no error message, but some result data is missing. For details, check the attached
sql file.

create table tab1(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;
 create table tab2(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;
 create table tab3(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;
 create table tab4(tid int, com string) row format delimited fields terminated by '\t' stored
as textfile;

insert into tab1 values(1,'abc'),(2,'bcd'),(3,'cde');
 insert into tab2 values(1,'abc'),(2,'bcd'),(3,'cde');
 insert into tab3 values(1,'abc'),(2,'bcd'),(3,'cde');
 insert into tab4 values(1,'abc'),(2,'bcd'),(3,'cde');

set hive.auto.convert.join=false;
 set hive.optimize.skewjoin=true;
 set hive.exec.parallel=true;

SELECT sum(1) as a 
 FROM tab1 t1 
 INNER JOIN tab2 t2 
 ON t1.com = t2.com
 UNION ALL
 SELECT sum(1) as a 
 FROM tab3 t3 
 INNER JOIN tab4 t4 
 ON t3.com = t4.com;

create table test_parallel stored as orcfile as 
 SELECT sum(1) as a 
 FROM tab1 t1 
 INNER JOIN tab2 t2 
 ON t1.com = t2.com
 UNION ALL
 SELECT sum(1) as a 
 FROM tab3 t3 
 INNER JOIN tab4 t4 
 ON t3.com = t4.com;

select * from test_parallel;

The result should contain two rows, but only one is returned.


> Execute union-all with child Joins in parallel
> ----------------------------------------------
>
>                 Key: HIVE-22198
>                 URL: https://issues.apache.org/jira/browse/HIVE-22198
>             Project: Hive
>          Issue Type: Bug
>    Affects Versions: 1.2.0, 3.0.0, 3.1.0
>            Reporter: LuGuangMing
>            Assignee: LuGuangMing
>            Priority: Major
>         Attachments: image-2019-09-20-11-38-37-433.png, image-2019-09-20-11-39-30-347.png,
test-parallel.sql
>
>
> set parallel is true, set skewjoin is false, set auto convert join is false. Run a union
all; there is no error message, but some result data is missing. For details, check the attachment.
> create table tab1(tid int, com string) row format delimited fields terminated by '\t'
stored as textfile;
>  create table tab2(tid int, com string) row format delimited fields terminated by '\t'
stored as textfile;
>  create table tab3(tid int, com string) row format delimited fields terminated by '\t'
stored as textfile;
>  create table tab4(tid int, com string) row format delimited fields terminated by '\t'
stored as textfile;
> insert into tab1 values(1,'abc'),(2,'bcd'),(3,'cde');
>  insert into tab2 values(1,'abc'),(2,'bcd'),(3,'cde');
>  insert into tab3 values(1,'abc'),(2,'bcd'),(3,'cde');
>  insert into tab4 values(1,'abc'),(2,'bcd'),(3,'cde');
> set hive.auto.convert.join=false;
>  set hive.optimize.skewjoin=true;
>  set hive.exec.parallel=true;
> SELECT sum(1) as a 
>  FROM tab1 t1 
>  INNER JOIN tab2 t2 
>  ON t1.com = t2.com
>  UNION ALL
>  SELECT sum(1) as a 
>  FROM tab3 t3 
>  INNER JOIN tab4 t4 
>  ON t3.com = t4.com;
> create table test_parallel stored as orcfile as 
>  SELECT sum(1) as a 
>  FROM tab1 t1 
>  INNER JOIN tab2 t2 
>  ON t1.com = t2.com
>  UNION ALL
>  SELECT sum(1) as a 
>  FROM tab3 t3 
>  INNER JOIN tab4 t4 
>  ON t3.com = t4.com;
> select * from test_parallel;
> The result should contain two rows, but only one is returned.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Mime
View raw message