hive-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From 范宜臻 (Jira) <j...@apache.org>
Subject [jira] [Updated] (HIVE-23438) Missing Rows When Left Outer Join In N-way HybridGraceHashJoin
Date Mon, 11 May 2020 14:31:00 GMT

     [ https://issues.apache.org/jira/browse/HIVE-23438?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]

范宜臻 updated HIVE-23438:
-----------------------
    Description: 
*STEP 1. Create test data(q_test_init_tez.sql)*
{code:java}
//create table src1
CREATE TABLE src1 (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS
TEXTFILE;

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv3.txt" INTO TABLE src1;

//create table src2
CREATE TABLE src2(key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS
TEXTFILE;

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv11.txt" OVERWRITE INTO TABLE src2;

//create table srcpart
CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT 'default')
PARTITIONED BY (ds STRING, hr STRING)
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="11");

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="12");

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="11");

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="12");{code}
*STEP 2. Run query*
{code:java}
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000000;
set hive.cbo.enable=false;
set hive.mapjoin.hybridgrace.hashtable=true;

select *
from
(
select key from src1 group by key
) x
left join src2 z on x.key = z.key
join
(
select key from srcpart y group by key
) y on y.key = x.key;

{code}

  was:
*STEP 1. Create test data(q_test_init_tez.sql)*
{code:java}
//create table src1
CREATE TABLE src1 (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS
TEXTFILE;

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv3.txt" INTO TABLE src1;

//create table src2
CREATE TABLE src2(key STRING COMMENT 'default', value STRING COMMENT 'default') STORED AS
TEXTFILE;

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv11.txt" OVERWRITE INTO TABLE src2;

//create table srcpart
CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT 'default')
PARTITIONED BY (ds STRING, hr STRING)
STORED AS TEXTFILE;

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="11");

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="12");

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="11");

LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="12");{code}
*STEP 2. Run query*
{code:java}
set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000000;
set hive.cbo.enable=false;

{code}


> Missing Rows When Left Outer Join In N-way HybridGraceHashJoin
> --------------------------------------------------------------
>
>                 Key: HIVE-23438
>                 URL: https://issues.apache.org/jira/browse/HIVE-23438
>             Project: Hive
>          Issue Type: Bug
>          Components: SQL, Tez
>    Affects Versions: 2.3.4
>            Reporter: 范宜臻
>            Priority: Major
>         Attachments: HIVE-23438.patch
>
>
> *STEP 1. Create test data(q_test_init_tez.sql)*
> {code:java}
> //create table src1
> CREATE TABLE src1 (key STRING COMMENT 'default', value STRING COMMENT 'default') STORED
AS TEXTFILE;
> LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv3.txt" INTO TABLE src1;
> //create table src2
> CREATE TABLE src2(key STRING COMMENT 'default', value STRING COMMENT 'default') STORED
AS TEXTFILE;
> LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv11.txt" OVERWRITE INTO TABLE src2;
> //create table srcpart
> CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT 'default')
> PARTITIONED BY (ds STRING, hr STRING)
> STORED AS TEXTFILE;
> LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
> OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="11");
> LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
> OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-08", hr="12");
> LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
> OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="11");
> LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt"
> OVERWRITE INTO TABLE srcpart PARTITION (ds="2008-04-09", hr="12");{code}
> *STEP 2. Run query*
> {code:java}
> set hive.auto.convert.join=true;
> set hive.auto.convert.join.noconditionaltask=true;
> set hive.auto.convert.join.noconditionaltask.size=10000000;
> set hive.cbo.enable=false;
> set hive.mapjoin.hybridgrace.hashtable=true;
> select *
> from
> (
> select key from src1 group by key
> ) x
> left join src2 z on x.key = z.key
> join
> (
> select key from srcpart y group by key
> ) y on y.key = x.key;
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Mime
View raw message