hive-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Chris Kudelka (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (HIVE-8068) Dynamic partition insert overwrite does not overwrite files, but instead appends.
Date Fri, 12 Sep 2014 03:58:34 GMT

     [ https://issues.apache.org/jira/browse/HIVE-8068?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Chris Kudelka updated HIVE-8068:
--------------------------------
    Description: 
-- using a reference table `one_row` with contents:
||dummy_field||
|dummy_value|

-- create test table
create table if not exists test_table (
  line string
) partitioned by (
  my_part string
)

-- run first time
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10,
rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20,
rawDataSize=1]

select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
2 rows

----
-- compare to non-dynamic partition insert, which overwrites as expected
----

-- drop table and recreate with same definition
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10,
rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10,
rawDataSize=1]
select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
1 row

  was:
-- using a reference table `one_row` with contents:
||dummy_field||
|dummy_value|

{{-- create test table}}
create table if not exists test_table (
  line string
) partitioned by (
  my_part string
)

-- run first time
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10,
rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20,
rawDataSize=1]

select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
2 rows

----
-- compare to non-dynamic partition insert, which overwrites as expected
----

-- drop table and recreate with same definition
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10,
rawDataSize=1]

-- run again
insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10,
rawDataSize=1]
select * from test_table where my_part = 'partVal';

Expected result:
1 row

Actual result:
1 row


> Dynamic partition insert overwrite does not overwrite files, but instead appends.
> ---------------------------------------------------------------------------------
>
>                 Key: HIVE-8068
>                 URL: https://issues.apache.org/jira/browse/HIVE-8068
>             Project: Hive
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 0.13.1
>         Environment: Centos 6.2; Amazon S3 as DFS
>            Reporter: Chris Kudelka
>
> -- using a reference table `one_row` with contents:
> ||dummy_field||
> |dummy_value|
> -- create test table
> create table if not exists test_table (
>   line string
> ) partitioned by (
>   my_part string
> )
> -- run first time
> insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> -- run again
> insert overwrite table test_table partition (my_part) select 'a', 'partVal' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=2, numRows=1, totalSize=20, rawDataSize=1]
> select * from test_table where my_part = 'partVal';
> Expected result:
> 1 row
> Actual result:
> 2 rows
> ----
> -- compare to non-dynamic partition insert, which overwrites as expected
> ----
> -- drop table and recreate with same definition
> insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> -- run again
> insert overwrite table test_table partition (my_part='partVal') select 'a' from one_row;
> Partition test_db.test_table{my_part=partVal} stats: [numFiles=1, numRows=1, totalSize=10, rawDataSize=1]
> select * from test_table where my_part = 'partVal';
> Expected result:
> 1 row
> Actual result:
> 1 row



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message