hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From pv...@apache.org
Subject hive git commit: HIVE-16845: INSERT OVERWRITE a table with dynamic partitions on S3 fails with NPE (Marta Kuczora, reviewed by Sahil Takiar)
Date Thu, 03 Aug 2017 10:16:57 GMT
Repository: hive
Updated Branches:
  refs/heads/master 889a60a44 -> 8df9f6551


HIVE-16845: INSERT OVERWRITE a table with dynamic partitions on S3 fails with NPE (Marta Kuczora,
reviewed by Sahil Takiar)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8df9f655
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8df9f655
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8df9f655

Branch: refs/heads/master
Commit: 8df9f65511746eddf26e7a77eb7312faa67d99d5
Parents: 889a60a
Author: Peter Vary <pvary@cloudera.com>
Authored: Thu Aug 3 12:15:44 2017 +0200
Committer: Peter Vary <pvary@cloudera.com>
Committed: Thu Aug 3 12:15:44 2017 +0200

----------------------------------------------------------------------
 ...rt_overwrite_dynamic_partitions_merge_move.q |  37 +++++
 ...rt_overwrite_dynamic_partitions_merge_only.q |  37 +++++
 ...ert_overwrite_dynamic_partitions_move_only.q |  37 +++++
 ...verwrite_dynamic_partitions_merge_move.q.out | 138 +++++++++++++++++
 ...verwrite_dynamic_partitions_merge_only.q.out | 128 ++++++++++++++++
 ...overwrite_dynamic_partitions_move_only.q.out | 148 +++++++++++++++++++
 .../ql/plan/ConditionalResolverMergeFiles.java  |  11 +-
 7 files changed, 535 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8df9f655/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q
b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q
new file mode 100644
index 0000000..44360b0
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q
@@ -0,0 +1,37 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.exec.dynamic.partition.mode=nonstrict;
+SET mapreduce.input.fileinputformat.split.maxsize=10;
+SET hive.merge.mapfiles=true;
+set hive.optimize.sort.dynamic.partition=false;
+
+CREATE TABLE tmp_table_merge_move (id string, name string, dt string, pid int);
+
+INSERT INTO tmp_table_merge_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10002);
+
+CREATE EXTERNAL TABLE s3_table_merge_move (user_id string, event_name string) PARTITIONED BY (reported_date string, product_id int) LOCATION '${hiveconf:test.blobstore.path.unique}/s3_table_merge_move/';
+
+INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge_move t;
+
+select * from s3_table_merge_move order by user_id;
+
+SET hive.blobstore.optimizations.enabled=false;
+
+INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge_move t;
+
+select * from s3_table_merge_move order by user_id;
+
+DROP TABLE s3_table_merge_move;
+DROP TABLE tmp_table_merge_move;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8df9f655/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q
b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q
new file mode 100644
index 0000000..25562d9
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q
@@ -0,0 +1,37 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.exec.dynamic.partition.mode=nonstrict;
+SET mapreduce.input.fileinputformat.split.maxsize=10;
+SET hive.merge.mapfiles=true;
+set hive.optimize.sort.dynamic.partition=false;
+
+CREATE TABLE tmp_table_merge (id string, name string, dt string, pid int);
+
+INSERT INTO tmp_table_merge values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10001);
+
+CREATE EXTERNAL TABLE s3_table_merge (user_id string, event_name string) PARTITIONED BY (reported_date string, product_id int) LOCATION '${hiveconf:test.blobstore.path.unique}/s3_table_merge/';
+
+INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge t;
+
+select * from s3_table_merge order by user_id;
+
+SET hive.blobstore.optimizations.enabled=false;
+
+INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge t;
+
+select * from s3_table_merge order by user_id;
+
+DROP TABLE s3_table_merge;
+DROP TABLE tmp_table_merge;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8df9f655/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_move_only.q
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_move_only.q
b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_move_only.q
new file mode 100644
index 0000000..cb1a32b
--- /dev/null
+++ b/itests/hive-blobstore/src/test/queries/clientpositive/insert_overwrite_dynamic_partitions_move_only.q
@@ -0,0 +1,37 @@
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SET hive.blobstore.optimizations.enabled=true;
+SET hive.exec.dynamic.partition.mode=nonstrict;
+SET mapreduce.input.fileinputformat.split.maxsize=10;
+SET hive.merge.mapfiles=true;
+set hive.optimize.sort.dynamic.partition=false;
+
+CREATE TABLE tmp_table_move (id string, name string, dt string, pid int);
+
+INSERT INTO tmp_table_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10001), ('u3','name3','2017-04-10',10002), ('u4','name4','2017-04-12',10001), ('u5','name5','2017-04-12',10002);
+
+CREATE EXTERNAL TABLE s3_table_move (user_id string, event_name string) PARTITIONED BY (reported_date string, product_id int) LOCATION '${hiveconf:test.blobstore.path.unique}/s3_table_move/';
+
+INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_move t;
+
+select * from s3_table_move order by user_id;
+
+SET hive.blobstore.optimizations.enabled=false;
+
+INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_move t;
+
+select * from s3_table_move order by user_id;
+
+DROP TABLE s3_table_move;
+DROP TABLE tmp_table_move;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/8df9f655/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q.out
b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q.out
new file mode 100644
index 0000000..bfebad6
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_move.q.out
@@ -0,0 +1,138 @@
+PREHOOK: query: CREATE TABLE tmp_table_merge_move (id string, name string, dt string, pid
int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_table_merge_move
+POSTHOOK: query: CREATE TABLE tmp_table_merge_move (id string, name string, dt string, pid
int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_table_merge_move
+PREHOOK: query: INSERT INTO tmp_table_merge_move values ('u1','name1','2017-04-10',10000),
('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001),
('u5','name5','2017-04-10',10002)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tmp_table_merge_move
+POSTHOOK: query: INSERT INTO tmp_table_merge_move values ('u1','name1','2017-04-10',10000),
('u2','name2','2017-04-10',10000), ('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001),
('u5','name5','2017-04-10',10002)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tmp_table_merge_move
+POSTHOOK: Lineage: tmp_table_merge_move.dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_merge_move.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_merge_move.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_merge_move.pid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4,
type:string, comment:), ]
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/s3_table_merge_move
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s3_table_merge_move
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/s3_table_merge_move
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s3_table_merge_move
+PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge_move t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_table_merge_move
+PREHOOK: Output: default@s3_table_merge_move
+POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge_move t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_table_merge_move
+POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).event_name
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).user_id
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).event_name
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).user_id
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).event_name
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).user_id
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ]
+PREHOOK: query: select * from s3_table_merge_move order by user_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s3_table_merge_move
+PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000
+PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001
+PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002
+#### A masked pattern was here ####
+POSTHOOK: query: select * from s3_table_merge_move order by user_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s3_table_merge_move
+POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002
+#### A masked pattern was here ####
+u1	name1	2017-04-10	10000
+u2	name2	2017-04-10	10000
+u3	name3	2017-04-10	10000
+u4	name4	2017-04-10	10001
+u5	name5	2017-04-10	10002
+PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge_move t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_table_merge_move
+PREHOOK: Output: default@s3_table_merge_move
+POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge_move t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_table_merge_move
+POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Output: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).event_name
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10000).user_id
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).event_name
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10001).user_id
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).event_name
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge_move PARTITION(reported_date=2017-04-10,product_id=10002).user_id
SIMPLE [(tmp_table_merge_move)t.FieldSchema(name:id, type:string, comment:null), ]
+PREHOOK: query: select * from s3_table_merge_move order by user_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s3_table_merge_move
+PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000
+PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001
+PREHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002
+#### A masked pattern was here ####
+POSTHOOK: query: select * from s3_table_merge_move order by user_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s3_table_merge_move
+POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Input: default@s3_table_merge_move@reported_date=2017-04-10/product_id=10002
+#### A masked pattern was here ####
+u1	name1	2017-04-10	10000
+u2	name2	2017-04-10	10000
+u3	name3	2017-04-10	10000
+u4	name4	2017-04-10	10001
+u5	name5	2017-04-10	10002
+PREHOOK: query: DROP TABLE s3_table_merge_move
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@s3_table_merge_move
+PREHOOK: Output: default@s3_table_merge_move
+POSTHOOK: query: DROP TABLE s3_table_merge_move
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@s3_table_merge_move
+POSTHOOK: Output: default@s3_table_merge_move
+PREHOOK: query: DROP TABLE tmp_table_merge_move
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tmp_table_merge_move
+PREHOOK: Output: default@tmp_table_merge_move
+POSTHOOK: query: DROP TABLE tmp_table_merge_move
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tmp_table_merge_move
+POSTHOOK: Output: default@tmp_table_merge_move

http://git-wip-us.apache.org/repos/asf/hive/blob/8df9f655/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q.out
b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q.out
new file mode 100644
index 0000000..1bffae3
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_merge_only.q.out
@@ -0,0 +1,128 @@
+PREHOOK: query: CREATE TABLE tmp_table_merge (id string, name string, dt string, pid int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_table_merge
+POSTHOOK: query: CREATE TABLE tmp_table_merge (id string, name string, dt string, pid int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_table_merge
+PREHOOK: query: INSERT INTO tmp_table_merge values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000),
('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10001)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tmp_table_merge
+POSTHOOK: query: INSERT INTO tmp_table_merge values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10000),
('u3','name3','2017-04-10',10000), ('u4','name4','2017-04-10',10001), ('u5','name5','2017-04-10',10001)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tmp_table_merge
+POSTHOOK: Lineage: tmp_table_merge.dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_merge.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_merge.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_merge.pid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4,
type:string, comment:), ]
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/s3_table_merge
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s3_table_merge
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/s3_table_merge
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s3_table_merge
+PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_table_merge
+PREHOOK: Output: default@s3_table_merge
+POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_table_merge
+POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).event_name
SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).user_id
SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).event_name
SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).user_id
SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ]
+PREHOOK: query: select * from s3_table_merge order by user_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s3_table_merge
+PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000
+PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001
+#### A masked pattern was here ####
+POSTHOOK: query: select * from s3_table_merge order by user_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s3_table_merge
+POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001
+#### A masked pattern was here ####
+u1	name1	2017-04-10	10000
+u2	name2	2017-04-10	10000
+u3	name3	2017-04-10	10000
+u4	name4	2017-04-10	10001
+u5	name5	2017-04-10	10001
+PREHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_table_merge
+PREHOOK: Output: default@s3_table_merge
+POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_merge PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_merge t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_table_merge
+POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Output: default@s3_table_merge@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).event_name
SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10000).user_id
SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).event_name
SIMPLE [(tmp_table_merge)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_merge PARTITION(reported_date=2017-04-10,product_id=10001).user_id
SIMPLE [(tmp_table_merge)t.FieldSchema(name:id, type:string, comment:null), ]
+PREHOOK: query: select * from s3_table_merge order by user_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s3_table_merge
+PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000
+PREHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001
+#### A masked pattern was here ####
+POSTHOOK: query: select * from s3_table_merge order by user_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s3_table_merge
+POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Input: default@s3_table_merge@reported_date=2017-04-10/product_id=10001
+#### A masked pattern was here ####
+u1	name1	2017-04-10	10000
+u2	name2	2017-04-10	10000
+u3	name3	2017-04-10	10000
+u4	name4	2017-04-10	10001
+u5	name5	2017-04-10	10001
+PREHOOK: query: DROP TABLE s3_table_merge
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@s3_table_merge
+PREHOOK: Output: default@s3_table_merge
+POSTHOOK: query: DROP TABLE s3_table_merge
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@s3_table_merge
+POSTHOOK: Output: default@s3_table_merge
+PREHOOK: query: DROP TABLE tmp_table_merge
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tmp_table_merge
+PREHOOK: Output: default@tmp_table_merge
+POSTHOOK: query: DROP TABLE tmp_table_merge
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tmp_table_merge
+POSTHOOK: Output: default@tmp_table_merge

http://git-wip-us.apache.org/repos/asf/hive/blob/8df9f655/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_move_only.q.out
----------------------------------------------------------------------
diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_move_only.q.out
b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_move_only.q.out
new file mode 100644
index 0000000..530c036
--- /dev/null
+++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions_move_only.q.out
@@ -0,0 +1,148 @@
+PREHOOK: query: CREATE TABLE tmp_table_move (id string, name string, dt string, pid int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tmp_table_move
+POSTHOOK: query: CREATE TABLE tmp_table_move (id string, name string, dt string, pid int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tmp_table_move
+PREHOOK: query: INSERT INTO tmp_table_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10001),
('u3','name3','2017-04-10',10002), ('u4','name4','2017-04-12',10001), ('u5','name5','2017-04-12',10002)
+PREHOOK: type: QUERY
+PREHOOK: Output: default@tmp_table_move
+POSTHOOK: query: INSERT INTO tmp_table_move values ('u1','name1','2017-04-10',10000), ('u2','name2','2017-04-10',10001),
('u3','name3','2017-04-10',10002), ('u4','name4','2017-04-12',10001), ('u5','name5','2017-04-12',10002)
+POSTHOOK: type: QUERY
+POSTHOOK: Output: default@tmp_table_move
+POSTHOOK: Lineage: tmp_table_move.dt SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_move.id SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_move.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2,
type:string, comment:), ]
+POSTHOOK: Lineage: tmp_table_move.pid EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4,
type:string, comment:), ]
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+PREHOOK: Input: ### test.blobstore.path ###/s3_table_move
+PREHOOK: Output: database:default
+PREHOOK: Output: default@s3_table_move
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Input: ### test.blobstore.path ###/s3_table_move
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@s3_table_move
+PREHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_move t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_table_move
+PREHOOK: Output: default@s3_table_move
+POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_move t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_table_move
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10002
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-12/product_id=10001
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-12/product_id=10002
+PREHOOK: query: select * from s3_table_move order by user_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s3_table_move
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002
+#### A masked pattern was here ####
+POSTHOOK: query: select * from s3_table_move order by user_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s3_table_move
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002
+#### A masked pattern was here ####
+u1	name1	2017-04-10	10000
+u2	name2	2017-04-10	10001
+u3	name3	2017-04-10	10002
+u4	name4	2017-04-12	10001
+u5	name5	2017-04-12	10002
+PREHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_move t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_table_move
+PREHOOK: Output: default@s3_table_move
+POSTHOOK: query: INSERT OVERWRITE TABLE s3_table_move PARTITION (reported_date, product_id)
+SELECT
+  t.id as user_id,
+  t.name as event_name,
+  t.dt as reported_date,
+  t.pid as product_id
+FROM tmp_table_move t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_table_move
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-10/product_id=10002
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-12/product_id=10001
+POSTHOOK: Output: default@s3_table_move@reported_date=2017-04-12/product_id=10002
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10000).event_name
SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10000).user_id
SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10001).event_name
SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10001).user_id
SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10002).event_name
SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-10,product_id=10002).user_id
SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-12,product_id=10001).event_name
SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-12,product_id=10001).user_id
SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-12,product_id=10002).event_name
SIMPLE [(tmp_table_move)t.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: s3_table_move PARTITION(reported_date=2017-04-12,product_id=10002).user_id
SIMPLE [(tmp_table_move)t.FieldSchema(name:id, type:string, comment:null), ]
+PREHOOK: query: select * from s3_table_move order by user_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@s3_table_move
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001
+PREHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002
+#### A masked pattern was here ####
+POSTHOOK: query: select * from s3_table_move order by user_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@s3_table_move
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10000
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10001
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-10/product_id=10002
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10001
+POSTHOOK: Input: default@s3_table_move@reported_date=2017-04-12/product_id=10002
+#### A masked pattern was here ####
+u1	name1	2017-04-10	10000
+u2	name2	2017-04-10	10001
+u3	name3	2017-04-10	10002
+u4	name4	2017-04-12	10001
+u5	name5	2017-04-12	10002
+PREHOOK: query: DROP TABLE s3_table_move
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@s3_table_move
+PREHOOK: Output: default@s3_table_move
+POSTHOOK: query: DROP TABLE s3_table_move
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@s3_table_move
+POSTHOOK: Output: default@s3_table_move
+PREHOOK: query: DROP TABLE tmp_table_move
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tmp_table_move
+PREHOOK: Output: default@tmp_table_move
+POSTHOOK: query: DROP TABLE tmp_table_move
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tmp_table_move
+POSTHOOK: Output: default@tmp_table_move

http://git-wip-us.apache.org/repos/asf/hive/blob/8df9f655/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
index 4266569..b454a2a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java
@@ -284,7 +284,16 @@ public class ConditionalResolverMergeFiles implements ConditionalResolver,
         // make the MoveTask as the child of the MR Task
         resTsks.add(mrAndMvTask);
 
-        MoveWork mvWork = (MoveWork) mvTask.getWork();
+        // Originally the mvTask and the child move task of the mrAndMvTask contain the same
+        // MoveWork object.
+        // If the blobstore optimizations are on and the input/output paths are merged
+        // in the move only MoveWork, the mvTask and the child move task of the mrAndMvTask
+        // will contain different MoveWork objects, which causes problems.
+        // Not just in this case, but also in general the child move task of the mrAndMvTask should
+        // be used, because that is the correct move task for the "merge and move" use case.
+        Task<? extends Serializable> mergeAndMoveMoveTask = mrAndMvTask.getChildTasks().get(0);
+        MoveWork mvWork = (MoveWork) mergeAndMoveMoveTask.getWork();
+
         LoadFileDesc lfd = mvWork.getLoadFileWork();
 
         Path targetDir = lfd.getTargetDir();


Mime
View raw message