hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jd...@apache.org
Subject hive git commit: HIVE-17813: hive.exec.move.files.from.source.dir does not work with partitioned tables (Jason Dere, reviewed by Ashutosh Chauhan)
Date Tue, 17 Oct 2017 18:01:08 GMT
Repository: hive
Updated Branches:
  refs/heads/master e881f2ea2 -> 2139ef601


HIVE-17813: hive.exec.move.files.from.source.dir does not work with partitioned tables (Jason Dere, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2139ef60
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2139ef60
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2139ef60

Branch: refs/heads/master
Commit: 2139ef601b91d2982acd25ed1450ab3bda0dbc49
Parents: e881f2e
Author: Jason Dere <jdere@hortonworks.com>
Authored: Tue Oct 17 11:00:25 2017 -0700
Committer: Jason Dere <jdere@hortonworks.com>
Committed: Tue Oct 17 11:00:25 2017 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/exec/Utilities.java   |   5 +
 .../insert_with_move_files_from_source_dir.q    |  21 +++
 ...insert_with_move_files_from_source_dir.q.out | 138 +++++++++++++++++++
 3 files changed, 164 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2139ef60/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 6110145..5214688 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1181,6 +1181,11 @@ public final class Utilities {
     for (FileStatus file : files) {
       if (filesToMove.contains(file.getPath())) {
         Utilities.moveFile(fs, file, dst);
+      } else if (file.isDir()) {
+        // Traverse directory contents.
+        // Directory nesting for dst needs to match src.
+        Path nestedDstPath = new Path(dst, file.getPath().getName());
+        Utilities.moveSpecifiedFiles(fs, file.getPath(), nestedDstPath, filesToMove);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/2139ef60/ql/src/test/queries/clientpositive/insert_with_move_files_from_source_dir.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insert_with_move_files_from_source_dir.q b/ql/src/test/queries/clientpositive/insert_with_move_files_from_source_dir.q
new file mode 100644
index 0000000..0117755
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insert_with_move_files_from_source_dir.q
@@ -0,0 +1,21 @@
+
+set hive.exec.move.files.from.source.dir=true;
+set hive.enforce.bucketing=true;
+set hive.exec.dynamic.partition=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+create table emp1 (id int, name string, dept int, country string) row format delimited fields terminated by '|' stored as textfile;
+load data local inpath '../../data/files/employee_part.txt' overwrite into table emp1;
+select * from emp1 order by id;
+
+-- Testing inserts with hive.exec.move.files.from.source.dir=true
+-- inserts into non-partitioned/non-bucketed table
+create table emp2 (id int, name string, dept int, country string) stored as textfile;
+insert overwrite table emp2 select * from emp1;
+select * from emp2 order by id;
+
+-- inserts into partitioned/bucketed table
+create table emp1_part_bucket (id int, name string) partitioned by (dept int, country string) clustered by (id) into 4 buckets;
+insert overwrite table emp1_part_bucket partition (dept, country) select * from emp1;
+show partitions emp1_part_bucket;
+select * from emp1_part_bucket order by id;

http://git-wip-us.apache.org/repos/asf/hive/blob/2139ef60/ql/src/test/results/clientpositive/insert_with_move_files_from_source_dir.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_with_move_files_from_source_dir.q.out b/ql/src/test/results/clientpositive/insert_with_move_files_from_source_dir.q.out
new file mode 100644
index 0000000..b55cdfa
--- /dev/null
+++ b/ql/src/test/results/clientpositive/insert_with_move_files_from_source_dir.q.out
@@ -0,0 +1,138 @@
+PREHOOK: query: create table emp1 (id int, name string, dept int, country string) row format
delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp1
+POSTHOOK: query: create table emp1 (id int, name string, dept int, country string) row format
delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp1
+PREHOOK: query: load data local inpath '../../data/files/employee_part.txt' overwrite into
table emp1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@emp1
+POSTHOOK: query: load data local inpath '../../data/files/employee_part.txt' overwrite into
table emp1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@emp1
+PREHOOK: query: select * from emp1 order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emp1 order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp1
+#### A masked pattern was here ####
+16	john	4000	USA
+17	robert	2000	USA
+18	andrew	4000	USA
+19	katty	2000	USA
+27	edward	4000	UK
+29	alan	3000	UK
+31	kerry	4000	UK
+34	tom	3000	UK
+35	zack	2000	UK
+PREHOOK: query: create table emp2 (id int, name string, dept int, country string) stored
as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp2
+POSTHOOK: query: create table emp2 (id int, name string, dept int, country string) stored
as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp2
+PREHOOK: query: insert overwrite table emp2 select * from emp1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp1
+PREHOOK: Output: default@emp2
+POSTHOOK: query: insert overwrite table emp2 select * from emp1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp1
+POSTHOOK: Output: default@emp2
+POSTHOOK: Lineage: emp2.country SIMPLE [(emp1)emp1.FieldSchema(name:country, type:string,
comment:null), ]
+POSTHOOK: Lineage: emp2.dept SIMPLE [(emp1)emp1.FieldSchema(name:dept, type:int, comment:null),
]
+POSTHOOK: Lineage: emp2.id SIMPLE [(emp1)emp1.FieldSchema(name:id, type:int, comment:null),
]
+POSTHOOK: Lineage: emp2.name SIMPLE [(emp1)emp1.FieldSchema(name:name, type:string, comment:null),
]
+PREHOOK: query: select * from emp2 order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emp2 order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp2
+#### A masked pattern was here ####
+16	john	4000	USA
+17	robert	2000	USA
+18	andrew	4000	USA
+19	katty	2000	USA
+27	edward	4000	UK
+29	alan	3000	UK
+31	kerry	4000	UK
+34	tom	3000	UK
+35	zack	2000	UK
+PREHOOK: query: create table emp1_part_bucket (id int, name string) partitioned by (dept
int, country string) clustered by (id) into 4 buckets
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp1_part_bucket
+POSTHOOK: query: create table emp1_part_bucket (id int, name string) partitioned by (dept
int, country string) clustered by (id) into 4 buckets
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emp1_part_bucket
+PREHOOK: query: insert overwrite table emp1_part_bucket partition (dept, country) select
* from emp1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp1
+PREHOOK: Output: default@emp1_part_bucket
+POSTHOOK: query: insert overwrite table emp1_part_bucket partition (dept, country) select
* from emp1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp1
+POSTHOOK: Output: default@emp1_part_bucket@dept=2000/country=UK
+POSTHOOK: Output: default@emp1_part_bucket@dept=2000/country=USA
+POSTHOOK: Output: default@emp1_part_bucket@dept=3000/country=UK
+POSTHOOK: Output: default@emp1_part_bucket@dept=4000/country=UK
+POSTHOOK: Output: default@emp1_part_bucket@dept=4000/country=USA
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=2000,country=UK).id SIMPLE [(emp1)emp1.FieldSchema(name:id,
type:int, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=2000,country=UK).name SIMPLE [(emp1)emp1.FieldSchema(name:name,
type:string, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=2000,country=USA).id SIMPLE [(emp1)emp1.FieldSchema(name:id,
type:int, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=2000,country=USA).name SIMPLE [(emp1)emp1.FieldSchema(name:name,
type:string, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=3000,country=UK).id SIMPLE [(emp1)emp1.FieldSchema(name:id,
type:int, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=3000,country=UK).name SIMPLE [(emp1)emp1.FieldSchema(name:name,
type:string, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=4000,country=UK).id SIMPLE [(emp1)emp1.FieldSchema(name:id,
type:int, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=4000,country=UK).name SIMPLE [(emp1)emp1.FieldSchema(name:name,
type:string, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=4000,country=USA).id SIMPLE [(emp1)emp1.FieldSchema(name:id,
type:int, comment:null), ]
+POSTHOOK: Lineage: emp1_part_bucket PARTITION(dept=4000,country=USA).name SIMPLE [(emp1)emp1.FieldSchema(name:name,
type:string, comment:null), ]
+PREHOOK: query: show partitions emp1_part_bucket
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@emp1_part_bucket
+POSTHOOK: query: show partitions emp1_part_bucket
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@emp1_part_bucket
+dept=2000/country=UK
+dept=2000/country=USA
+dept=3000/country=UK
+dept=4000/country=UK
+dept=4000/country=USA
+PREHOOK: query: select * from emp1_part_bucket order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emp1_part_bucket
+PREHOOK: Input: default@emp1_part_bucket@dept=2000/country=UK
+PREHOOK: Input: default@emp1_part_bucket@dept=2000/country=USA
+PREHOOK: Input: default@emp1_part_bucket@dept=3000/country=UK
+PREHOOK: Input: default@emp1_part_bucket@dept=4000/country=UK
+PREHOOK: Input: default@emp1_part_bucket@dept=4000/country=USA
+#### A masked pattern was here ####
+POSTHOOK: query: select * from emp1_part_bucket order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emp1_part_bucket
+POSTHOOK: Input: default@emp1_part_bucket@dept=2000/country=UK
+POSTHOOK: Input: default@emp1_part_bucket@dept=2000/country=USA
+POSTHOOK: Input: default@emp1_part_bucket@dept=3000/country=UK
+POSTHOOK: Input: default@emp1_part_bucket@dept=4000/country=UK
+POSTHOOK: Input: default@emp1_part_bucket@dept=4000/country=USA
+#### A masked pattern was here ####
+16	john	4000	USA
+17	robert	2000	USA
+18	andrew	4000	USA
+19	katty	2000	USA
+27	edward	4000	UK
+29	alan	3000	UK
+31	kerry	4000	UK
+34	tom	3000	UK
+35	zack	2000	UK


Mime
View raw message