hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lil...@apache.org
Subject [2/2] incubator-hawq git commit: HAWQ-1061. Fix data loss when file locations include directories, check policy and bucketnum in all mode.
Date Tue, 20 Sep 2016 07:57:06 GMT
HAWQ-1061. Fix data loss when file locations include directories, check policy and bucketnum
in all mode.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/8954090c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/8954090c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/8954090c

Branch: refs/heads/master
Commit: 8954090c29a770889c3e3269e14bd4bdaa6926aa
Parents: a683b5c
Author: xunzhang <xunzhangthu@gmail.com>
Authored: Mon Sep 19 17:42:22 2016 +0800
Committer: Lili Ma <ictmalili@gmail.com>
Committed: Tue Sep 20 15:56:49 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqregister | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8954090c/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 153ea9d..2b9b343 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -375,14 +375,24 @@ class HawqRegister(object):
                 set_yml_dataa('AO', files, sizes, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum'], partitionby, partitions_constraint,\
                               partitions_name, partitions_compression_level, partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes, encoding)
                 
+        def check_file_not_folder():
+            for fn in self.files:
+                hdfscmd = 'hdfs dfs -test -f %s' % fn
+                if local_ssh(hdfscmd, logger):
+                    logger.info('%s is not a file in hdfs, please check the yaml configuration file.' % fn)
+                    sys.exit(1)
+
         if self.yml:
             option_parser_yml(options.yml_config)
             self.filepath = self.files[0][:self.files[0].rfind('/')] if self.files else ''
-            check_distribution_policy()
+            check_file_not_folder()
             check_database_encoding()
             if self.mode != 'force' and self.mode != 'repair':
                 if not create_table():
                     self.mode = 'second_exist'
+            check_bucket_number()
+            check_distribution_policy()
+            check_policy_consistency()
         else:
             self.file_format = 'Parquet'
             check_hash_type() # Usage1 only support randomly distributed table
@@ -400,8 +410,6 @@ class HawqRegister(object):
             if self.tabledir.strip('/') != self.filepath.strip('/'):
                 logger.error("In repair mode, file path from yaml file should be the same with table's path.")
                 sys.exit(1)
-            check_policy_consistency()
-            check_bucket_number()
             existed_files, existed_sizes = self._get_files_in_hdfs(self.filepath)
             existed_info = {}
             for k, fn in enumerate(existed_files):


Mime
View raw message