hawq-commits mailing list archives

From lil...@apache.org
Subject [3/3] incubator-hawq git commit: HAWQ-778. Refine hawq register, add sanity checks.
Date Tue, 07 Jun 2016 03:04:13 GMT
HAWQ-778. Refine hawq register, add sanity checks.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/3e3f93dd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/3e3f93dd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/3e3f93dd

Branch: refs/heads/master
Commit: 3e3f93dd1c0ddb92d4971ba0c88264c739973c31
Parents: 355c437
Author: Yancheng Luo <yluo@pivotal.io>
Authored: Fri Jun 3 14:48:33 2016 +0800
Committer: Lili Ma <ictmalili@gmail.com>
Committed: Tue Jun 7 11:03:56 2016 +0800

----------------------------------------------------------------------
 src/test/feature/ManagementTool/test_hawq.paq   | Bin 657 -> 0 bytes
 .../ManagementTool/test_hawq_register.cpp       |  78 ++++++----
 .../ManagementTool/test_hawq_register_hawq.paq  | Bin 0 -> 657 bytes
 .../ManagementTool/test_hawq_register_hive.paq  | Bin 0 -> 212 bytes
 .../ManagementTool/test_hawq_register_not_paq   | Bin 0 -> 48 bytes
 src/test/feature/ManagementTool/test_hive.paq   | Bin 212 -> 0 bytes
 src/test/feature/ManagementTool/test_not_paq    | Bin 48 -> 0 bytes
 tools/bin/hawqpylib/HAWQ_HELP.py                |   1 +
 tools/bin/hawqregister                          | 144 ++++++++++++-------
 tools/doc/hawqregister_help                     |  15 +-
 10 files changed, 148 insertions(+), 90 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/src/test/feature/ManagementTool/test_hawq.paq
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq.paq b/src/test/feature/ManagementTool/test_hawq.paq
deleted file mode 100644
index f2adb4b..0000000
Binary files a/src/test/feature/ManagementTool/test_hawq.paq and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp
index 328c19d..2efbd84 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -19,15 +19,15 @@ class TestHawqRegister : public ::testing::Test {
 TEST_F(TestHawqRegister, TestSingleHawqFile) {
 	SQLUtility util;
 	string rootPath(util.getTestRootPath());
-	string relativePath("/testhawqregister/test_hawq.paq");
+	string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
 	string filePath = rootPath + relativePath;
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " /hawq_register_hawq.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq"));
 
 	util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_hawq.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_hawq.paq"));
 
 	util.query("select * from hawqregister;", 3);
 	util.execute("insert into hawqregister values(1);");
@@ -38,15 +38,15 @@ TEST_F(TestHawqRegister, TestSingleHawqFile) {
 TEST_F(TestHawqRegister, TestSingleHiveFile) {
 	SQLUtility util;
 	string rootPath(util.getTestRootPath());
-	string relativePath("/testhawqregister/test_hive.paq");
+	string relativePath("/ManagementTool/test_hawq_register_hive.paq");
 	string filePath = rootPath + relativePath;
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " /hawq_register_hive.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_hive.paq"));
 
 	util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_hive.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_hive.paq"));
 
 	util.query("select * from hawqregister;", 1);
 	util.execute("insert into hawqregister values(1);");
@@ -57,83 +57,83 @@ TEST_F(TestHawqRegister, TestSingleHiveFile) {
 TEST_F(TestHawqRegister, TestFiles) {
 	SQLUtility util;
 	string rootPath(util.getTestRootPath());
-	string relativePath("/testhawqregister/test_hawq.paq");
+	string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
 	string filePath1 = rootPath + relativePath;
-	relativePath = "/testhawqregister/test_hive.paq";
+	relativePath = "/ManagementTool/test_hawq_register_hive.paq";
 	string filePath2 = rootPath + relativePath;
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -mkdir -p /hawq_register_test/t/t"));
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath1 + " /hawq_register_test/hawq1.paq"));
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath1 + " /hawq_register_test/hawq2.paq"));
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath1 + " /hawq_register_test/t/hawq.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -mkdir -p hdfs://localhost:8020/hawq_register_test/t/t"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath1 + " hdfs://localhost:8020/hawq_register_test/hawq1.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath1 + " hdfs://localhost:8020/hawq_register_test/hawq2.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath1 + " hdfs://localhost:8020/hawq_register_test/t/hawq.paq"));
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath2 + " /hawq_register_test/hive1.paq"));
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath2 + " /hawq_register_test/hive2.paq"));
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath2 + " /hawq_register_test/t/hive.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath2 + " hdfs://localhost:8020/hawq_register_test/hive1.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath2 + " hdfs://localhost:8020/hawq_register_test/hive2.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath2 + " hdfs://localhost:8020/hawq_register_test/t/hive.paq"));
 
 	util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_test"));
+	EXPECT_EQ(0, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_test"));
 
 	util.query("select * from hawqregister;", 12);
 	util.execute("insert into hawqregister values(1);");
 	util.query("select * from hawqregister;", 13);
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm -r /hawq_register_test"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm -r hdfs://localhost:8020/hawq_register_test"));
 	util.execute("drop table hawqregister;");
 }
 
 TEST_F(TestHawqRegister, TestHashDistributedTable) {
 	SQLUtility util;
 	string rootPath(util.getTestRootPath());
-	string relativePath("/testhawqregister/test_hawq.paq");
+	string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
 	string filePath = rootPath + relativePath;
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " /hawq_register_hawq.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq"));
 
 	util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet)
distributed by (i);");
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_hawq.paq"));
+	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_hawq.paq"));
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm /hawq_register_hawq.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_hawq.paq"));
 	util.execute("drop table hawqregister;");
 }
 
 TEST_F(TestHawqRegister, TestNotParquetFile) {
 	SQLUtility util;
 	string rootPath(util.getTestRootPath());
-	string relativePath("/testhawqregister/test_not_paq");
+	string relativePath("/ManagementTool/test_hawq_register_not_paq");
 	string filePath = rootPath + relativePath;
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " /hawq_register_test_not_paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_test_not_paq"));
 
 	util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_test_not_paq"));
+	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_test_not_paq"));
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm /hawq_register_test_not_paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_test_not_paq"));
 	util.execute("drop table hawqregister;");
 }
 
 TEST_F(TestHawqRegister, TestNotParquetTable) {
 	SQLUtility util;
 	string rootPath(util.getTestRootPath());
-	string relativePath("/testhawqregister/test_hawq.paq");
+	string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
 	string filePath = rootPath + relativePath;
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " /hawq_register_hawq.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq"));
 
 	util.execute("create table hawqregister(i int);");
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_hawq.paq"));
+	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister hdfs://localhost:8020/hawq_register_hawq.paq"));
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm /hawq_register_hawq.paq"));
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_hawq.paq"));
 	util.execute("drop table hawqregister;");
 }
 
@@ -143,8 +143,26 @@ TEST_F(TestHawqRegister, TestFileNotExist) {
 	util.execute("create table hawqregister(i int);");
 	util.query("select * from hawqregister;", 0);
 
-	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_file_not_exist"));
+	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister /hdfs://localhost:8020hawq_register_file_not_exist"));
+	util.query("select * from hawqregister;", 0);
+
+	util.execute("drop table hawqregister;");
+}
+
+TEST_F(TestHawqRegister, TestNotHDFSPath) {
+	SQLUtility util;
+	string rootPath(util.getTestRootPath());
+	string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
+	string filePath = rootPath + relativePath;
+
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq"));
+
+	util.execute("create table hawqregister(i int);");
+	util.query("select * from hawqregister;", 0);
+
+	EXPECT_EQ(1, Command::getCommandStatus("hawq register postgres hawqregister /hawq_register_hawq.paq"));
 	util.query("select * from hawqregister;", 0);
 
+	EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_hawq.paq"));
 	util.execute("drop table hawqregister;");
 }
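
The positive tests above all follow the same recipe: stage a local parquet fixture in HDFS under an absolute hdfs:// URI, run "hawq register", then verify the row count; TestNotHDFSPath and TestFileNotExist expect a non-zero exit when the path is not a valid hdfs:// URI. A hedged sketch of that recipe outside the gtest harness (the fixture path and namenode address are taken from the tests; verifying the count via psql is an assumption, the tests use SQLUtility instead):

import subprocess

# parquet fixture shipped with the feature tests, relative to the source tree
local_paq = "src/test/feature/ManagementTool/test_hawq_register_hawq.paq"
hdfs_uri = "hdfs://localhost:8020/hawq_register_hawq.paq"

# 1. stage the file in HDFS under an absolute hdfs:// URI (a bare path such as
#    /hawq_register_hawq.paq is now rejected by hawq register)
subprocess.check_call("hadoop fs -put %s %s" % (local_paq, hdfs_uri), shell=True)

# 2. register it into the existing parquet table 'hawqregister' in database 'postgres'
subprocess.check_call("hawq register postgres hawqregister %s" % hdfs_uri, shell=True)

# 3. the file's rows are now visible through the table (checked here with psql, an assumption)
subprocess.check_call('psql postgres -c "select count(*) from hawqregister;"', shell=True)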

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/src/test/feature/ManagementTool/test_hawq_register_hawq.paq
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register_hawq.paq b/src/test/feature/ManagementTool/test_hawq_register_hawq.paq
new file mode 100644
index 0000000..f2adb4b
Binary files /dev/null and b/src/test/feature/ManagementTool/test_hawq_register_hawq.paq differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/src/test/feature/ManagementTool/test_hawq_register_hive.paq
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register_hive.paq b/src/test/feature/ManagementTool/test_hawq_register_hive.paq
new file mode 100644
index 0000000..a356fc7
Binary files /dev/null and b/src/test/feature/ManagementTool/test_hawq_register_hive.paq differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/src/test/feature/ManagementTool/test_hawq_register_not_paq
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register_not_paq b/src/test/feature/ManagementTool/test_hawq_register_not_paq
new file mode 100644
index 0000000..dc75c44
Binary files /dev/null and b/src/test/feature/ManagementTool/test_hawq_register_not_paq differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/src/test/feature/ManagementTool/test_hive.paq
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hive.paq b/src/test/feature/ManagementTool/test_hive.paq
deleted file mode 100644
index a356fc7..0000000
Binary files a/src/test/feature/ManagementTool/test_hive.paq and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/src/test/feature/ManagementTool/test_not_paq
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_not_paq b/src/test/feature/ManagementTool/test_not_paq
deleted file mode 100644
index dc75c44..0000000
Binary files a/src/test/feature/ManagementTool/test_not_paq and /dev/null differ

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/tools/bin/hawqpylib/HAWQ_HELP.py
----------------------------------------------------------------------
diff --git a/tools/bin/hawqpylib/HAWQ_HELP.py b/tools/bin/hawqpylib/HAWQ_HELP.py
index e0d901f..b3a2dee 100755
--- a/tools/bin/hawqpylib/HAWQ_HELP.py
+++ b/tools/bin/hawqpylib/HAWQ_HELP.py
@@ -37,6 +37,7 @@ The most commonly used hawq "commands" are:
    ssh-exkeys    Exchanges SSH public keys between hosts.
    check         Verifies and validates HAWQ settings.
    checkperf     Verifies the baseline hardware performance of hosts.
+   register      Register parquet files generated by other systems into the corresponding table in HAWQ
 
 See 'hawq <command> help' for more information on a specific command.
 """

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 876aed7..8e0e621 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -39,6 +39,7 @@ logger = get_default_logger()
 EXECNAME = os.path.split(__file__)[-1]
 setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
 
+
 def create_opt_parser(version):
     parser = OptParser(option_class=OptChecker,
                        usage='usage: %prog [options] database_name table_name file_or_dir_path_in_hdfs',
@@ -51,11 +52,20 @@ def create_opt_parser(version):
     return parser
 
 
+def check_hadoop_command():
+    hdfscmd = "hadoop"
+    result = local_ssh(hdfscmd);
+    if result != 0:
+        logger.error("command 'hadoop' is not available, please set environment variable
$PATH to fix this")
+        sys.exit(1)
+
+
 def get_seg_name(options, databasename, tablename):
     try:
         relfilenode = 0
         relname = ""
-        query = "select pg_class2.relname from pg_class as pg_class1, pg_appendonly, pg_class
as pg_class2 where pg_class1.relname ='%s' and  pg_class1.oid = pg_appendonly.relid and pg_appendonly.segrelid
= pg_class2.oid;" % tablename
+        query = ("select pg_class2.relname from pg_class as pg_class1, pg_appendonly, pg_class
as pg_class2 where pg_class1.relname ='%s' "
+                 "and  pg_class1.oid = pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;")
% tablename
         dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user,
dbname=databasename)
         conn = dbconn.connect(dburl, True)
         rows = dbconn.execSQL(conn, query)
@@ -103,6 +113,61 @@ def check_hash_type(options, databasename, tablename):
         sys.exit(1)
 
 
+def get_metadata_from_database(options, databasename, tablename, seg_name):
+    try:
+        query = "select segno from pg_aoseg.%s;" % seg_name
+        dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename)
+        conn = dbconn.connect(dburl, False)
+        rows = dbconn.execSQL(conn, query)
+	conn.commit()
+        conn.close()
+
+    except DatabaseError, ex:
+        logger.error("Failed to connect to database, this script can only be run when the
database is up")
+        logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host,
options.port, options.user, databasename, query))
+        sys.exit(1)
+
+    firstsegno = rows.rowcount + 1
+
+    # get the full path of correspoding file for target table
+    try:
+        query = ("select location, gp_persistent_tablespace_node.tablespace_oid, database_oid,
relfilenode from pg_class, gp_persistent_relation_node, "
+                 "gp_persistent_tablespace_node, gp_persistent_filespace_node where relname
= '%s' and pg_class.relfilenode = "
+                 "gp_persistent_relation_node.relfilenode_oid and gp_persistent_relation_node.tablespace_oid
= gp_persistent_tablespace_node.tablespace_oid "
+                 "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;")
% tablename
+        dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user,
dbname=databasename)
+        conn = dbconn.connect(dburl, False)
+        rows = dbconn.execSQL(conn, query)
+	conn.commit()
+        conn.close()
+
+    except DatabaseError, ex:
+        logger.error("Failed to connect to database, this script can only be run when the
database is up")
+        logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host,
options.port, options.user, databasename, query))
+        sys.exit(1)
+
+    for row in rows:
+        tabledir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + "/" + str(row[3]) + "/"
+
+    return firstsegno, tabledir
+
+
+def check_files_and_table_in_same_hdfs_cluster(filepath, tabledir):
+    # check whether the files to be registered is in hdfs
+    filesystem = filepath.split('://')
+    if filesystem[0] != 'hdfs':
+        logger.error("Only support to register file(s) in hdfs")
+        sys.exit(1)
+    fileroot = filepath.split('/')
+    tableroot = tabledir.split('/')
+    print fileroot
+    print tableroot
+    # check the root url of them. eg: for 'hdfs://localhost:8020/temp/tempfile', we check 'hdfs://localhost:8020'
+    if fileroot[0] != tableroot[0] or fileroot[1] != tableroot[1] or fileroot[2] != tableroot[2]:
+        logger.error("Files to be registered and the table are not in the same hdfs cluster.")
+        sys.exit(1)
+
+
 def get_files_in_hdfs(filepath):
     files = []
     sizes = []
@@ -143,48 +208,13 @@ def check_parquet_format(options, files):
             sys.exit(1)
 
 
-def get_metadata_from_database(options, databasename, seg_name):
-    try:
-        query = "select segno from pg_aoseg.%s;" % seg_name
-        dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename)
-        conn = dbconn.connect(dburl, False)
-        rows = dbconn.execSQL(conn, query)
-	conn.commit()
-        conn.close()
-
-    except DatabaseError, ex:
-        logger.error("Failed to connect to database, this script can only be run when the
database is up")
-        logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host,
options.port, options.user, databasename, query))
-        sys.exit(1)
-
-    return rows.rowcount + 1
-
-
-def move_files_in_hdfs(options, databasename, tablename, files, firstsegno, normal):
-    # get the full path of correspoding file for target table
-    try:
-        query = "select location, gp_persistent_tablespace_node.tablespace_oid, database_oid,
relfilenode from pg_class, gp_persistent_relation_node, gp_persistent_tablespace_node, gp_persistent_filespace_node
where relname = '%s' and pg_class.relfilenode = gp_persistent_relation_node.relfilenode_oid
and gp_persistent_relation_node.tablespace_oid = gp_persistent_tablespace_node.tablespace_oid
and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;"
% tablename
-        dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user,
dbname=databasename)
-        conn = dbconn.connect(dburl, False)
-        rows = dbconn.execSQL(conn, query)
-	conn.commit()
-        conn.close()
-
-    except DatabaseError, ex:
-        logger.error("Failed to connect to database, this script can only be run when the
database is up")
-        logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host,
options.port, options.user, databasename, query))
-        sys.exit(1)
-
-
+def move_files_in_hdfs(options, databasename, tablename, files, firstsegno, tabledir, normal):
     # move file(s) in src path into the folder correspoding to the target table
     if (normal == True):
-        for row in rows:
-            destdir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + "/" + str(row[3]) + "/"
-
         segno = firstsegno
         for file in files:
             srcfile = file
-            dstfile = destdir + str(segno)
+            dstfile = tabledir + str(segno)
             segno += 1
             if srcfile != dstfile:
                 hdfscmd = "hadoop fs -mv %s %s" % (srcfile, dstfile)
@@ -194,13 +224,10 @@ def move_files_in_hdfs(options, databasename, tablename, files, firstsegno, norm
                     logger.error("Fail to move '%s' to '%s'" % (srcfile, dstfile))
                     sys.exit(1)
     else:
-        for row in rows:
-            srcdir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + "/" + str(row[3]) + "/"
-
         segno = firstsegno
         for file in files:
             dstfile = file
-            srcfile = srcdir + str(segno)
+            srcfile = tabledir + str(segno)
             segno += 1
             if srcfile != dstfile:
                 hdfscmd = "hadoop fs -mv %s %s" % (srcfile, dstfile)
@@ -211,7 +238,7 @@ def move_files_in_hdfs(options, databasename, tablename, files, firstsegno, norm
                     sys.exit(1)
 
 
-def insert_metadata_into_database(options, databasename, tablename, seg_name, firstsegno, eofs):
+def insert_metadata_into_database(options, databasename, tablename, seg_name, firstsegno, tabledir, eofs):
     try:
         query = "SET allow_system_table_mods='dml';"
         segno = firstsegno
@@ -228,7 +255,7 @@ def insert_metadata_into_database(options, databasename, tablename, seg_name, fi
     except DatabaseError, ex:
         logger.error("Failed to connect to database, this script can only be run when the
database is up")
         logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host,
options.port, options.user, databasename, query))
-	move_files_in_hdfs(options, databasename, tablename, files, firstsegno, False)
+	move_files_in_hdfs(options, databasename, tablename, files, firstsegno, tabledir, False)
 
         sys.exit(1)
 
@@ -245,28 +272,37 @@ def main(args=None):
     tablename = args[1]
     filepath = args[2]
 
-    # 1. get the seg_name from database
+    # 1. check whether the path of shell command 'hadoop' is set.
+    check_hadoop_command()
+
+    # 2. get the seg_name from database
     seg_name = get_seg_name(options, databasename, tablename)
 
-    # 2. check whether target table is hash-typed, in that case simple insertion does not work
+    # 3. check whether target table is hash-typed, in that case simple insertion does not work
     result = check_hash_type(options, databasename, tablename)
 
-    # 3. get all the files refered by 'filepath', which could be a file or a directory containing all the files
+    # 4. get the metadata to be inserted from hdfs
+    firstsegno, tabledir = get_metadata_from_database(options, databasename, tablename, seg_name)
+
+    # 5. check whether all the files refered by 'filepath' and the location corresponding to the table are in the same hdfs cluster
+    check_files_and_table_in_same_hdfs_cluster(filepath, tabledir)
+
+    # 6. get all the files refered by 'filepath', which could be a file or a directory containing all the files
     files, sizes = get_files_in_hdfs(filepath)
     print "File(s) to be registered:"
     print files
 
-    # 4. check whether the file to be registered is parquet format
+    # 7. check whether the file to be registered is parquet format
     check_parquet_format(options, files)
 
-    # 5. get the metadata to be inserted from hdfs
-    firstsegno = get_metadata_from_database(options, databasename, seg_name)
+    # 8. move the file in hdfs to proper location
+    move_files_in_hdfs(options, databasename, tablename, files, firstsegno, tabledir, True)
 
-    # 6. move the file in hdfs to proper location
-    move_files_in_hdfs(options, databasename, tablename, files, firstsegno, True)
+    # 9. insert the metadata into database
+    insert_metadata_into_database(options, databasename, tablename, seg_name, firstsegno, tabledir, sizes)
 
-    # 7. insert the metadata into database
-    insert_metadata_into_database(options, databasename, tablename, seg_name, firstsegno, sizes)
+    # 10. report the final status of hawq register
+    logger.info("Hawq register succeed.")
 
 if __name__ == '__main__':
     sys.exit(main())
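
Of the new checks, check_files_and_table_in_same_hdfs_cluster is the least obvious: it splits both the file path and the table directory on '/' and compares only the first three components, i.e. the scheme and the namenode authority. A rough standalone sketch of that comparison, returning False where the tool logs an error and exits (the sample paths are illustrative):

def check_files_and_table_in_same_hdfs_cluster(filepath, tabledir):
    # reject anything that is not an hdfs:// URI
    if filepath.split('://')[0] != 'hdfs':
        return False
    # 'hdfs://localhost:8020/temp/tempfile'.split('/') ->
    # ['hdfs:', '', 'localhost:8020', 'temp', 'tempfile'],
    # so the first three components together are 'hdfs://localhost:8020'
    return filepath.split('/')[:3] == tabledir.split('/')[:3]

print(check_files_and_table_in_same_hdfs_cluster(
    'hdfs://localhost:8020/temp/tempfile',
    'hdfs://localhost:8020/hawq_default/16385/16387/77160/'))   # True
print(check_files_and_table_in_same_hdfs_cluster(
    'hdfs://otherhost:8020/temp/tempfile',
    'hdfs://localhost:8020/hawq_default/16385/16387/77160/'))   # False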

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3e3f93dd/tools/doc/hawqregister_help
----------------------------------------------------------------------
diff --git a/tools/doc/hawqregister_help b/tools/doc/hawqregister_help
index d5a5a0d..ade1e3a 100644
--- a/tools/doc/hawqregister_help
+++ b/tools/doc/hawqregister_help
@@ -30,6 +30,8 @@ User have to make sure that the meta data of the parquet file(s)
 and the table are consistent.
 The table to be registered into should not be hash distributed, which
 is created by using "distributed by" statement when creating that table.
+The file(s) to be registered and the table in HAWQ must be in the
+same HDFS cluster.
 
 *****************************************************
 Arguments
@@ -86,17 +88,18 @@ CONNECTION OPTIONS
 EXAMPLES
 *****************************************************
 
-Run "hawq register" to register a parquet file with path '/temp/hive.paq'
-in hdfs generated by hive into table 'parquet_table' in HAWQ, which is in the
-database named 'postgres'.
+Run "hawq register" to register a parquet file in HDFS with path
+'hdfs://localhost:8020/temp/hive.paq' generated by hive into table
+'parquet_table' in HAWQ, which is in the database named 'postgres'.
+
 Assume the location of the database is 'hdfs://localhost:8020/hawq_default',
 tablespace id is '16385', database id is '16387', table filenode id is '77160',
 last file under the filenode numbered '7'.
 
-$ hawq register postgres parquet_table /temp/hive.paq
+$ hawq register postgres parquet_table hdfs://localhost:8020/temp/hive.paq
 
-This will move the file '/temp/hive.paq' into the corresponding new place
-'hdfs://localhost:8020/hawq_default/16385/16387/77160/8' in hdfs, then
+This will move the file 'hdfs://localhost:8020/temp/hive.paq' into the corresponding
+new place 'hdfs://localhost:8020/hawq_default/16385/16387/77160/8' in HDFS, then
 update the meta data of the table 'parquet_table' in HAWQ which is in the
 table 'pg_aoseg.pg_paqseg_77160'.
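
A hedged sketch of how the destination path in that example is put together from the values listed above (the helper name is illustrative, not part of hawqregister, which builds the same string inline from the pg_class / gp_persistent_* query):

def destination_path(location, tablespace_oid, database_oid, relfilenode, segno):
    # <database location>/<tablespace oid>/<database oid>/<table filenode>/<segment number>
    return "%s/%d/%d/%d/%d" % (location, tablespace_oid, database_oid, relfilenode, segno)

# last existing segment file is numbered 7, so the registered file becomes segment 8
print(destination_path('hdfs://localhost:8020/hawq_default', 16385, 16387, 77160, 7 + 1))
# -> hdfs://localhost:8020/hawq_default/16385/16387/77160/8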
 

