hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bhuvnesh2...@apache.org
Subject incubator-hawq git commit: HAWQ-617 Applied feedback
Date Mon, 04 Apr 2016 05:07:49 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/HAWQ-617 7e8331a7e -> 53d264e98


HAWQ-617 Applied feedback


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/53d264e9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/53d264e9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/53d264e9

Branch: refs/heads/HAWQ-617
Commit: 53d264e9869e408efc67ab1b9710b594d18bc1b0
Parents: 7e8331a
Author: Bhuvnesh Chaudhary <bchaudhary@pivotal.io>
Authored: Sun Apr 3 22:09:26 2016 -0700
Committer: Bhuvnesh Chaudhary <bchaudhary@pivotal.io>
Committed: Sun Apr 3 22:09:26 2016 -0700

----------------------------------------------------------------------
 .gitignore                     |  1 -
 tools/bin/hawq_ctl             | 23 ++++++++++++++++------
 tools/bin/hawqpylib/hawqlib.py | 39 +++++++++++++++++++++++++++++++++++++
 tools/doc/gpscp_help           |  7 +++++++
 4 files changed, 63 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/53d264e9/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 485dee6..0000000
--- a/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-.idea

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/53d264e9/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index c43dfd4..a4e0c3c 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -493,6 +493,7 @@ class HawqStart:
         self.masteronly = opts.masteronly 
         self.special_mode = opts.special_mode
         self.restrict =  opts.restrict
+        self.ignore_bad_hosts = opts.ignore_bad_hosts
 
         self._get_config()
 
@@ -682,13 +683,24 @@ class HawqStart:
 
     def _start_all_segments(self):
         logger.info("Start all the segments in hawq cluster")
-        segment_cmd_str = self._start_segment_cmd()
         logger.info("Start segments in list: %s" % self.host_list)
-        work_list = []
+        bad_hosts = []
+        working_hosts = self.host_list
+        if self.ignore_bad_hosts:
+            working_hosts, bad_hosts = exclude_bad_hosts(self.host_list)
+            if len(bad_hosts) == len(self.host_list):
+                logger.error("Unable to SSH on any of the hosts, skipping segment start operation")
+                return
+            if len(bad_hosts) > 0:
+                logger.warning("Skipping starting segments in the list {0}, SSH test failed".format(bad_hosts))
+                self.hosts_count_number -= len(bad_hosts)
+
+        segment_cmd_str = self._start_segment_cmd()
         q = Queue.Queue()
-        for host in self.host_list:
+        work_list = []
+        for host in working_hosts:
             work_list.append({"func":remote_ssh,"args":(segment_cmd_str, host, self.user, q)})
-        work_list.append({"func":check_progress,"args":(q, self.hosts_count_number, 'start', 0, self.quiet)})
+        work_list.append({"func":check_progress,"args":(q, self.hosts_count_number, 'start', len(bad_hosts), self.quiet)})
         node_init = HawqCommands(name = 'HAWQ', action_name = 'start', logger = logger)
         node_init.get_function_list(work_list)
         node_init.start()
@@ -699,7 +711,6 @@ class HawqStart:
             logger.info("Segments started successfully")
         return node_init.return_flag
 
-
     def run(self):
         if self.node_type == "master":
             check_return_code(self.start_master(), logger, \
@@ -1205,7 +1216,7 @@ def hawq_activate_standby(opts, hawq_dict):
     logger.info("Start hawq cluster")
     cmd = "%s; hawq start master" % source_hawq_env
     check_return_code(remote_ssh(cmd, new_master_host_name, ''), logger, "Start master failed")
-    cmd = "%s; hawq start allsegments" % source_hawq_env
+    cmd = "%s; hawq start allsegments %s" % (source_hawq_env, ignore_bad_hosts)
     check_return_code(remote_ssh(cmd, new_master_host_name, ''), logger, "Start all the segments failed")
     cmd = '''sed -i "/gp_persistent_repair_global_sequence/d" %s/%s''' % (hawq_dict['hawq_master_directory'], 'postgresql.conf')
     check_return_code(remote_ssh(cmd, new_master_host_name, ''))

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/53d264e9/tools/bin/hawqpylib/hawqlib.py
----------------------------------------------------------------------
diff --git a/tools/bin/hawqpylib/hawqlib.py b/tools/bin/hawqpylib/hawqlib.py
index 85354b4..79bcdae 100755
--- a/tools/bin/hawqpylib/hawqlib.py
+++ b/tools/bin/hawqpylib/hawqlib.py
@@ -24,6 +24,8 @@ from xml.dom import minidom
 from xml.etree.ElementTree import ElementTree
 import shutil
 from gppylib.db import dbconn
+from gppylib.commands.base import WorkerPool, REMOTE
+from gppylib.commands.unix import Echo
 import re
 
 
@@ -484,3 +486,40 @@ def get_hawq_hostname_all(master_port):
 
     hawq_host_array = {'master': {master_host: master_status}, 'standby': {standby_host: standby_status}, 'segment': seg_host_list} 
     return hawq_host_array
+
+def get_host_status(hostlist):
+    """
+    Test if SSH command works on a host and return a dictionary
+    Return Ex: {host1: True, host2: False}
+    where True represents SSH command success and False represents failure
+    """
+    if not isinstance(hostlist, list):
+        raise Exception("Input parameter should be of type list")
+
+    pool = WorkerPool()
+
+    for host in hostlist:
+        cmd = Echo('ssh test', '', ctxt=REMOTE, remoteHost=host)
+        pool.addCommand(cmd)
+
+    pool.join()
+    pool.haltWork()
+
+    host_status_dict = {}
+    for cmd in pool.getCompletedItems():
+        if not cmd.get_results().wasSuccessful():
+            host_status_dict[cmd.remoteHost] = False
+        else:
+            host_status_dict[cmd.remoteHost] = True
+
+    return host_status_dict
+
+
+def exclude_bad_hosts(host_list):
+    """
+    Split Hosts on which SSH works vs node on which it fails
+    """
+    host_status_dict = get_host_status(host_list)
+    working_hosts = [host for host in host_status_dict.keys() if host_status_dict[host]]
+    bad_hosts = list(set(host_list) - set(working_hosts))
+    return working_hosts, bad_hosts

http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/53d264e9/tools/doc/gpscp_help
----------------------------------------------------------------------
diff --git a/tools/doc/gpscp_help b/tools/doc/gpscp_help
index 4212288..aa5bd25 100755
--- a/tools/doc/gpscp_help
+++ b/tools/doc/gpscp_help
@@ -84,6 +84,13 @@ character is an equal sign (=).
 Optional. Reports additional messages in addition to the 
 SCP command output.
 
+--ignore-bad-hosts
+Ignore copying files to the hosts on which test SSH attempt failed
+and continue with the remaining. If test SSH failed, it indicates 
+that either the host is not working or there are issues while attempting
+to SSH on these host. Once the skipped hosts are brought back, ensure 
+that the required files are synced to them.
+
 <file_to_copy>
 
 Required. The file name (or absolute path) of a file that 


Mime
View raw message