hawq-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From r...@apache.org
Subject incubator-hawq git commit: HAWQ-626. HAWQ stop segments check if node alive first
Date Mon, 11 Apr 2016 08:43:36 GMT
Repository: incubator-hawq
Updated Branches:
  refs/heads/master a6680fbe1 -> cad586c95


HAWQ-626. HAWQ stop segments check if node alive first


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/cad586c9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/cad586c9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/cad586c9

Branch: refs/heads/master
Commit: cad586c95534557b1067d4bcec587c9fa973f367
Parents: a6680fb
Author: rlei <rlei@pivotal.io>
Authored: Mon Apr 11 13:21:05 2016 +0800
Committer: rlei <rlei@pivotal.io>
Committed: Mon Apr 11 16:24:02 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawq_ctl | 47 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/cad586c9/tools/bin/hawq_ctl
----------------------------------------------------------------------
diff --git a/tools/bin/hawq_ctl b/tools/bin/hawq_ctl
index f79e3a9..32752b6 100755
--- a/tools/bin/hawq_ctl
+++ b/tools/bin/hawq_ctl
@@ -434,6 +434,7 @@ class HawqInit:
             scpcmd = "scp %s/etc/_mgmt_config %s:%s/etc/_mgmt_config > /dev/null" % (self.GPHOME, host, self.GPHOME)
             local_ssh(scpcmd)
             work_list.append({"func":remote_ssh,"args":(segment_cmd_str, host, self.user, q)})
+        logger.info("Total segment number is: %s" % len(self.host_list))
         work_list.append({"func":check_progress,"args":(q, self.hosts_count_number, 'init', 0, self.quiet)})
         node_init = HawqCommands(name='HAWQ', action_name = 'init', logger = logger)
         node_init.get_function_list(work_list)
@@ -708,11 +709,12 @@ class HawqStart:
         work_list = []
         for host in working_hosts:
             work_list.append({"func":remote_ssh,"args":(segment_cmd_str, host, self.user, q)})
+        logger.info("Total segment number is: %s" % len(self.host_list))
         work_list.append({"func":check_progress,"args":(q, self.hosts_count_number, 'start', len(bad_hosts), self.quiet)})
         node_init = HawqCommands(name = 'HAWQ', action_name = 'start', logger = logger)
         node_init.get_function_list(work_list)
         node_init.start()
-        logger.info("Total threads return value is : %d" % node_init.return_flag)
+        logger.debug("Total threads return value is : %d" % node_init.return_flag)
         if node_init.return_flag != 0:
             logger.error("Segments start failed")
         else:
@@ -756,6 +758,7 @@ class HawqStop:
         self.dburl = None
         self.conn = None
         self._get_config()
+        self.ignore_bad_hosts = opts.ignore_bad_hosts
 
     def _get_config(self):
         check_items = ('hawq_master_address_host', 'hawq_master_address_port',
@@ -916,13 +919,13 @@ class HawqStop:
             logger.info("Cluster stopped successfully")
         return cluster_result
 
-    def _running_segments_list(self):
+    def _running_segments_list(self, host_list):
         work_list = []
         running_host = []
         stopped_host = []
         seg_check_q = Queue.Queue()
 
-        for host in self.host_list:
+        for host in host_list:
             work_list.append({"func":check_hawq_running,"args":(host, self.segment_data_directory, self.segment_port, self.user, logger)})
 
         node_checks = threads_with_return(name = 'HAWQ', action_name = 'check', logger = logger, return_values = seg_check_q)
@@ -939,25 +942,47 @@ class HawqStop:
 
 
     def _stopAllSegments(self):
-        running_host, stopped_host = self._running_segments_list()
+        bad_hosts = []
+        working_hosts = self.host_list
         segment_cmd_str = self._stop_segment_cmd()
-        # Execute segment stop command on each nodes.
         logger.info("Stop segments in list: %s" % self.host_list)
+
+        working_hosts, bad_hosts = exclude_bad_hosts(self.host_list)
+        if len(bad_hosts) == len(self.host_list):
+            logger.error("Unable to SSH on any of the hosts, skipping segment stop operation")
+            return 1
+
+        process_running_host, stopped_host = self._running_segments_list(working_hosts)
+
+        # Execute segment stop command on specified nodes.
+        if self.ignore_bad_hosts:
+            if len(bad_hosts) > 0:
+                logger.warning("Skipping stop segments in the list {0}, SSH test failed".format(bad_hosts))
+            skip_host_list = bad_hosts + stopped_host
+        else:
+            skip_host_list = stopped_host
+
         work_list = []
-        self.running_segment_num = len(running_host)
         q = Queue.Queue()
-        for host in running_host:
+        for host in process_running_host:
             work_list.append({"func":remote_ssh,"args":(segment_cmd_str, host, self.user, q)})
-
-        work_list.append({"func":check_progress,"args":(q, self.running_segment_num, 'stop', len(stopped_host), self.quiet)})
+        logger.info("Total segment number is: %s" % len(self.host_list))
+        work_list.append({"func":check_progress,"args":(q, len(process_running_host), 'stop', len(skip_host_list), self.quiet)})
         node_init = HawqCommands(name = 'HAWQ', action_name = 'stop', logger = logger)
         node_init.get_function_list(work_list)
         node_init.start()
-        if node_init.return_flag != 0:
+        if self.ignore_bad_hosts:
+            total_return_flag = node_init.return_flag
+        else:
+            if len(bad_hosts) > 0:
+                logger.error("%s segment stop failed, SSH test failed on %s" % (len(bad_hosts), bad_hosts))
+            total_return_flag = node_init.return_flag + len(bad_hosts)
+
+        if total_return_flag != 0:
             logger.error("Segments stop failed")
         else:
             logger.info("Segments stopped successfully")
-        return node_init.return_flag
+        return total_return_flag
 
     def run(self):
         if self.node_type == "master":


Mime
View raw message