ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From mithm...@apache.org
Subject ambari git commit: AMBARI-15704: Include an alert informing the number of segments marked down in gp_segment_configuration table (Goutam Tadi via mithmatt)
Date Tue, 05 Apr 2016 19:06:12 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk d7c5a1bbf -> d0da3f7c8


AMBARI-15704: Include an alert informing the number of segments marked down in gp_segment_configuration table (Goutam Tadi via mithmatt)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d0da3f7c
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d0da3f7c
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d0da3f7c

Branch: refs/heads/trunk
Commit: d0da3f7c8f00b84240f3c2d0222c769db97efd13
Parents: d7c5a1b
Author: Matt <mmathew@pivotal.io>
Authored: Tue Apr 5 11:57:37 2016 -0700
Committer: Matt <mmathew@pivotal.io>
Committed: Tue Apr 5 11:57:37 2016 -0700

----------------------------------------------------------------------
 .../common-services/HAWQ/2.0.0/alerts.json      |  13 ++
 .../alerts/alert_segment_registration_status.py | 117 +++++++++++++
 .../test_alert_segment_registration_status.py   | 170 +++++++++++++++++++
 3 files changed, 300 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/d0da3f7c/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
index 8da5beb..620cb90 100644
--- a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/alerts.json
@@ -42,6 +42,19 @@
         }
       },
       {
+        "name": "hawqsegments_registration_status",
+        "label": "HAWQ Segment Registration Status",
+        "description": "This alert is triggered when a HAWQ Segment node fails to register with the HAWQ Master.",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": "HAWQ/2.0.0/package/alerts/alert_segment_registration_status.py",
+          "parameters": []
+        }
+      },
+      {
         "name": "hawq_master_process",
         "label": "HAWQ Master Process",
         "description": "This alert is triggered if the HAWQ Master process cannot be confirmed to be up and listening on the network.",

http://git-wip-us.apache.org/repos/asf/ambari/blob/d0da3f7c/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_segment_registration_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_segment_registration_status.py b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_segment_registration_status.py
new file mode 100644
index 0000000..4d09763
--- /dev/null
+++ b/ambari-server/src/main/resources/common-services/HAWQ/2.0.0/package/alerts/alert_segment_registration_status.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import os.path
+import re
+from resource_management.core.shell import call
+
+HAWQ_USER = 'gpadmin'
+HAWQ_HOME='/usr/local/hawq'
+HAWQ_GREENPLUM_PATH_FILE = "{0}/greenplum_path.sh".format(HAWQ_HOME)
+HAWQ_SLAVES_FILE= "{0}/etc/slaves".format(HAWQ_HOME)
+HAWQMASTER_PORT = '{{hawq-site/hawq_master_address_port}}'
+
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_WARNING = 'WARNING'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+
+logger = logging.getLogger('ambari_alerts')
+
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used to build the dictionary passed into execute
+  """
+  return ([HAWQMASTER_PORT])
+
+
+def execute(configurations={}, parameters={}, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  configurations (dictionary): a mapping of configuration key to value
+  parameters (dictionary): a mapping of script parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  if configurations is None:
+    logger.error("[Alert HAWQ] Configurations file is either not accessible or not present.")
+    return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.'])
+  logger.debug("Configuration File found")
+  if not os.path.isfile(HAWQ_SLAVES_FILE):
+    logger.error("[Alert HAWQ] Slaves file is not present in {0}".format(HAWQ_SLAVES_FILE))
+    return (RESULT_STATE_SKIPPED, ['Slaves file is not present in /usr/local/hawq/etc'])
+
+  try:
+    db_segment_list = get_segment_list_db(configurations[HAWQMASTER_PORT])
+    ambari_segment_list = get_segment_list_ambari()
+    #Converted to set to omit any duplicates inserted into slaves file
+    segment_diff = (set(db_segment_list) ^ set(ambari_segment_list))
+    segment_diff_len = len(segment_diff)
+    #segment_diff_len cannot be negative since this diff is calculated two ways. (eg: "A - B" & "B - A")
+    if not segment_diff_len :
+      return (RESULT_STATE_OK, ['All HAWQ Segments are registered.'])
+    msg =   '{0} HAWQ Segments are not registered with HAWQ Master.'.format(segment_diff_len) if (segment_diff_len > 1) else '1 HAWQ Segment is not registered with HAWQ Master.'
+    logger.error(" [Alert HAWQ] Segments Unregistered: {0} are unregistered/down.".format(list(segment_diff)))
+    return (RESULT_STATE_WARNING, [msg + " Try restarting HAWQ service if a segment has been added/removed. Check the log file in /var/log/ambari-agent/ambari-alerts.log for more details on unregistered hosts."])
+  except Exception, ex:
+    logger.error('[Alert HAWQ]  Could not find HAWQ Segments registration status on {0}'.format(host_name))
+    logger.exception(str(ex))
+
+  # Registration status cannot be determined
+  return (RESULT_STATE_UNKNOWN, ['HAWQ Segments Registration Status cannot be determined.'])
+
+
+def get_segment_list_db(port):
+  """
+  Gets the Segment registrations count  from HAWQMASTER by running a SQL command.
+  """
+  logger.debug("Fetching segment list from HAWQ Master Database.")
+  query = " SELECT hostname FROM gp_segment_configuration where role = 'p' and status = 'u' "
+  cmd = "source {0} && psql -p {1} -t -d template1 -c \"{2};\"".format(HAWQ_GREENPLUM_PATH_FILE, port, query)
+ 
+  returncode, command_output = call(cmd,
+                            user=HAWQ_USER,
+                            timeout=60)
+
+  if returncode:
+    raise
+  segment_list = [segment.strip() for segment in command_output.split('\n')] if command_output else []
+  return [hostname.strip() for hostname in segment_list]
+
+def get_segment_list_ambari():
+  """
+  Gets the Segment count from HAWQMASTER host from /usr/local/hawq/etc/slaves saved from ambari configurations file.
+  """
+  segment_list = []
+  logger.debug("Fetching Slaves from Slaves file in {0}".format(HAWQ_SLAVES_FILE))
+  try:
+    #regex to read all not empty lines in a file.
+    with open(HAWQ_SLAVES_FILE, "r") as slaves_file:
+      slaves = slaves_file.read()
+    segment_list = re.findall('\S+' , slaves)
+    return segment_list
+  except Exception as ex:
+     logger.error("[Alert HAWQ] Get Segment list from Slaves : Could not read slaves from {0}".format(HAWQ_SLAVES_FILE))
+     raise ex
+  

http://git-wip-us.apache.org/repos/asf/ambari/blob/d0da3f7c/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_segment_registration_status.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_segment_registration_status.py b/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_segment_registration_status.py
new file mode 100644
index 0000000..6bb5930
--- /dev/null
+++ b/ambari-server/src/test/python/stacks/2.3/HAWQ/test_alert_segment_registration_status.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+
+'''
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+'''
+
+# System imports
+import os
+import sys
+from resource_management.core.shell import call
+from mock.mock import patch
+
+# Local imports
+from stacks.utils.RMFTestCase import *
+
+COMMON_SERVICES_ALERTS_DIR = "HAWQ/2.0.0/package/alerts"
+
+file_path = os.path.dirname(os.path.abspath(__file__))
+file_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(file_path)))))
+file_path = os.path.join(file_path, "main", "resources", "common-services", COMMON_SERVICES_ALERTS_DIR)
+
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_WARNING = 'WARNING'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+
+class TestAlertRegistrationStatus(RMFTestCase):
+    
+  HOST_LIST_A = ['HOST1','HOST2','HOST3','HOST4']
+  HOST_LIST_B = ['HOST1','HOST3','HOST5','HOST4']
+  HOST_LIST_C = ['HOST1','HOST2','HOST3']
+
+  def setUp(self):
+    """
+    Import the class under test.
+    Because the class is present in a different folder, append its dir to the system path.
+    Also, shorten the import name and make it a global so the test functions can access it.
+    :return:
+    """
+    sys.path.append(file_path)
+    global alert_segment_registration_status
+    import alert_segment_registration_status
+
+  def test_missing_configs(self):
+    """
+    Check if the status is UNKNOWN when configs are missing.
+    """
+    configs = None
+    [status, messages] = alert_segment_registration_status.execute(configurations=configs)
+    self.assertEqual(status, RESULT_STATE_UNKNOWN)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual(messages[0], 'There were no configurations supplied to the script.')
+
+  @patch("os.path.isfile", return_value=False)
+  def test_missing_slave_file(self, os_path_file_mock):
+    """
+    Check if the status is SKIPPED when slaves file is missing.
+    """
+    configs={
+      "{{hawq-site/hawq_master_address_port}}": "5432"
+     }
+    [status, messages] = alert_segment_registration_status.execute(configurations=configs)
+    self.assertEqual(status, RESULT_STATE_SKIPPED)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual(messages[0], 'Slaves file is not present in /usr/local/hawq/etc')
+
+  @patch("alert_segment_registration_status.get_segment_list_db")
+  @patch("alert_segment_registration_status.get_segment_list_ambari")
+  @patch("os.path.isfile", return_value=True)
+  def test_successful_registration_status(self, os_path_isfile_mock, get_segment_list_ambari_mock, get_segment_list_db_mock):
+    """
+    Check if the status is OK if no difference in registration segment number and slaves count.
+    """
+    get_segment_list_ambari_mock.return_value=self.HOST_LIST_A
+    get_segment_list_db_mock.return_value=self.HOST_LIST_A
+    configs={
+      "{{hawq-site/hawq_master_address_port}}": "5432"
+     }
+
+    [status, messages] = alert_segment_registration_status.execute(configurations=configs)
+    self.assertEqual(status, RESULT_STATE_OK)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual(messages[0], 'All HAWQ Segments are registered.')
+
+  @patch("alert_segment_registration_status.get_segment_list_db")
+  @patch("alert_segment_registration_status.get_segment_list_ambari")
+  @patch("os.path.isfile", return_value=True)
+  def test_unsuccessful_registration_status_plural(self, os_path_isfile_mock, get_segment_list_ambari_mock, get_segment_list_db_mock):
+    """
+    Check if the status is WARNING if a difference is present in registration segment number and slaves count.
+    """
+    get_segment_list_ambari_mock.return_value=self.HOST_LIST_A
+    get_segment_list_db_mock.return_value=self.HOST_LIST_B
+    configs={
+      "{{hawq-site/hawq_master_address_port}}": "5432"
+     }
+
+    [status, messages] = alert_segment_registration_status.execute(configurations=configs)
+    self.assertEqual(status, RESULT_STATE_WARNING)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual(messages[0], '2 HAWQ Segments are not registered with HAWQ Master. Try restarting HAWQ service if a segment has been added/removed. Check the log file in /var/log/ambari-agent/ambari-alerts.log for more details on unregistered hosts.')
+
+  @patch("alert_segment_registration_status.get_segment_list_db")
+  @patch("alert_segment_registration_status.get_segment_list_ambari")
+  @patch("os.path.isfile", return_value=True)
+  def test_unsuccessful_registration_status(self, os_path_isfile_mock, get_segment_list_ambari_mock, get_segment_list_db_mock):
+    """
+    Check if the status is WARNING if a difference is present in registration segment number and slaves count.
+    """
+    get_segment_list_ambari_mock.return_value=self.HOST_LIST_A
+    get_segment_list_db_mock.return_value=self.HOST_LIST_C
+    configs={
+      "{{hawq-site/hawq_master_address_port}}": "5432"
+     }
+
+    [status, messages] = alert_segment_registration_status.execute(configurations=configs)
+    self.assertEqual(status, RESULT_STATE_WARNING)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual(messages[0], '1 HAWQ Segment is not registered with HAWQ Master. Try restarting HAWQ service if a segment has been added/removed. Check the log file in /var/log/ambari-agent/ambari-alerts.log for more details on unregistered hosts.')
+
+  @patch("alert_segment_registration_status.get_segment_list_db")
+  @patch("alert_segment_registration_status.get_segment_list_ambari")
+  @patch("os.path.isfile", return_value=True)
+  def test_exception_registration_status(self, os_path_isfile_mock, get_segment_list_ambari_mock, get_segment_list_db_mock):
+    """
+    Check if the status is UNKNOWN if an exception is thrown when finding registration segment number and slaves count.
+    """
+    get_segment_list_ambari_mock.return_value=self.HOST_LIST_A
+    get_segment_list_db_mock.side_effect=Exception("Exception raised to fail")
+    configs={
+      "{{hawq-site/hawq_master_address_port}}": "5432"
+     }
+
+    [status, messages] = alert_segment_registration_status.execute(configurations=configs)
+    self.assertEqual(status, RESULT_STATE_UNKNOWN)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual(messages[0], 'HAWQ Segments Registration Status cannot be determined.')
+
+  @patch("alert_segment_registration_status.get_segment_list_db")
+  @patch("alert_segment_registration_status.get_segment_list_ambari")
+  @patch("os.path.isfile", return_value=True)
+  def test_unsuccessful_empty_db_registration_status(self, os_path_isfile_mock, get_segment_list_ambari_mock, get_segment_list_db_mock):
+    """
+    Check if the status is WARNING if a difference is present in registration segment number and slaves count.
+    """
+    get_segment_list_ambari_mock.return_value=[]
+    get_segment_list_db_mock.return_value=self.HOST_LIST_C
+    configs={
+      "{{hawq-site/hawq_master_address_port}}": "5432"
+     }
+
+    [status, messages] = alert_segment_registration_status.execute(configurations=configs)
+    self.assertEqual(status, RESULT_STATE_WARNING)
+    self.assertTrue(messages is not None and len(messages) == 1)
+    self.assertEqual(messages[0], '3 HAWQ Segments are not registered with HAWQ Master. Try restarting HAWQ service if a segment has been added/removed. Check the log file in /var/log/ambari-agent/ambari-alerts.log for more details on unregistered hosts.')
+


Mime
View raw message