From: jaoki@apache.org
To: commits@ambari.apache.org
Reply-To: ambari-dev@ambari.apache.org
Date: Mon, 24 Nov 2014 23:45:28 -0000
Subject: [09/24] ambari git commit: AMBARI-7872 Create stack definitions for PHD-3.0.0.0 (vasanm, adenisso, tyu, Boxiong Ding, rpidva, rmeneses, Sourabh Bansod, Ashvin Agrawal, Sujeet Varakhedi via jaoki)

http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_nagios_init.php
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_nagios_init.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_nagios_init.php
new file mode 100644
index 0000000..487eb43
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/hdp_nagios_init.php
@@ -0,0 +1,81 @@
+ /dev/null 2>/dev/null ; [[ $? != 0 ]] && echo 1";
+  $check_output = shell_exec($check_cmd);
+
+  if ($check_output)
+    return false;
+  else
+    return true;
+ }
+
+ /*
+  * Runs kinit command.
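+  * Example with hypothetical values: kinit('/usr/bin/kinit', '/etc/security/keytabs/nagios.service.keytab', 'nagios/host@EXAMPLE.COM')
+  * returns array(0, '') on success and array(1, <kinit output>) on failure.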
+ */ + function kinit($kinit_path_local, $keytab_path, $principal_name) { + $init_cmd = "$kinit_path_local -kt $keytab_path $principal_name 2>&1"; + $kinit_output = shell_exec($init_cmd); + if ($kinit_output) + $status = array(1, $kinit_output); + else + $status = array(0, ''); + + return $status; + } + + function logout() { + if (shell_exec("rm -f /tmp/krb5cc_".trim(shell_exec('id -u'))) == "" ) + $status = true; + else + $status = false; + + return $status; + } + + ?> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/mm_wrapper.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/mm_wrapper.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/mm_wrapper.py new file mode 100644 index 0000000..7a622b6 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/mm_wrapper.py @@ -0,0 +1,326 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import sys +import subprocess +import os + +N_SGN = 'NAGIOS_SERVICEGROUPNAME' +N_SD = 'NAGIOS_SERVICEDESC' +N_HOST = 'NAGIOS_HOSTNAME' + +LIST_SEPARATOR = "--" +HOSTNAME_PLACEHOLDER = "^^" +IGNORE_DAT_FILE = "/var/nagios/ignore.dat" + +# Mode constants +OR = 0 +AND = 1 +ENV_ONLY = 2 +FILTER_MM = 3 +LEGACY_CHECK_WRAPPER = 4 +MODES = ['or', 'and', 'env_only', 'filter_mm', 'legacy_check_wrapper'] + + +def ignored_host_list(service, component): + """ + :param service: current service + :param component: current component + :return: all hosts where specified host component is in ignored state + """ + try: + with open(IGNORE_DAT_FILE) as f: + lines = f.readlines() + except IOError: + return [] + result = [] + if lines: + for l in lines: + tokens = l.split(' ') + if len(tokens) == 3 and tokens[1] == service and tokens[2].strip() == component: + result.append(tokens[0]) + return result + + +def get_real_service(): + try: + service = os.environ[N_SGN] # e.g. 'HBASE' + except KeyError: + service = '' + return service + + +def get_real_component(): + try: + arr_desc = os.environ[N_SD] # e.g. 
'HBASE::Percent RegionServers live'
+    SEPARATOR = "::"
+    comp_name = arr_desc.replace(SEPARATOR, ' ').split(' ')[0]
+  except KeyError:
+    comp_name = ''
+  mapping = {
+    'HBASEMASTER': 'HBASE_MASTER',
+    'REGIONSERVER': 'HBASE_REGIONSERVER',
+    'JOBHISTORY': 'MAPREDUCE2',
+    'HIVE-METASTORE': 'HIVE_METASTORE',
+    'HIVE-SERVER': 'HIVE_SERVER',
+    'FLUME': 'FLUME_HANDLER',
+    'HUE': 'HUE_SERVER',
+    'WEBHCAT': 'WEBHCAT_SERVER',
+  }
+  if comp_name in mapping:
+    comp_name = mapping.get(comp_name)
+  return comp_name
+
+
+def check_output(*popenargs, **kwargs):
+  """
+  Imitate subprocess.check_output() for python 2.6
+  """
+  process = subprocess.Popen(stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                             *popenargs, **kwargs)
+  output, unused_err = process.communicate()
+  retcode = process.poll()
+  if retcode:
+    cmd = kwargs.get("args")
+    if cmd is None:
+      cmd = popenargs[0]
+    err = subprocess.CalledProcessError(retcode, cmd)
+    # Monkey-patching for python 2.6
+    err.output = output
+    raise err
+  return output
+
+
+def print_usage():
+  """
+  Prints usage and exits with a non-zero exit code
+  """
+  print "Usage: mm_wrapper.py MODE HOST1 HOST2 .. HOSTN %s command arg1 arg2 .. argN" % LIST_SEPARATOR
+  print "MODE is one of the following: or, and, env_only, filter_mm, legacy_check_wrapper"
+  print "%s is a separator between list of hostnames and command with args" % LIST_SEPARATOR
+  print "%s is used as a hostname placeholder in command args" % HOSTNAME_PLACEHOLDER
+  print "The script also provides the $MM_HOSTS shell variable to commands"
+  print "NOTE: Script makes use of Nagios-populated env vars %s and %s" % (N_SGN, N_SD)
+  print "For more info, please see docstrings at %s" % os.path.realpath(__file__)
+  sys.exit(1)
+
+
+def parse_args(args):
+  if not args or not LIST_SEPARATOR in args or args[0] not in MODES:
+    print_usage()
+  else:
+    mode = MODES.index(args[0])  # identify operation mode
+    args = args[1:]  # Shift args left
+    hostnames = []
+    command_line = []
+    # Parse command line args
+    passed_separator = False  # True once LIST_SEPARATOR has been seen
+    for arg in args:
+      if not passed_separator:
+        if arg != LIST_SEPARATOR:
+          hostnames.append(arg)
+        else:
+          passed_separator = True
+      else:
+        if arg != LIST_SEPARATOR:
+          command_line.append(arg)
+        else:  # Something is definitely wrong
+          print "Could not parse arguments: " \
+                "There is more than one %s argument." % LIST_SEPARATOR
+          print_usage()
+
+    if not command_line:
+      print "No command provided."
+ print_usage() + return mode, hostnames, command_line + + +def do_work(mode, hostnames, command_line): + # Execute commands + ignored_hosts = ignored_host_list(get_real_service(), get_real_component()) + empty_check_result = { + 'message': 'No checks have been run (no hostnames provided)', + 'retcode': -1, + 'real_retcode': None + } + custom_env = os.environ.copy() + if ignored_hosts: + custom_env['MM_HOSTS'] = \ + reduce(lambda a, b: "%s %s" % (a, b), ignored_hosts) + if mode == OR: + check_result = work_in_or_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result) + elif mode == AND: + check_result = work_in_and_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result) + elif mode == ENV_ONLY: + check_result = work_in_env_only_mode(hostnames, command_line, custom_env) + elif mode == FILTER_MM: + check_result = work_in_filter_mm_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result) + else: # mode == LEGACY_CHECK_WRAPPER: + check_result = work_in_legacy_check_wrapper_mode(ignored_hosts, command_line, custom_env) + # Build the final output + final_output = [] + output = check_result.get('message') + if output is not None: + for string in output.splitlines(): + final_output.append(string.strip()) + real_retcode = check_result.get('real_retcode') + if real_retcode: + # This string is used at check_aggregate.php when aggregating alerts + final_output.append("AMBARIPASSIVE=%s" % real_retcode) + return final_output, check_result.get('retcode') + + +def work_in_or_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result): + check_result = empty_check_result + for hostname in hostnames: + concrete_command_line = map( # Substitute hostname where needed + lambda x: hostname if x == HOSTNAME_PLACEHOLDER else x, + command_line) + try: + returncode = 0 + real_retcode = None + message = check_output(concrete_command_line, env=custom_env) + except subprocess.CalledProcessError, e: + if hostname not in ignored_hosts: + returncode = e.returncode + else: # Host is in MM + real_retcode = e.returncode + message = e.output + really_positive_result = hostname not in ignored_hosts and returncode == 0 + if check_result.get('retcode') <= returncode or really_positive_result: + check_result = { + 'message': message, + 'retcode': returncode, + 'real_retcode': real_retcode # Real (not suppressed) program retcode + } + if really_positive_result: + break # Exit on first real success + return check_result + + +def work_in_and_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result): + check_result = empty_check_result + for hostname in hostnames: + concrete_command_line = map( # Substitute hostname where needed + lambda x: hostname if x == HOSTNAME_PLACEHOLDER else x, + command_line) + try: + returncode = 0 + real_retcode = None + message = check_output(concrete_command_line, env=custom_env) + except subprocess.CalledProcessError, e: + if hostname not in ignored_hosts: + returncode = e.returncode + else: + real_retcode = e.returncode + message = e.output + if check_result.get('retcode') <= returncode: + check_result = { + 'message': message, + 'retcode': returncode, + 'real_retcode': real_retcode # Real (not suppressed) program retcode + } + return check_result + + +def work_in_env_only_mode(hostnames, command_line, custom_env): + concrete_command_line = [] + for item in command_line: + if item == HOSTNAME_PLACEHOLDER: + concrete_command_line.extend(hostnames) + else: + concrete_command_line.append(item) + try: + 
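+      # Run the command once for the full host list; a non-zero exit raises CalledProcessError, handled below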
returncode = 0
+      message = check_output(concrete_command_line, env=custom_env)
+    except subprocess.CalledProcessError, e:
+      returncode = e.returncode
+      message = e.output
+  check_result = {
+    'message': message,
+    'retcode': returncode,
+    'real_retcode': None  # Real (not suppressed) program retcode
+  }
+  return check_result
+
+
+def work_in_filter_mm_mode(hostnames, ignored_hosts, command_line, custom_env, empty_check_result):
+  not_mm_hosts = [hostname for hostname in hostnames if hostname not in ignored_hosts]
+  if not not_mm_hosts:  # All hosts have been filtered
+    return empty_check_result
+  else:
+    return work_in_env_only_mode(not_mm_hosts, command_line, custom_env)
+
+
+def work_in_legacy_check_wrapper_mode(ignored_hosts, command_line, custom_env):
+  host = os.environ[N_HOST]
+  result = work_in_env_only_mode([host], command_line, custom_env)
+  real_retcode = result['retcode']
+  if host in ignored_hosts and real_retcode != 0:  # Ignore fail
+    result['retcode'] = 0
+    result['real_retcode'] = real_retcode
+  return result
+
+
+def main():
+  """
+  This script runs Nagios service check commands for host components
+  located on different hosts.
+  It also passes every command a $MM_HOSTS shell variable with the list of
+  hosts that are in MM (maintenance mode).
+
+  or mode: return a 0 exit code if at least one service check succeeds.
+  The command exits on the first success.
+  Failures for host components that are in MM are suppressed (return code
+  is set to 0).
+  If the command fails for all provided hostnames, the script returns the alert
+  with the greatest exit code value.
+
+  and mode:
+  Perform checks of all host components (effectively ignoring negative results
+  for MM components). If the service check is successful for all hosts, the script
+  also returns a zero exit code. Otherwise the alert with the greatest exit code
+  is returned.
+
+  env_only mode:
+  Pass the list of all hosts to the command and run it once. The only role of
+  the mm_wrapper script in this mode is to provide a properly initialized
+  $MM_HOSTS env variable to the command being run. All duties of ignoring failures
+  of MM host components are delegated to the command being run.
+
+  filter_mm mode:
+  Similar to env_only mode. The only difference is that hostnames for
+  host components that are in MM are filtered out (not passed to the command at all).
+
+  legacy_check_wrapper mode:
+  Designed as a drop-in replacement for check_wrapper.sh. It reads the $NAGIOS_HOSTNAME
+  env var and ignores check results if the host component on this host is in MM.
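+  Example invocation (hypothetical check command):
+    mm_wrapper.py legacy_check_wrapper -- check_tcp -H ^^ -p 60010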
+  When the host substitution symbol is encountered, the hostname defined by $NAGIOS_HOSTNAME
+  is substituted.
+  """
+  args = sys.argv[1:]  # Shift args left
+  mode, hostnames, command_line = parse_args(args)
+  output, ret_code = do_work(mode, hostnames, command_line)
+  for line in output:
+    print line
+  sys.exit(ret_code)
+
+
+if __name__ == "__main__":
+  main()

http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/nagios_alerts.php
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/nagios_alerts.php b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/nagios_alerts.php
new file mode 100644
index 0000000..0e1e501
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/nagios_alerts.php
@@ -0,0 +1,513 @@
+ HDP_MON_RESPONSE_OPTION_VALUE__PROPERTIES_UNCACHEABLE,
+ HDP_MON_RESPONSE_OPTION_KEY__TYPE =>
+ isset( $jsonpFunctionName ) && $jsonpFunctionName != "" ?
+ HDP_MON_RESPONSE_OPTION_VALUE__TYPE_JAVASCRIPT :
+ HDP_MON_RESPONSE_OPTION_VALUE__TYPE_JSON ) );
+
+ if( isset( $jsonpFunctionName ) )
+ {
+ echo "$jsonpFunctionName( $response_data );";
+ }
+ else
+ {
+ echo $response_data;
+ }
+}
+
+ /* alert_type { ok, non-ok, warning, critical, all } */
+ define ("all", "-2");
+ define ("nok", "-1");
+ define ("ok", "0");
+ define ("warn", "1");
+ define ("critical", "2");
+
+ define ("HDFS_SERVICE_CHECK", "NAMENODE::NameNode process down");
+ define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::JobTracker process down");
+ define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster process down");
+ define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent ZooKeeper Servers down");
+ define ("HIVE_SERVICE_CHECK", "HIVE-METASTORE::Hive Metastore status check");
+ define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie Server status check");
+ define ("WEBHCAT_SERVICE_CHECK", "WEBHCAT::WebHCat Server status check");
+ define ("PUPPET_SERVICE_CHECK", "PUPPET::Puppet agent down");
+
+ // on SUSE, some versions of Nagios stored data in /var/lib
+ $status_file = "/var/nagios/status.dat";
+ if (!file_exists($status_file) && file_exists("/etc/SuSE-release")) {
+ $status_file = "/var/lib/nagios/status.dat";
+ }
+
+ $q1="";
+ if (array_key_exists('q1', $_GET)) {
+ $q1=$_GET["q1"];
+ }
+ $q2="";
+ if (array_key_exists('q2', $_GET)) {
+ $q2=$_GET["q2"];
+ }
+ $alert_type="";
+ if (array_key_exists('alert_type', $_GET)) {
+ $alert_type=$_GET["alert_type"];
+ }
+ $host="";
+ if (array_key_exists('host_name', $_GET)) {
+ $host=$_GET["host_name"];
+ }
+ $indent="";
+ if (array_key_exists('indent', $_GET)) {
+ $indent=$_GET["indent"];
+ }
+
+ $result = array();
+ $status_file_content = file_get_contents($status_file);
+
+ if ($q1 == "alerts") {
+ /* Add the service status object to result array */
+ $result['alerts'] = query_alerts ($status_file_content, $alert_type, $host);
+ }
+
+ if ($q2 == "hosts") {
+ /* Add the host status object to result array */
+ $result['hosts'] = query_hosts ($status_file_content, $alert_type, $host);
+ }
+
+ /* Add host count object to the results */
+ $result['hostcounts'] = query_host_count ($status_file_content);
+
+ /* Add services runtime states */
+ $result['servicestates'] = query_service_states ($status_file_content);
+
+ /* Return results */
+ if ($indent == "true") {
+ hdp_mon_generate_response(indent(json_encode($result)));
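+ /* Example request (hypothetical URL): nagios_alerts.php?q1=alerts&q2=hosts&alert_type=all&indent=true */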
} else {
+ hdp_mon_generate_response(json_encode($result));
+ }
+
+ # Functions
+ /* Query service states */
+ function query_service_states ($status_file_content) {
+ $num_matches = preg_match_all("/servicestatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER);
+ $services_object = array ();
+ $services_object["PUPPET"] = 0;
+ foreach ($matches[0] as $object) {
+
+ if (getParameter($object, "service_description") == HDFS_SERVICE_CHECK) {
+ $services_object["HDFS"] = getParameter($object, "last_hard_state");
+ if ($services_object["HDFS"] >= 1) {
+ $services_object["HDFS"] = 1;
+ }
+ continue;
+ }
+ if (getParameter($object, "service_description") == MAPREDUCE_SERVICE_CHECK) {
+ $services_object["MAPREDUCE"] = getParameter($object, "last_hard_state");
+ if ($services_object["MAPREDUCE"] >= 1) {
+ $services_object["MAPREDUCE"] = 1;
+ }
+ continue;
+ }
+ if (getParameter($object, "service_description") == HBASE_SERVICE_CHECK) {
+ $services_object["HBASE"] = getParameter($object, "last_hard_state");
+ if ($services_object["HBASE"] >= 1) {
+ $services_object["HBASE"] = 1;
+ }
+ continue;
+ }
+ if (getParameter($object, "service_description") == HIVE_SERVICE_CHECK) {
+ $services_object["HIVE"] = getParameter($object, "last_hard_state");
+ if ($services_object["HIVE"] >= 1) {
+ $services_object["HIVE"] = 1;
+ }
+ continue;
+ }
+ if (getParameter($object, "service_description") == OOZIE_SERVICE_CHECK) {
+ $services_object["OOZIE"] = getParameter($object, "last_hard_state");
+ if ($services_object["OOZIE"] >= 1) {
+ $services_object["OOZIE"] = 1;
+ }
+ continue;
+ }
+ if (getParameter($object, "service_description") == WEBHCAT_SERVICE_CHECK) {
+ $services_object["WEBHCAT"] = getParameter($object, "last_hard_state");
+ if ($services_object["WEBHCAT"] >= 1) {
+ $services_object["WEBHCAT"] = 1;
+ }
+ continue;
+ }
+ /* In case of zookeeper, service is treated as running if alert is ok or warning (i.e. partial
+ * instances of zookeepers are running)
+ */
+ if (getParameter($object, "service_description") == ZOOKEEPER_SERVICE_CHECK) {
+ $services_object["ZOOKEEPER"] = getParameter($object, "last_hard_state");
+ if ($services_object["ZOOKEEPER"] <= 1) {
+ $services_object["ZOOKEEPER"] = 0;
+ }
+ continue;
+ }
+ if (getParameter($object, "service_description") == PUPPET_SERVICE_CHECK) {
+ $state = getParameter($object, "last_hard_state");
+ if ($state >= 1) {
+ $services_object["PUPPET"]++;
+ }
+ continue;
+ }
+ }
+ if ($services_object["PUPPET"] >= 1) {
+ $services_object["PUPPET"] = 1;
+ }
+ $services_object = array_map('strval', $services_object);
+ return $services_object;
+ }
+
+ /* Query host count */
+ function query_host_count ($status_file_content) {
+ $num_matches = preg_match_all("/hoststatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER);
+ $hostcounts_object = array ();
+ $up_hosts = 0;
+ $down_hosts = 0;
+
+ foreach ($matches[0] as $object) {
+ if (getParameter($object, "last_hard_state") != ok) {
+ $down_hosts++;
+ } else {
+ $up_hosts++;
+ }
+ }
+ $hostcounts_object['up_hosts'] = $up_hosts;
+ $hostcounts_object['down_hosts'] = $down_hosts;
+ $hostcounts_object = array_map('strval', $hostcounts_object);
+ return $hostcounts_object;
+ }
+
+ /* Query Hosts */
+ function query_hosts ($status_file_content, $alert_type, $host) {
+ $hoststatus_attributes = array ("host_name", "current_state", "last_hard_state",
+ "plugin_output", "last_check", "current_attempt",
+ "last_hard_state_change", "last_time_up", "last_time_down",
+ "last_time_unreachable",
"is_flapping", "last_check"); + + $num_matches = preg_match_all("/hoststatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER); + $hosts_objects = array (); + $i = 0; + foreach ($matches[0] as $object) { + $hoststatus = array (); + $chost = getParameter($object, "host_name"); + if (empty($host) || $chost == $host) { + foreach ($hoststatus_attributes as $attrib) { + $hoststatus[$attrib] = htmlentities(getParameter($object, $attrib), ENT_COMPAT); + } + $hoststatus['alerts'] = query_alerts ($status_file_content, $alert_type, $chost); + if (!empty($host)) { + $hosts_objects[$i] = $hoststatus; + $i++; + break; + } + } + if (!empty($hoststatus)) { + $hosts_objects[$i] = $hoststatus; + $i++; + } + } + /* echo "COUNT : " . count ($services_objects) . "\n"; */ + return $hosts_objects; + } + + /* Query Alerts */ + function query_alerts ($status_file_content, $alert_type, $host) { + + $servicestatus_attributes = array ("service_description", "host_name", "current_attempt", + "current_state", "plugin_output", "last_hard_state_change", "last_hard_state", + "last_time_ok", "last_time_warning", "last_time_unknown", + "last_time_critical", "is_flapping", "last_check", + "long_plugin_output"); + + $num_matches = preg_match_all("/servicestatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER); + #echo $matches[0][0] . ", " . $matches[0][1] . "\n"; + #echo $matches[1][0] . ", " . $matches[1][1] . "\n"; + $services_objects = array (); + $i = 0; + foreach ($matches[1] as $object) { + $servicestatus = getParameterMap($object, $servicestatus_attributes); + switch ($alert_type) { + case "all": + if (empty($host) || $servicestatus['host_name'] == $host) { + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "nok": + if (getParameterMapValue($map, "last_hard_state") != ok && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "ok": + if (getParameterMapValue($map, "last_hard_state") == ok && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "warn": + if (getParameterMapValue($map, "last_hard_state") == warn && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "critical": + if 
(getParameterMapValue($map, "last_hard_state") == critical && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + } + + if (!empty($servicestatus)) { + $services_objects[$i] = $servicestatus; + $i++; + } + } + + // echo "COUNT : " . count ($services_objects) . "\n"; + return $services_objects; + } + + function get_service_type($service_description) + { + $pieces = explode("::", $service_description); + switch ($pieces[0]) { + case "DATANODE": + case "NAMENODE": + case "JOURNALNODE": + $pieces[0] = "HDFS"; + break; + case "JOBTRACKER": + case "TASKTRACKER": + $pieces[0] = "MAPREDUCE"; + break; + case "HBASEMASTER": + case "REGIONSERVER": + $pieces[0] = "HBASE"; + break; + case "HIVE-METASTORE": + case "HIVE-SERVER": + case "WEBHCAT": + $pieces[0] = "HIVE"; + break; + case "ZKSERVERS": + $pieces[0] = "ZOOKEEPER"; + break; + case "AMBARI": + $pieces[0] = "AMBARI"; + break; + case "FLUME": + $pieces[0] = "FLUME"; + break; + case "JOBHISTORY": + $pieces[0] = "MAPREDUCE2"; + break; + case "RESOURCEMANAGER": + case "APP_TIMELINE_SERVER": + case "NODEMANAGER": + $pieces[0] = "YARN"; + break; + case "STORM_UI_SERVER": + case "NIMBUS": + case "DRPC_SERVER": + case "SUPERVISOR": + case "STORM_REST_API": + $pieces[0] = "STORM"; + break; + case "NAGIOS": + case "HDFS": + case "MAPREDUCE": + case "HBASE": + case "ZOOKEEPER": + case "OOZIE": + case "GANGLIA": + case "STORM": + case "FALCON": + case "PUPPET": + break; + default: + $pieces[0] = "UNKNOWN"; + } + return $pieces[0]; + } + + function getParameter($object, $key) + { + $pattern="/\s" . $key . "[\s= ]*([\S, ]*)\n/"; + $num_mat = preg_match($pattern, $object, $matches); + $value = ""; + if ($num_mat) { + $value = $matches[1]; + } + return $value; + } + + function getParameterMapValue($map, $key) { + $value = $map[$key]; + + if (!is_null($value)) + return "" . $value; + + return ""; + } + + + function getParameterMap($object, $keynames) { + + $cnt = preg_match_all('/\t([\S]*)=[\n]?[\t]?([\S= ]*)/', $object, $matches, PREG_PATTERN_ORDER); + + $tmpmap = array_combine($matches[1], $matches[2]); + + $map = array(); + foreach ($keynames as $key) { + $map[$key] = htmlentities($tmpmap[$key], ENT_COMPAT); + } + + return $map; + } + +function indent($json) { + + $result = ''; + $pos = 0; + $strLen = strlen($json); + $indentStr = ' '; + $newLine = "\n"; + $prevChar = ''; + $outOfQuotes = true; + + for ($i=0; $i<=$strLen; $i++) { + + // Grab the next character in the string. + $char = substr($json, $i, 1); + + // Are we inside a quoted string? + if ($char == '"' && $prevChar != '\\') { + $outOfQuotes = !$outOfQuotes; + + // If this character is the end of an element, + // output a new line and indent the next line. + } else if(($char == '}' || $char == ']') && $outOfQuotes) { + $result .= $newLine; + $pos --; + for ($j=0; $j<$pos; $j++) { + $result .= $indentStr; + } + } + + // Add the character to the result string. + $result .= $char; + + // If the last character was the beginning of an element, + // output a new line and indent the next line. 
+ if (($char == ',' || $char == '{' || $char == '[') && $outOfQuotes) { + $result .= $newLine; + if ($char == '{' || $char == '[') { + $pos ++; + } + + for ($j = 0; $j < $pos; $j++) { + $result .= $indentStr; + } + } + + $prevChar = $char; + } + + return $result; +} +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/sys_logger.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/sys_logger.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/sys_logger.py new file mode 100644 index 0000000..6683342 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/files/sys_logger.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +import syslog + +# dictionary of state->severity mappings +severities = {'UP':'OK', 'DOWN':'Critical', 'UNREACHABLE':'Critical', 'OK':'OK', + 'WARNING':'Warning', 'UNKNOWN':'Warning', 'CRITICAL':'Critical'} + +# List of services which can result in events at the Degraded severity +degraded_alert_services = ['HBASEMASTER::HBaseMaster CPU utilization', + 'HDFS::Namenode RPC Latency', + 'MAPREDUCE::JobTracker RPC Latency', + 'JOBTRACKER::Jobtracker CPU utilization'] + +# List of services which can result in events at the Fatal severity +fatal_alert_services = ['NAMENODE::Namenode Process down', + 'NAMENODE::NameNode process'] + +# dictionary of service->msg_id mappings +msg_ids = {'Host::Ping':'host_down', + 'HBASEMASTER::HBaseMaster CPU utilization':'master_cpu_utilization', + 'HDFS::HDFS Capacity utilization':'hdfs_percent_capacity', + 'HDFS::Corrupt/Missing blocks':'hdfs_block', + 'NAMENODE::Namenode Edit logs directory status':'namenode_edit_log_write', + 'HDFS::Percent DataNodes down':'datanode_down', + 'DATANODE::Process down':'datanode_process_down', + 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full', + 'NAMENODE::Namenode Process down':'namenode_process_down', + 'HDFS::Namenode RPC Latency':'namenode_rpc_latency', + 'DATANODE::Storage full':'datanodes_storage_full', + 'JOBTRACKER::Jobtracker Process down':'jobtracker_process_down', + 'MAPREDUCE::JobTracker RPC Latency':'jobtracker_rpc_latency', + 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down', + 'TASKTRACKER::Process down':'tasktracker_process_down', + 'HBASEMASTER::HBaseMaster Process down':'hbasemaster_process_down', + 'REGIONSERVER::Process down':'regionserver_process_down', + 'HBASE::Percent region servers down':'regionservers_down', + 'HIVE-METASTORE::HIVE-METASTORE status check':'hive_metastore_process_down', + 
'ZOOKEEPER::Percent zookeeper servers down':'zookeepers_down', + 'ZKSERVERS::ZKSERVERS Process down':'zookeeper_process_down', + 'OOZIE::Oozie status check':'oozie_down', + 'TEMPLETON::Templeton status check':'templeton_down', + 'PUPPET::Puppet agent down':'puppet_down', + 'NAGIOS::Nagios status log staleness':'nagios_status_log_stale', + 'GANGLIA::Ganglia [gmetad] Process down':'ganglia_process_down', + 'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster':'ganglia_collector_process_down', + 'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker':'ganglia_collector_process_down', + 'GANGLIA::Ganglia collector [gmond] Process down alert for namenode':'ganglia_collector_process_down', + 'GANGLIA::Ganglia collector [gmond] Process down alert for slaves':'ganglia_collector_process_down', + 'NAMENODE::Secondary Namenode Process down':'secondary_namenode_process_down', + 'JOBTRACKER::Jobtracker CPU utilization':'jobtracker_cpu_utilization', + 'HBASEMASTER::HBase Web UI down':'hbase_ui_down', + 'NAMENODE::Namenode Web UI down':'namenode_ui_down', + 'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down', + 'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down', + + 'HBASEMASTER::HBase Master CPU utilization':'master_cpu_utilization', + 'HDFS::HDFS capacity utilization':'hdfs_percent_capacity', + 'NAMENODE::NameNode edit logs directory status':'namenode_edit_log_write', + 'DATANODE::DataNode process down':'datanode_process_down', + 'NAMENODE::NameNode process down':'namenode_process_down', + 'HDFS::NameNode RPC latency':'namenode_rpc_latency', + 'DATANODE::DataNode storage full':'datanodes_storage_full', + 'JOBTRACKER::JobTracker process down':'jobtracker_process_down', + 'MAPREDUCE::JobTracker RPC latency':'jobtracker_rpc_latency', + 'TASKTRACKER::TaskTracker process down':'tasktracker_process_down', + 'HBASEMASTER::HBase Master process down':'hbasemaster_process_down', + 'REGIONSERVER::RegionServer process down':'regionserver_process_down', + 'HBASE::Percent RegionServers down':'regionservers_down', + 'HIVE-METASTORE::Hive Metastore status check':'hive_metastore_process_down', + 'HIVE-METASTORE::Hive Metastore process':'hive_metastore_process_down', + 'ZOOKEEPER::Percent ZooKeeper Servers down':'zookeepers_down', + 'ZOOKEEPER::ZooKeeper Server process down':'zookeeper_process_down', + 'OOZIE::Oozie Server status check':'oozie_down', + 'WEBHCAT::WebHCat Server status check':'templeton_down', + 'GANGLIA::Ganglia [gmetad] process down':'ganglia_process_down', + 'GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master':'ganglia_collector_process_down', + 'GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker':'ganglia_collector_process_down', + 'GANGLIA::Ganglia Collector [gmond] process down alert for NameNode':'ganglia_collector_process_down', + 'GANGLIA::Ganglia Collector [gmond] process down alert for slaves':'ganglia_collector_process_down', + 'NAMENODE::Secondary NameNode process down':'secondary_namenode_process_down', + 'JOBTRACKER::JobTracker CPU utilization':'jobtracker_cpu_utilization', + 'HBASEMASTER::HBase Master Web UI down':'hbase_ui_down', + 'NAMENODE::NameNode Web UI down':'namenode_ui_down', + 'Oozie status check':'oozie_down', + 'WEBHCAT::WebHcat status check':'templeton_down', + + # Ambari Nagios service check descriptions + 'DATANODE::DataNode process':'datanode_process', + 'NAMENODE::NameNode process':'namenode_process', + 'NAMENODE::Secondary NameNode process':'secondary_namenode_process', + 
'JOURNALNODE::JournalNode process':'journalnode_process',
+           'ZOOKEEPER::ZooKeeper Server process':'zookeeper_process_down',
+           'JOBTRACKER::JobTracker process':'jobtracker_process',
+           'TASKTRACKER::TaskTracker process':'tasktracker_process',
+           'GANGLIA::Ganglia Server process':'ganglia_server_process',
+           'GANGLIA::Ganglia Monitor process for Slaves':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for NameNode':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for JobTracker':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for HBase Master':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for ResourceManager':'ganglia_monitor_process',
+           'GANGLIA::Ganglia Monitor process for HistoryServer':'ganglia_monitor_process',
+           'HBASEMASTER::HBase Master process':'hbase_master_process',
+           'HBASE::Percent RegionServers live':'regionservers_down',
+           'REGIONSERVER::RegionServer process':'regionserver_process',
+           'NAGIOS::Nagios status log freshness':'nagios_process',
+           'FLUME::Flume Agent process':'flume_agent_process',
+           'OOZIE::Oozie Server status':'oozie_down',
+           'HIVE-METASTORE::Hive Metastore status':'hive_metastore_process',
+           'WEBHCAT::WebHCat Server status':'webhcat_down',
+           'RESOURCEMANAGER::ResourceManager process':'resourcemanager_process_down',
+           'RESOURCEMANAGER::ResourceManager RPC latency':'resourcemanager_rpc_latency',
+           'RESOURCEMANAGER::ResourceManager CPU utilization':'resourcemanager_cpu_utilization',
+           'RESOURCEMANAGER::ResourceManager Web UI':'recourcemanager_ui',
+           'NODEMANAGER::NodeManager process':'nodemanager_process_down',
+           'NODEMANAGER::NodeManager health':'nodemanager_health',
+           'NODEMANAGER::Percent NodeManagers live':'nodemanagers_down',
+           'APP_TIMELINE_SERVER::App Timeline Server process':'timelineserver_process',
+           'JOBHISTORY::HistoryServer RPC latency':'historyserver_rpc_latency',
+           'JOBHISTORY::HistoryServer CPU utilization':'historyserver_cpu_utilization',
+           'JOBHISTORY::HistoryServer Web UI':'historyserver_ui',
+           'JOBHISTORY::HistoryServer process':'historyserver_process'}
+
+# Determine the severity of the TVI alert based on the Nagios alert state.
+def determine_severity(state, service):
+    if severities.has_key(state):
+        severity = severities[state]
+    else: severity = 'Warning'
+
+    # For some alerts, warning should be converted to Degraded
+    if severity == 'Warning' and service in degraded_alert_services:
+        severity = 'Degraded'
+    elif severity != 'OK' and service in fatal_alert_services:
+        severity = 'Fatal'
+
+    return severity
+
+
+# Determine the msg id for the TVI alert based on the service which generates the Nagios alert.
+# The msg id is used to correlate a log msg to a TVI rule.
+def determine_msg_id(service, severity):
+    for k, v in msg_ids.iteritems():
+        if(k in service):
+            msg_id = v
+            if severity == 'OK':
+                msg_id = '{0}_ok'.format(msg_id)
+            return msg_id
+    return 'HADOOP_UNKNOWN_MSG'
+
+
+# Determine the domain. Currently the domain is always 'Hadoop'.
+def determine_domain():
+    return 'Hadoop'
+
+
+# log the TVI msg to the syslog
+def log_tvi_msg(msg):
+    syslog.openlog('nagios', syslog.LOG_PID)
+    syslog.syslog(msg)
+
+
+# generate a tvi log msg from a Hadoop alert
+def generate_tvi_log_msg(alert_type, attempt, state, service, msg):
+    # Determine the TVI msg contents
+    severity = determine_severity(state, service)  # The TVI alert severity.
+    domain = determine_domain()  # The domain specified in the TVI alert.
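+    # e.g. a HARD 'CRITICAL' alert for service 'NAMENODE::NameNode process' is logged as 'Fatal: Hadoop: namenode_process# <msg>'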
+ msg_id = determine_msg_id(service, severity) # The msg_id used to correlate to a TVI rule. + + # Only log HARD alerts + if alert_type == 'HARD': + # Format and log msg + log_tvi_msg('{0}: {1}: {2}# {3}'.format(severity, domain, msg_id, msg)) + + +# main method which is called when invoked on the command line +def main(): + generate_tvi_log_msg(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5]) + + +# run the main method +if __name__ == '__main__': + main() + sys.exit(0) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/functions.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/functions.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/functions.py new file mode 100644 index 0000000..7252f8f --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/functions.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" +from resource_management import * + +# Gets if the java version is greater than 6 +def is_jdk_greater_6(java64_home): + import os + import re + java_bin = os.path.join(java64_home, 'bin', 'java') + ver_check = shell.call([java_bin, '-version']) + + ver = '' + if 0 != ver_check[0]: + # java is not local, try the home name as a fallback + ver = java64_home + else: + ver = ver_check[1] + + regex = re.compile('"1\.([0-9]*)\.0_([0-9]*)"', re.IGNORECASE) + r = regex.search(ver) + if r: + strs = r.groups() + if 2 == len(strs): + minor = int(strs[0]) + if minor > 6: + return True + + return False http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios.py new file mode 100644 index 0000000..a63ea38 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Ambari Agent
+
+"""
+
+from resource_management import *
+from nagios_server_config import nagios_server_config
+
+def nagios():
+  import params
+
+  File( params.nagios_httpd_config_file,
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    content = Template("nagios.conf.j2"),
+    mode = 0644
+  )
+
+  Directory( params.conf_dir,
+    owner = params.nagios_user,
+    group = params.nagios_group
+  )
+
+  Directory( [params.plugins_dir, params.nagios_obj_dir])
+
+  Directory( params.nagios_pid_dir,
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    mode = 0755,
+    recursive = True
+  )
+
+  Directory( [params.nagios_var_dir, params.check_result_path, params.nagios_rw_dir, params.ambarinagios_php_dir],
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    recursive = True
+  )
+
+  Directory( [params.nagios_log_dir, params.nagios_log_archives_dir],
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    mode = 0755
+  )
+
+  nagios_server_config()
+
+  set_web_permissions()
+
+  File( format("{conf_dir}/command.cfg"),
+    owner = params.nagios_user,
+    group = params.nagios_group
+  )
+
+  File( format("{ambarinagios_php_dir}/{ambarinagios_php_filename}"),
+    content = StaticFile(params.ambarinagios_php_filename),
+  )
+
+  File( params.hdp_mon_nagios_addons_path,
+    content = StaticFile("hdp_mon_nagios_addons.conf"),
+  )
+
+  File(format("{nagios_var_dir}/ignore.dat"),
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    mode = 0664)
+
+  if System.get_instance().os_family == "ubuntu":
+    Link(params.ubuntu_stylesheets_desired_location,
+      to = params.ubuntu_stylesheets_real_location
+    )
+
+
+def set_web_permissions():
+  import params
+
+  cmd = format("{htpasswd_cmd} -c -b {conf_dir}/htpasswd.users {nagios_web_login} {nagios_web_password!p}")
+  Execute(cmd)
+
+  File( format("{conf_dir}/htpasswd.users"),
+    owner = params.nagios_user,
+    group = params.nagios_group,
+    mode = 0640
+  )
+
+  if System.get_instance().os_family == "suse":
+    command = format("usermod -G {nagios_group} wwwrun")
+  elif System.get_instance().os_family == "ubuntu":
+    command = format("usermod -G {nagios_group} www-data") # check -a ???
+  elif System.get_instance().os_family == "redhat":
+    command = format("usermod -a -G {nagios_group} apache")
+
+  Execute( command)

http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server.py
new file mode 100644
index 0000000..da35b34
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.
See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +import sys +from resource_management import * +from nagios import nagios +from nagios_service import nagios_service +from nagios_service import update_active_alerts + + +class NagiosServer(Script): + def install(self, env): + remove_conflicting_packages() + self.install_packages(env) + self.configure(env) + + def configure(self, env): + import params + env.set_params(params) + nagios() + + + def start(self, env): + import params + env.set_params(params) + + update_ignorable(params) + + self.configure(env) # done for updating configs after Security enabled + nagios_service(action='start') + + + def stop(self, env): + import params + env.set_params(params) + + nagios_service(action='stop') + + + def status(self, env): + import status_params + env.set_params(status_params) + check_process_status(status_params.nagios_pid_file) + + # check for alert structures + update_active_alerts() + + +def remove_conflicting_packages(): + Package('hdp_mon_nagios_addons', action = "remove") + + Package('nagios-plugins', action = "remove") + + if System.get_instance().os_family in ["redhat","suse"]: + Execute("rpm -e --allmatches --nopostun nagios", + path = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + ignore_failures = True) + +def update_ignorable(params): + if not params.config.has_key('passiveInfo'): + return + else: + buf = "" + count = 0 + for define in params.config['passiveInfo']: + try: + host = str(define['host']) + service = str(define['service']) + component = str(define['component']) + buf += host + " " + service + " " + component + "\n" + count += 1 + except KeyError: + pass + + f = None + try: + f = open('/var/nagios/ignore.dat', 'w') + f.write(buf) + if 1 == count: + Logger.info("Persisted '/var/nagios/ignore.dat' with 1 entry") + elif count > 1: + Logger.info("Persisted '/var/nagios/ignore.dat' with " + str(count) + " entries") + except: + Logger.info("Could not persist '/var/nagios/ignore.dat'") + pass + finally: + if f is not None: + f.close() + + +if __name__ == "__main__": + NagiosServer().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server_config.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server_config.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server_config.py new file mode 100644 index 0000000..883442c --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_server_config.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. 
See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +from resource_management import * + +def nagios_server_config(): + import params + + nagios_server_configfile( 'nagios.cfg', + config_dir = params.conf_dir, + group = params.nagios_group + ) + nagios_server_configfile( 'resource.cfg', + config_dir = params.conf_dir, + group = params.nagios_group + ) + nagios_server_configfile( 'hadoop-hosts.cfg') + nagios_server_configfile( 'hadoop-hostgroups.cfg') + nagios_server_configfile( 'hadoop-servicegroups.cfg') + nagios_server_configfile( 'hadoop-services.cfg') + nagios_server_configfile( 'hadoop-commands.cfg') + nagios_server_configfile( 'contacts.cfg') + + if System.get_instance().os_family != "suse": + nagios_server_configfile( 'nagios', + config_dir = '/etc/init.d', + mode = 0755, + owner = 'root', + group = 'root' + ) + + nagios_server_check( 'check_cpu.pl') + nagios_server_check( 'check_cpu.php') + nagios_server_check( 'check_cpu_ha.php') + nagios_server_check( 'check_datanode_storage.php') + nagios_server_check( 'check_aggregate.php') + nagios_server_check( 'check_hdfs_blocks.php') + nagios_server_check( 'check_hdfs_capacity.php') + nagios_server_check( 'check_rpcq_latency.php') + nagios_server_check( 'check_rpcq_latency_ha.php') + nagios_server_check( 'check_webui.sh') + nagios_server_check( 'check_webui_ha.sh') + nagios_server_check( 'check_name_dir_status.php') + nagios_server_check( 'check_oozie_status.sh') + nagios_server_check( 'check_templeton_status.sh') + nagios_server_check( 'check_hive_metastore_status.sh') + nagios_server_check( 'check_hue_status.sh') + nagios_server_check( 'check_mapred_local_dir_used.sh') + nagios_server_check( 'check_nodemanager_health.sh') + nagios_server_check( 'check_namenodes_ha.sh') + nagios_server_check( 'hdp_nagios_init.php') + nagios_server_check( 'check_checkpoint_time.py' ) + nagios_server_check( 'sys_logger.py' ) + nagios_server_check( 'check_ambari_alerts.py' ) + nagios_server_check( 'mm_wrapper.py' ) + nagios_server_check( 'check_hive_thrift_port.py' ) + +def nagios_server_configfile( + name, + owner = None, + group = None, + config_dir = None, + mode = None +): + import params + owner = params.nagios_user if not owner else owner + group = params.user_group if not group else group + config_dir = params.nagios_obj_dir if not config_dir else config_dir + + TemplateConfig( format("{config_dir}/{name}"), + owner = owner, + group = group, + mode = mode + ) + +def nagios_server_check(name): + File( format("{plugins_dir}/{name}"), + content = StaticFile(name), + mode = 0755 + ) http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_service.py 
b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_service.py new file mode 100644 index 0000000..b7f512b --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/nagios_service.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +import json +import os +import signal + +from resource_management import * +from os.path import isfile + + +def nagios_service(action='start'): # start or stop + import params + + nagios_pid_file = format("{nagios_pid_file}") + + if action == 'start': + command = format("service {nagios_service_name} start") + Execute(command) + elif action == 'stop': + # attempt to grab the pid in case we need it later + nagios_pid = 0 + if isfile(nagios_pid_file): + with open(nagios_pid_file, "r") as file: + try: + nagios_pid = int(file.read()) + Logger.info("Nagios is running with a PID of {0}".format(nagios_pid)) + except: + Logger.info("Unable to read PID file {0}".format(nagios_pid_file)) + finally: + file.close() + + command = format("service {nagios_service_name} stop") + Execute(command) + + # on SUSE, there is a bug where Nagios doesn't kill the process + # but this could also affect any OS, so don't restrict this to SUSE + if nagios_pid > 0: + try: + os.kill(nagios_pid, 0) + except: + Logger.info("The Nagios process has successfully terminated") + else: + Logger.info("The Nagios process with ID {0} failed to terminate; explicitly killing.".format(nagios_pid)) + os.kill(nagios_pid, signal.SIGKILL) + + # in the event that the Nagios scripts don't remove the pid file + if isfile( nagios_pid_file ): + Execute(format("rm -f {nagios_pid_file}")) + + MonitorWebserver("restart") + +def update_active_alerts(): + import status_params + + alerts = None + if 'alerts' in status_params.config and status_params.config['alerts'] is not None: + alerts = status_params.config['alerts'] + + if alerts is None: + return + + output = {} + + for a in alerts: + alert_name = a['name'] + alert_text = a['text'] + alert_state = a['state'] + alert_host = a['host'] + if not output.has_key(alert_name): + output[alert_name] = {} + + if not output[alert_name].has_key(alert_host): + output[alert_name][alert_host] = [] + + host_items = output[alert_name][alert_host] + alert_out = {} + alert_out['state'] = alert_state + alert_out['text'] = alert_text + host_items.append(alert_out) + + with open(os.path.join(status_params.nagios_var_dir, 'ambari.json'), 'w') as f: + json.dump(output, f) + http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/params.py ---------------------------------------------------------------------- diff --git 
a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/params.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/params.py new file mode 100644 index 0000000..5a0ffbb --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/params.py @@ -0,0 +1,366 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +from functions import is_jdk_greater_6 +from resource_management import * +import status_params + +HADOOP_HTTP_POLICY = "HTTP_ONLY" +HADOOP_HTTPS_POLICY = "HTTPS_ONLY" + +# server configurations +config = Script.get_config() + +if System.get_instance().os_family == "ubuntu": + nagios_service_name = "nagios3" +else: + nagios_service_name = "nagios" + +conf_dir = format("/etc/{nagios_service_name}") +nagios_obj_dir = format("{conf_dir}/objects") +nagios_var_dir = status_params.nagios_var_dir +nagios_rw_dir = status_params.nagios_rw_dir + +# HACK: Stylesheets for Nagios UI on Ubuntu are in wrong place so we have to do a symlink. +# In future we can fix this directly in the package. 
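+# (the symlink itself is created by the Link resource in nagios.py)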
+ubuntu_stylesheets_real_location = "/etc/nagios3/stylesheets"
+ubuntu_stylesheets_desired_location = "/usr/share/nagios3/htdocs/stylesheets"
+
+if System.get_instance().os_family == "ubuntu":
+  host_template = "generic-host"
+  plugins_dir = "/usr/lib/nagios/plugins"
+  nagios_web_dir = "/usr/share/nagios3/htdocs"
+
+  cfg_files = [
+    format("{conf_dir}/commands.cfg"),
+    format("{conf_dir}/conf.d/contacts_nagios2.cfg"),
+    format("{conf_dir}/conf.d/generic-host_nagios2.cfg"),
+    format("{conf_dir}/conf.d/generic-service_nagios2.cfg"),
+    format("{conf_dir}/conf.d/timeperiods_nagios2.cfg"),
+  ]
+  cgi_dir = "/usr/lib/cgi-bin/nagios3"
+  cgi_weblink = "/cgi-bin/nagios3"
+else:
+  host_template = "linux-server"
+  plugins_dir = "/usr/lib64/nagios/plugins"
+  nagios_web_dir = "/usr/share/nagios"
+
+  cfg_files = [
+    format("{nagios_obj_dir}/commands.cfg"),
+    format("{nagios_obj_dir}/contacts.cfg"),
+    format("{nagios_obj_dir}/timeperiods.cfg"),
+    format("{nagios_obj_dir}/templates.cfg"),
+  ]
+
+  cgi_dir = "/usr/lib/nagios/cgi"
+  cgi_weblink = "/nagios/cgi-bin"
+
+check_result_path = "/var/nagios/spool/checkresults"
+nagios_log_dir = "/var/log/nagios"
+nagios_log_archives_dir = format("{nagios_log_dir}/archives")
+nagios_host_cfg = format("{nagios_obj_dir}/hadoop-hosts.cfg")
+nagios_lookup_daemon_str = "/usr/sbin/nagios"
+nagios_pid_dir = status_params.nagios_pid_dir
+nagios_pid_file = status_params.nagios_pid_file
+nagios_resource_cfg = format("{conf_dir}/resource.cfg")
+nagios_hostgroup_cfg = format("{nagios_obj_dir}/hadoop-hostgroups.cfg")
+nagios_servicegroup_cfg = format("{nagios_obj_dir}/hadoop-servicegroups.cfg")
+nagios_service_cfg = format("{nagios_obj_dir}/hadoop-services.cfg")
+nagios_command_cfg = format("{nagios_obj_dir}/hadoop-commands.cfg")
+eventhandlers_dir = "/usr/lib/nagios/eventhandlers"
+nagios_principal_name = default("/configurations/nagios-env/nagios_principal_name", "nagios")
+
+oozie_server_port = get_port_from_url(config['configurations']['oozie-site']['oozie.base.url'])
+namenode_host = default("/clusterHostInfo/namenode_host", None)
+_rm_host = default("/clusterHostInfo/rm_host", None)
+if isinstance(_rm_host, list):
+  rm_hosts_in_str = ','.join(_rm_host)
+
+has_namenode = namenode_host is not None
+has_rm = _rm_host is not None
+
+# test whether the filesystem is HDFS or HCFS (glusterfs)
+if 'namenode_host' in config['clusterHostInfo']:
+  ishdfs_value = "HDFS"
+else:
+  ishdfs_value = None
+
+# HDFS, YARN, and MR use different settings to enable SSL
+hdfs_ssl_enabled = False
+yarn_ssl_enabled = False
+mapreduce_ssl_enabled = False
+
+# initialize all http policies to HTTP_ONLY
+dfs_http_policy = HADOOP_HTTP_POLICY
+yarn_http_policy = HADOOP_HTTP_POLICY
+mapreduce_http_policy = HADOOP_HTTP_POLICY
+
+# read the configured HTTP policies from the site configs, when present
+if has_namenode:
+  if 'dfs.http.policy' in config['configurations']['hdfs-site']:
+    dfs_http_policy = config['configurations']['hdfs-site']['dfs.http.policy']
+if has_rm:
+  if 'yarn.http.policy' in config['configurations']['yarn-site']:
+    yarn_http_policy = config['configurations']['yarn-site']['yarn.http.policy']
+
+  if 'mapreduce.jobhistory.http.policy' in config['configurations']['mapred-site']:
+    mapreduce_http_policy = config['configurations']['mapred-site']['mapreduce.jobhistory.http.policy']
+
+if dfs_http_policy == HADOOP_HTTPS_POLICY:
+  hdfs_ssl_enabled = True
+
+if yarn_http_policy == HADOOP_HTTPS_POLICY:
+  yarn_ssl_enabled = True
+
+if mapreduce_http_policy == HADOOP_HTTPS_POLICY:
+  mapreduce_ssl_enabled = True
+
+# set default ports and webui lookup properties
+dfs_namenode_webui_default_port = '50070'
+dfs_snamenode_webui_default_port = '50090'
+yarn_nodemanager_default_port = '8042'
+dfs_namenode_webui_property = 'dfs.namenode.http-address'
+dfs_snamenode_webui_property = 'dfs.namenode.secondary.http-address'
+dfs_datanode_webui_property = 'dfs.datanode.http.address'
+yarn_rm_webui_property = 'yarn.resourcemanager.webapp.address'
+yarn_timeline_service_webui_property = 'yarn.timeline-service.webapp.address'
+yarn_nodemanager_webui_property = 'yarn.nodemanager.webapp.address'
+mapreduce_jobhistory_webui_property = 'mapreduce.jobhistory.webapp.address'
+
+# if HDFS is protected by SSL, adjust the ports and lookup properties
+if hdfs_ssl_enabled:
+  dfs_namenode_webui_default_port = '50470'
+  dfs_snamenode_webui_default_port = '50091'
+  dfs_namenode_webui_property = 'dfs.namenode.https-address'
+  dfs_snamenode_webui_property = 'dfs.namenode.secondary.https-address'
+  dfs_datanode_webui_property = 'dfs.datanode.https.address'
+
+# if YARN is protected by SSL, adjust the ports and lookup properties
+if yarn_ssl_enabled:
+  yarn_rm_webui_property = 'yarn.resourcemanager.webapp.https.address'
+  yarn_nodemanager_webui_property = 'yarn.nodemanager.webapp.https.address'
+  yarn_timeline_service_webui_property = 'yarn.timeline-service.webapp.https.address'
+
+# if MR is protected by SSL, adjust the ports and lookup properties
+if mapreduce_ssl_enabled:
+  mapreduce_jobhistory_webui_property = 'mapreduce.jobhistory.webapp.https.address'
+
+if has_namenode:
+  # extract NameNode
+  if dfs_namenode_webui_property in config['configurations']['hdfs-site']:
+    namenode_port = get_port_from_url(config['configurations']['hdfs-site'][dfs_namenode_webui_property])
+  else:
+    namenode_port = dfs_namenode_webui_default_port
+
+  # extract Secondary NameNode
+  if dfs_snamenode_webui_property in config['configurations']['hdfs-site']:
+    snamenode_port = get_port_from_url(config['configurations']['hdfs-site'][dfs_snamenode_webui_property])
+  else:
+    snamenode_port = dfs_snamenode_webui_default_port
+
+  if 'dfs.journalnode.http-address' in config['configurations']['hdfs-site']:
+    journalnode_port = get_port_from_url(config['configurations']['hdfs-site']['dfs.journalnode.http-address'])
+  datanode_port = get_port_from_url(config['configurations']['hdfs-site'][dfs_datanode_webui_property])
+
+nm_port = yarn_nodemanager_default_port
+if has_rm:
+  if yarn_nodemanager_webui_property in config['configurations']['yarn-site']:
+    nm_port = get_port_from_url(config['configurations']['yarn-site'][yarn_nodemanager_webui_property])
+
+flume_port = "4159"
+hbase_master_rpc_port = default('/configurations/hbase-site/hbase.master.port', "60000")
+rm_port = get_port_from_url(config['configurations']['yarn-site'][yarn_rm_webui_property])
+hs_port = get_port_from_url(config['configurations']['mapred-site'][mapreduce_jobhistory_webui_property])
+hive_metastore_port = get_port_from_url(config['configurations']['hive-site']['hive.metastore.uris']) #"9083"
+hive_server_port = default('/configurations/hive-site/hive.server2.thrift.port', "10000")
+templeton_port = config['configurations']['webhcat-site']['templeton.port'] #"50111"
+hbase_master_port = config['configurations']['hbase-site']['hbase.master.info.port'] #"60010"
+hbase_rs_port = config['configurations']['hbase-site']['hbase.regionserver.info.port'] #"60030"
+storm_ui_port = config['configurations']['storm-site']['ui.port']
+drpc_port = config['configurations']['storm-site']['drpc.port']
+nimbus_port = config['configurations']['storm-site']['nimbus.thrift.port']
+supervisor_port = "56431"
+storm_rest_api_port = "8745"
+falcon_port = config['configurations']['falcon-env']['falcon_port']
+ahs_port = get_port_from_url(config['configurations']['yarn-site'][yarn_timeline_service_webui_property])
+knox_gateway_port = config['configurations']['gateway-site']['gateway.port']
+kafka_broker_port = config['configurations']['kafka-broker']['port']
+
+# use sensible defaults for checkpoint as they are required by Nagios and
+# may not be part of hdfs-site.xml on an upgrade
+if has_namenode:
+  if 'dfs.namenode.checkpoint.period' in config['configurations']['hdfs-site']:
+    dfs_namenode_checkpoint_period = config['configurations']['hdfs-site']['dfs.namenode.checkpoint.period']
+  else:
+    dfs_namenode_checkpoint_period = '21600'
+
+  if 'dfs.namenode.checkpoint.txns' in config['configurations']['hdfs-site']:
+    dfs_namenode_checkpoint_txns = config['configurations']['hdfs-site']['dfs.namenode.checkpoint.txns']
+  else:
+    dfs_namenode_checkpoint_txns = '1000000'
+
+# this is different for HDP1
+nn_metrics_property = "FSNamesystem"
+clientPort = config['configurations']['zookeeper-env']['clientPort'] # ZooKeeper client port
+
+
+java64_home = config['hostLevelParams']['java_home']
+check_cpu_on = is_jdk_greater_6(java64_home)
+security_enabled = config['configurations']['cluster-env']['security_enabled']
+nagios_keytab_path = default("/configurations/nagios-env/nagios_keytab_path", "/etc/security/keytabs/nagios.service.keytab")
+kinit_path_local = functions.get_kinit_path(["/usr/bin", "/usr/kerberos/bin", "/usr/sbin"])
+
+dfs_ha_enabled = False
+dfs_ha_nameservices = default("/configurations/hdfs-site/dfs.nameservices", None)
+dfs_ha_namenode_ids = default(format("/configurations/hdfs-site/dfs.ha.namenodes.{dfs_ha_nameservices}"), None)
+if dfs_ha_namenode_ids:
+  dfs_ha_namenodes_ids_list = dfs_ha_namenode_ids.split(",")
+  dfs_ha_namenode_ids_array_len = len(dfs_ha_namenodes_ids_list)
+  if dfs_ha_namenode_ids_array_len > 1:
+    dfs_ha_enabled = True
+
+nn_ha_host_port_map = {}
+if dfs_ha_enabled:
+  for nn_id in dfs_ha_namenodes_ids_list:
+    nn_host = config['configurations']['hdfs-site'][format('dfs.namenode.rpc-address.{dfs_ha_nameservices}.{nn_id}')]
+    nn_ha_host_port_map[nn_host.split(":")[0]] = nn_host.split(":")[1]
+else:
+  if 'namenode_host' in config['clusterHostInfo']:
+    namenode_metadata_port = get_port_from_url(config['configurations']['core-site']['fs.defaultFS'])
+    nn_ha_host_port_map[config['clusterHostInfo']['namenode_host'][0]] = namenode_metadata_port
+  else:
+    namenode_metadata_port = '8020'
+
+os_family = System.get_instance().os_family
+
+ganglia_port = "8651"
+ganglia_collector_slaves_port = "8660"
+ganglia_collector_namenode_port = "8661"
+ganglia_collector_jobtracker_port = "8662"
+ganglia_collector_hbase_port = "8663"
+ganglia_collector_rm_port = "8664"
+ganglia_collector_nm_port = "8660"
+ganglia_collector_hs_port = "8666"
+
+all_ping_ports = config['clusterHostInfo']['all_ping_ports']
+
+if System.get_instance().os_family == "suse":
+  nagios_p1_pl = "/usr/lib/nagios/p1.pl"
+  htpasswd_cmd = "htpasswd2"
+  web_conf_dir = "/etc/apache2/conf.d"
+elif System.get_instance().os_family == "ubuntu":
+  nagios_p1_pl = "/usr/lib/nagios3/p1.pl"
+  htpasswd_cmd = "htpasswd"
+  web_conf_dir = "/etc/apache2/conf.d"
+elif System.get_instance().os_family == "redhat":
+  nagios_p1_pl = "/usr/bin/p1.pl"
+  htpasswd_cmd = "htpasswd"
"/etc/httpd/conf.d" + +nagios_httpd_config_file = format("{web_conf_dir}/{nagios_service_name}.conf") +hdp_mon_nagios_addons_path = format("{web_conf_dir}/hdp_mon_nagios_addons.conf") + +ambarinagios_php_dir = "/usr/share/hdp/nagios/" +ambarinagios_php_filename = "nagios_alerts.php" + +nagios_user = config['configurations']['nagios-env']['nagios_user'] +nagios_group = config['configurations']['nagios-env']['nagios_group'] +nagios_web_login = config['configurations']['nagios-env']['nagios_web_login'] +nagios_web_password = config['configurations']['nagios-env']['nagios_web_password'] +user_group = config['configurations']['cluster-env']['user_group'] +nagios_contact = config['configurations']['nagios-env']['nagios_contact'] + + +_snamenode_host = default("/clusterHostInfo/snamenode_host", None) +_jtnode_host = default("/clusterHostInfo/jtnode_host", None) +_slave_hosts = default("/clusterHostInfo/slave_hosts", None) +_journalnode_hosts = default("/clusterHostInfo/journalnode_hosts", None) +_zkfc_hosts = default("/clusterHostInfo/zkfc_hosts", None) +_rm_host = default("/clusterHostInfo/rm_host", None) +if type(_rm_host) is list: + rm_hosts_in_str = ','.join(_rm_host) +_nm_hosts = default("/clusterHostInfo/nm_hosts", None) +_hs_host = default("/clusterHostInfo/hs_host", None) +_zookeeper_hosts = default("/clusterHostInfo/zookeeper_hosts", None) +_flume_hosts = default("/clusterHostInfo/flume_hosts", None) +_nagios_server_host = default("/clusterHostInfo/nagios_server_host",None) +_ganglia_server_host = default("/clusterHostInfo/ganglia_server_host",None) +_app_timeline_server_hosts = default("/clusterHostInfo/app_timeline_server_hosts",None) +_nimbus_host = default("/clusterHostInfo/nimbus_hosts",None) +_drpc_host = default("/clusterHostInfo/drpc_server_hosts",None) +_supervisor_hosts = default("/clusterHostInfo/supervisor_hosts",None) +_storm_ui_host = default("/clusterHostInfo/storm_ui_server_hosts",None) +_storm_rest_api_hosts = default("/clusterHostInfo/storm_rest_api_hosts",None) +hbase_master_hosts = default("/clusterHostInfo/hbase_master_hosts",None) +if type(hbase_master_hosts) is list: + hbase_master_hosts_in_str = ','.join(hbase_master_hosts) +_hive_server_host = default("/clusterHostInfo/hive_server_host",None) +_oozie_server = default("/clusterHostInfo/oozie_server",None) +_webhcat_server_host = default("/clusterHostInfo/webhcat_server_host",None) +_falcon_host = default("/clusterHostInfo/falcon_server_hosts", None) +# can differ on HDP1 +#_mapred_tt_hosts = _slave_hosts +#if hbase_rs_hosts not given it is assumed that region servers on same nodes as slaves +_hbase_rs_hosts = default("/clusterHostInfo/hbase_rs_hosts", _slave_hosts) +_hue_server_host = default("/clusterHostInfo/hue_server_host", None) +_knox_gateway_host = default("/clusterHostInfo/knox_gateway_hosts", None) +_kafka_broker_host = default("/clusterHostInfo/kafka_broker_hosts", None) +all_hosts = config['clusterHostInfo']['all_hosts'] + +if 'namenode_host' in config['clusterHostInfo']: + nn_hosts_string = " ".join(namenode_host) +else: + nn_hosts_string = " ".join(config['clusterHostInfo']['ambari_server_host']) + + +hostgroup_defs = { + 'namenode' : namenode_host, + 'snamenode' : _snamenode_host, + 'slaves' : _slave_hosts, + 'agent-servers' : all_hosts, + 'nagios-server' : _nagios_server_host, + 'jobtracker' : _jtnode_host, + 'ganglia-server' : _ganglia_server_host, + 'flume-servers' : _flume_hosts, + 'zookeeper-servers' : _zookeeper_hosts, + 'hbasemasters' : hbase_master_hosts, + 'hiveserver' : _hive_server_host, + 
'region-servers' : _hbase_rs_hosts, + 'oozie-server' : _oozie_server, + 'webhcat-server' : _webhcat_server_host, + 'hue-server' : _hue_server_host, + 'resourcemanager' : _rm_host, + 'nodemanagers' : _nm_hosts, + 'historyserver2' : _hs_host, + 'journalnodes' : _journalnode_hosts, + 'nimbus' : _nimbus_host, + 'drpc-server' : _drpc_host, + 'storm_ui' : _storm_ui_host, + 'supervisors' : _supervisor_hosts, + 'storm_rest_api' : _storm_rest_api_hosts, + 'falcon-server' : _falcon_host, + 'ats-servers' : _app_timeline_server_hosts, + 'knox-gateway' : _knox_gateway_host, + 'kafka-broker' : _kafka_broker_host +} http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/status_params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/status_params.py b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/status_params.py new file mode 100644 index 0000000..11d4aa9 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/scripts/status_params.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +""" + +from resource_management import * + +config = Script.get_config() + +nagios_pid_dir = "/var/run/nagios" +nagios_pid_file = format("{nagios_pid_dir}/nagios.pid") + +nagios_var_dir = "/var/nagios" +nagios_rw_dir = "/var/nagios/rw" http://git-wip-us.apache.org/repos/asf/ambari/blob/e7d07030/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/templates/contacts.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/templates/contacts.cfg.j2 b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/templates/contacts.cfg.j2 new file mode 100644 index 0000000..610b2bd --- /dev/null +++ b/ambari-server/src/main/resources/stacks/PHD/3.0.0.0/services/NAGIOS/package/templates/contacts.cfg.j2 @@ -0,0 +1,109 @@ +{# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#}
+
+###############################################################################
+# CONTACTS.CFG - SAMPLE CONTACT/CONTACTGROUP DEFINITIONS
+#
+# Last Modified: 05-31-2007
+#
+# NOTES: This config file provides you with some example contact and contact
+#        group definitions that you can reference in host and service
+#        definitions.
+#
+#        You don't need to keep these definitions in a separate file from your
+#        other object definitions. This has been done just to make things
+#        easier to understand.
+#
+###############################################################################
+
+
+###############################################################################
+###############################################################################
+#
+# CONTACTS
+#
+###############################################################################
+###############################################################################
+
+# Just one contact defined by default - the Nagios admin (that's you)
+# This contact definition inherits a lot of default values from the 'generic-contact'
+# template which is defined elsewhere.
+
+define contact{
+        contact_name    {{nagios_web_login}}    ; Short name of user
+        use             generic-contact         ; Inherit default values from generic-contact template (defined above)
+        alias           Nagios Admin            ; Full name of user
+
+        email           {{nagios_contact}}      ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ******
+        }
+
+# Contact which writes all Nagios alerts to the system logger.
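# [editor's note: illustrative sketch, not part of this commit. The
# service_sys_logger and host_sys_logger notification commands referenced by
# the contact below are expected to be defined in the generated commands
# configuration; a minimal hypothetical definition could look like:
#
#   define command{
#           command_name    service_sys_logger
#           command_line    /usr/bin/logger -p daemon.notice "nagios: $SERVICEDESC$ on $HOSTNAME$ is $SERVICESTATE$: $SERVICEOUTPUT$"
#   }
# ]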
+define contact{
+        contact_name                    sys_logger      ; Short name of user
+        use                             generic-contact ; Inherit default values from generic-contact template (defined above)
+        alias                           System Logger   ; Full name of user
+        host_notifications_enabled      1
+        service_notifications_enabled   1
+        service_notification_period     24x7
+        host_notification_period        24x7
+        service_notification_options    w,u,c,r,s
+        host_notification_options       d,u,r,s
+        can_submit_commands             1
+        retain_status_information       1
+        service_notification_commands   service_sys_logger
+        host_notification_commands      host_sys_logger
+        }
+
+###############################################################################
+###############################################################################
+#
+# CONTACT GROUPS
+#
+###############################################################################
+###############################################################################
+
+# The two contacts defined above share a single contact group, so there is
+# no need to create more than one.
+
+define contactgroup {
+        contactgroup_name       admins
+        alias                   Nagios Administrators
+        members                 {{nagios_web_login}},sys_logger
+}
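[editor's note: illustrative only, not part of this commit. The stop logic in
nagios_service.py earlier in this patch uses the os.kill(pid, 0) probe to test
whether the Nagios process is still alive; a self-contained sketch of that idiom:

  import os

  def process_is_alive(pid):
      # signal 0 delivers nothing; it only raises OSError if the PID is gone
      try:
          os.kill(pid, 0)
      except OSError:
          return False
      return True

  print(process_is_alive(os.getpid()))  # True: the current process exists
]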