incubator-ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From maha...@apache.org
Subject svn commit: r1443336 - in /incubator/ambari/trunk: ./ ambari-agent/src/main/puppet/modules/hdp-nagios/templates/ ambari-server/src/main/java/org/apache/ambari/server/configuration/ contrib/addons/src/addOns/nagios/plugins/ contrib/addons/src/addOns/nag...
Date Thu, 07 Feb 2013 07:46:45 GMT
Author: mahadev
Date: Thu Feb  7 07:46:45 2013
New Revision: 1443336

URL: http://svn.apache.org/viewvc?rev=1443336&view=rev
Log:
AMBARI-1358. Clean up alert messages. (Yusaku Sako via mahadev)

Modified:
    incubator/ambari/trunk/CHANGES.txt
    incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
    incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
    incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh
    incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php
    incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh
    incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh
    incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
    incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php

Modified: incubator/ambari/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/CHANGES.txt?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/CHANGES.txt (original)
+++ incubator/ambari/trunk/CHANGES.txt Thu Feb  7 07:46:45 2013
@@ -439,6 +439,9 @@ Trunk (unreleased changes):
  AMBARI-1330. Cluster missing hosts after successful install and restart.
  (mahadev)
 
+ AMBARI-1358. Clean up alert messages. (Yusaku Sako via mahadev)
+
+
 AMBARI-1.2.0 branch:
 
  INCOMPATIBLE CHANGES

Modified: incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb (original)
+++ incubator/ambari/trunk/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb Thu Feb  7 07:46:45 2013
@@ -47,7 +47,7 @@ define service {
         use                     hadoop-service
         service_description     HDFS::Percent DataNodes storage full
         servicegroups           HDFS
-        check_command           check_aggregate!"DATANODE::Storage full"!10%!30%
+        check_command           check_aggregate!"DATANODE::DataNode storage full"!10%!30%
         normal_check_interval   2
         retry_check_interval    1 
         max_check_attempts      1
@@ -58,7 +58,7 @@ define service {
         use                     hadoop-service
         service_description     HDFS::Percent DataNodes down
         servicegroups           HDFS
-        check_command           check_aggregate!"DATANODE::Process down"!10%!30%
+        check_command           check_aggregate!"DATANODE::DataNode process down"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -70,7 +70,7 @@ define service {
         use                     hadoop-service
         service_description     MAPREDUCE::Percent TaskTrackers down
         servicegroups           MAPREDUCE
-        check_command           check_aggregate!"TASKTRACKER::Process down"!10%!30%
+        check_command           check_aggregate!"TASKTRACKER::TaskTracker process down"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -81,9 +81,9 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     hadoop-service
-        service_description     ZOOKEEPER::Percent zookeeper servers down
+        service_description     ZOOKEEPER::Percent ZooKeeper Servers down
         servicegroups           ZOOKEEPER
-        check_command           check_aggregate!"ZKSERVERS::ZKSERVERS Process down"!35%!70%
+        check_command           check_aggregate!"ZOOKEEPER::ZooKeeper Server process down"!35%!70%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -95,9 +95,9 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     hadoop-service
-        service_description     HBASE::Percent region servers down
+        service_description     HBASE::Percent RegionServers down
         servicegroups           HBASE
-        check_command           check_aggregate!"REGIONSERVER::Process down"!10%!30%
+        check_command           check_aggregate!"REGIONSERVER::RegionServer process down"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -110,7 +110,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia [gmetad] Process down
+        service_description     GANGLIA::Ganglia [gmetad] process down
         servicegroups           GANGLIA
         check_command           check_tcp!8651!-w 1 -c 1
         normal_check_interval   0.25
@@ -121,7 +121,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia collector [gmond] Process down alert for slaves
+        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for slaves
         servicegroups           GANGLIA
         check_command           check_tcp!8660!-w 1 -c 1
         normal_check_interval   0.25
@@ -132,7 +132,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia collector [gmond] Process down alert for namenode
+        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for NameNode
         servicegroups           GANGLIA
         check_command           check_tcp!8661!-w 1 -c 1
         normal_check_interval   0.25
@@ -143,7 +143,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker
+        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker
         servicegroups           GANGLIA
         check_command           check_tcp!8662!-w 1 -c 1
         normal_check_interval   0.25
@@ -155,7 +155,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster
+        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master
         servicegroups           GANGLIA
         check_command           check_tcp!8663!-w 1 -c 1
         normal_check_interval   0.25
@@ -170,7 +170,7 @@ define service {
 define service {
         hostgroup_name          snamenode
         use                     hadoop-service
-        service_description     NAMENODE::Secondary Namenode Process down
+        service_description     NAMENODE::Secondary NameNode process down
         servicegroups           HDFS
         check_command           check_tcp!50090!-w 1 -c 1
         normal_check_interval   0.5
@@ -183,7 +183,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     NAMENODE::Namenode Web UI down
+        service_description     NAMENODE::NameNode Web UI down
         servicegroups           HDFS
         check_command           check_webui!namenode
         normal_check_interval   1
@@ -194,7 +194,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     NAMENODE::Namenode Edit logs directory status
+        service_description     NAMENODE::NameNode edit logs directory status
         servicegroups           HDFS
         check_command           check_name_dir_status!50070
         normal_check_interval   0.5
@@ -205,7 +205,7 @@ define service {
 define service {        
         hostgroup_name          namenode        
         use                     hadoop-service
-        service_description     NAMENODE::Namenode Host CPU utilization
+        service_description     NAMENODE::NameNode host CPU utilization
         servicegroups           HDFS
         check_command           check_cpu!200%!250%
         normal_check_interval   5
@@ -217,7 +217,7 @@ define service {        
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     NAMENODE::Namenode Process down
+        service_description     NAMENODE::NameNode process down
         servicegroups           HDFS
         check_command           check_tcp!8020!-w 1 -c 1
         normal_check_interval   0.5
@@ -239,7 +239,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     HDFS::HDFS Capacity utilization
+        service_description     HDFS::HDFS capacity utilization
         servicegroups           HDFS
         check_command           check_hdfs_capacity!50070!80%!90%
         normal_check_interval   10
@@ -250,7 +250,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     HDFS::Namenode RPC Latency
+        service_description     HDFS::NameNode RPC latency
         servicegroups           HDFS
         check_command           check_rpcq_latency!NameNode!50070!3000!5000
         normal_check_interval   5
@@ -286,7 +286,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     hadoop-service
-        service_description     JOBTRACKER::Jobtracker CPU utilization
+        service_description     JOBTRACKER::JobTracker CPU utilization
         servicegroups           MAPREDUCE
         check_command           check_cpu!200%!250%
         normal_check_interval   5
@@ -298,7 +298,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     hadoop-service
-        service_description     JOBTRACKER::Jobtracker Process down
+        service_description     JOBTRACKER::JobTracker process down
         servicegroups           MAPREDUCE
         check_command           check_tcp!50030!-w 1 -c 1
         normal_check_interval   0.5
@@ -309,7 +309,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     hadoop-service
-        service_description     MAPREDUCE::JobTracker RPC Latency
+        service_description     MAPREDUCE::JobTracker RPC latency
         servicegroups           MAPREDUCE
         check_command           check_rpcq_latency!JobTracker!50030!3000!5000
         normal_check_interval   5
@@ -323,7 +323,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     hadoop-service
-        service_description     DATANODE::Process down
+        service_description     DATANODE::DataNode process down
         servicegroups           HDFS
         check_command           check_tcp!<%=scope.function_hdp_template_var("dfs_datanode_address")%>!-w 1 -c 1
         normal_check_interval   1
@@ -334,7 +334,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     hadoop-service
-        service_description     DATANODE::Storage full
+        service_description     DATANODE::DataNode storage full
         servicegroups           HDFS
         check_command           check_datanode_storage!<%=scope.function_hdp_template_var("dfs_datanode_http_address")%>!90%!90%
         normal_check_interval   5
@@ -346,7 +346,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     hadoop-service
-        service_description     TASKTRACKER::Process down
+        service_description     TASKTRACKER::TaskTracker process down
         servicegroups           MAPREDUCE
         check_command           check_tcp!50060!-w 1 -c 1
         normal_check_interval   1
@@ -360,7 +360,7 @@ define service {
 define service {
         hostgroup_name          zookeeper-servers
         use                     hadoop-service
-        service_description     ZKSERVERS::ZKSERVERS Process down
+        service_description     ZOOKEEPER::ZooKeeper Server process down
         servicegroups           ZOOKEEPER
         check_command           check_tcp!2181!-w 1 -c 1
         normal_check_interval   1
@@ -374,7 +374,7 @@ define service {
 define service {
         hostgroup_name          region-servers
         use                     hadoop-service
-        service_description     REGIONSERVER::Process down
+        service_description     REGIONSERVER::RegionServer process down
         servicegroups           HBASE
         check_command           check_tcp!60020!-w 1 -c 1
         normal_check_interval   1
@@ -386,7 +386,7 @@ define service {
 define service {
         hostgroup_name          hbasemaster
         use                     hadoop-service
-        service_description     HBASEMASTER::HBase Web UI down
+        service_description     HBASEMASTER::HBase Master Web UI down
         servicegroups           HBASE
         check_command           check_webui!hbase
         normal_check_interval   1
@@ -397,7 +397,7 @@ define service {
 define service {
         hostgroup_name          hbasemaster
         use                     hadoop-service
-        service_description     HBASEMASTER::HBaseMaster CPU utilization
+        service_description     HBASEMASTER::HBase Master CPU utilization
         servicegroups           HBASE
         check_command           check_cpu!200%!250%
         normal_check_interval   5
@@ -408,7 +408,7 @@ define service {
 define service {
         hostgroup_name          hbasemaster
         use                     hadoop-service
-        service_description     HBASEMASTER::HBaseMaster Process down
+        service_description     HBASEMASTER::HBase Master process down
         servicegroups           HBASE
         check_command           check_tcp!60000!-w 1 -c 1
         normal_check_interval   0.5
@@ -422,7 +422,7 @@ define service {
 define service {
         hostgroup_name          hiveserver
         use                     hadoop-service
-        service_description     HIVE-METASTORE::HIVE-METASTORE status check
+        service_description     HIVE-METASTORE::Hive Metastore status check
         servicegroups           HIVE-METASTORE
         <%if scope.function_hdp_template_var("security_enabled")-%>
         check_command           check_hive_metastore_status!9083!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("keytab_path")%>/<%=scope.function_hdp_template_var("nagios_user")%>.headless.keytab!<%=scope.function_hdp_template_var("nagios_user")%>
@@ -439,7 +439,7 @@ define service {
 define service {
         hostgroup_name          oozie-server
         use                     hadoop-service
-        service_description     OOZIE::Oozie status check
+        service_description     OOZIE::Oozie Server status check
         servicegroups           OOZIE
         <%if scope.function_hdp_template_var("security_enabled")-%>
         check_command           check_oozie_status!11000!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("keytab_path")%>/<%=scope.function_hdp_template_var("nagios_user")%>.headless.keytab!<%=scope.function_hdp_template_var("nagios_user")%>
@@ -456,7 +456,7 @@ define service {
 define service {
         hostgroup_name          webhcat-server
         use                     hadoop-service
-        service_description     WEBHCAT::WEBHCAT status check
+        service_description     WEBHCAT::WebHCat Server status check
         servicegroups           WEBHCAT 
         <%if scope.function_hdp_template_var("security_enabled")-%>
         check_command           check_templeton_status!50111!v1!true!<%=scope.function_hdp_template_var("keytab_path")%>/<%=scope.function_hdp_template_var("nagios_user")%>.headless.keytab!<%=scope.function_hdp_template_var("nagios_user")%>

Modified: incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java (original)
+++ incubator/ambari/trunk/ambari-server/src/main/java/org/apache/ambari/server/configuration/Configuration.java Thu Feb  7 07:46:45 2013
@@ -109,6 +109,10 @@ public class Configuration {
   public static final String SRVR_HOSTS_MAPPING = 
       "server.hosts.mapping";
 
+  public static final String SSL_TRUSTSTORE_PATH_KEY = "ssl.trustStore.path";
+  public static final String SSL_TRUSTSTORE_PASSWORD_KEY = "ssl.trustStore.password";
+  public static final String SSL_TRUSTSTORE_TYPE_KEY = "ssl.trustStore.type";
+
   private static final String SRVR_KSTR_DIR_DEFAULT = ".";
   public static final String SRVR_CRT_NAME_DEFAULT = "ca.crt";
   public static final String SRVR_KEY_NAME_DEFAULT = "ca.key";
@@ -209,6 +213,23 @@ public class Configuration {
       }
     }
     configsMap.put(SRVR_CRT_PASS_KEY, randStr);
+
+    loadSSLParams();
+  }
+
+  /**
+   * Loads trusted certificates store properties
+   */
+  private void loadSSLParams(){
+    if (properties.getProperty(SSL_TRUSTSTORE_PATH_KEY) != null) {
+      System.setProperty("javax.net.ssl.trustStore", properties.getProperty(SSL_TRUSTSTORE_PATH_KEY));
+    }
+    if (properties.getProperty(SSL_TRUSTSTORE_PASSWORD_KEY) != null) {
+      System.setProperty("javax.net.ssl.trustStorePassword", properties.getProperty(SSL_TRUSTSTORE_PASSWORD_KEY));
+    }
+    if (properties.getProperty(SSL_TRUSTSTORE_TYPE_KEY) != null) {
+      System.setProperty("javax.net.ssl.trustStoreType", properties.getProperty(SSL_TRUSTSTORE_TYPE_KEY));
+    }
   }
 
 

Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_hive_metastore_status.sh Thu Feb  7 07:46:45 2013
@@ -25,8 +25,8 @@ PORT=$2
 HCAT_URL=-Dhive.metastore.uris="thrift://$HOST:$PORT"
 out=`hcat $HCAT_URL -e "show databases" 2>&1`
 if [[ "$?" -ne 0 ]]; then 
-  echo "CRITICAL: Error accessing hive-metaserver status [$out]";
+  echo "CRITICAL: Error accessing Hive Metastore status [$out]";
   exit 2;
 fi
-echo "OK: Hive metaserver status OK";
+echo "OK: Hive Metastore status OK";
 exit 0;

Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_name_dir_status.php Thu Feb  7 07:46:45 2013
@@ -36,12 +36,12 @@
   $json_array = json_decode($json_string, true);
   $object = $json_array['beans'][0];
   if ($object['NameDirStatuses'] == "") {
-    echo "UNKNOWN: Namenode directory status not available via http://<nn_host>:port/jmx url" . "\n";
+    echo "UNKNOWN: NameNode directory status not available via http://<nn_host>:port/jmx url" . "\n";
     exit(3);
   }
   $NameDirStatuses = json_decode($object['NameDirStatuses'], true);
   $failed_dir_count = count($NameDirStatuses['failed']);
-  $out_msg = "CRITICAL: Offline Namenode directories: ";
+  $out_msg = "CRITICAL: Offline NameNode directories: ";
   if ($failed_dir_count > 0) {
     foreach ($NameDirStatuses['failed'] as $key => $value) {
       $out_msg = $out_msg . $key . ":" . $value . ", ";
@@ -49,7 +49,7 @@
     echo $out_msg . "\n";
     exit (2);
   }
-  echo "OK: All Namenode directories are active" . "\n";
+  echo "OK: All NameNode directories are active" . "\n";
   exit(0);
 
   /* print usage */

Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_oozie_status.sh Thu Feb  7 07:46:45 2013
@@ -20,7 +20,7 @@
 #
 #
 # OOZIE_URL is of the form http://<hostname>:<port>/oozie
-# OOZIE_URL: http://hortonworks-sandbox.localdomain:11000/oozie
+# OOZIE_URL: http://host1.localdomain:11000/oozie
 HOST=$1
 PORT=$2
 JAVA_HOME=$3
@@ -28,8 +28,8 @@ OOZIE_URL="http://$HOST:$PORT/oozie"
 export JAVA_HOME=$JAVA_HOME
 out=`oozie admin -oozie ${OOZIE_URL} -status 2>&1`
 if [[ "$?" -ne 0 ]]; then 
-  echo "CRITICAL: Error accessing oozie server status [$out]";
+  echo "CRITICAL: Error accessing Oozie Server status [$out]";
   exit 2;
 fi
-echo "OK: Oozie server status [$out]";
+echo "OK: Oozie Server status [$out]";
 exit 0;

Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/check_webui.sh Thu Feb  7 07:46:45 2013
@@ -45,14 +45,14 @@ jobtracker) 
 namenode)
     nnweburl="http://$host:50070"
     if [[ `checkurl "$nnweburl"` -ne 0 ]] ; then 
-      echo "WARNING: NameNode web UI not accessible : $nnweburl";
+      echo "WARNING: NameNode Web UI not accessible : $nnweburl";
       exit 1;
     fi
     ;;
 jobhistory)
     jhweburl="http://$host:51111/jobhistoryhome.jsp"
     if [[ `checkurl "$jhweburl"` -ne 0 ]]; then 
-      echo "WARNING: Jobhistory web UI not accessible : $jhweburl";
+      echo "WARNING: JobHistory Web UI not accessible : $jhweburl";
       exit 1;
     fi
     ;;
@@ -60,7 +60,7 @@ hbase)
     hbaseweburl="http://$host:60010/master-status"
     jhweburl="http://domU-12-31-39-16-DC-FB.compute-1.internal:51111/jobhistoryhome.jsp"
     if [[ `checkurl "$hbaseweburl"` -ne 0 ]]; then 
-      echo "WARNING: Hbase Master web UI not accessible : $hbaseweburl"; 
+      echo "WARNING: HBase Master Web UI not accessible : $hbaseweburl";
       exit 1;
     fi
     ;;

Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/plugins/sys_logger.py Thu Feb  7 07:46:45 2013
@@ -21,35 +21,35 @@ severities = {'UP':'OK', 'DOWN':'Critica
               'WARNING':'Warning', 'UNKNOWN':'Warning', 'CRITICAL':'Critical'}
 
 # List of services which can result in events at the Degraded severity
-degraded_alert_services = ['HBASEMASTER::HBaseMaster CPU utilization',
-                           'HDFS::Namenode RPC Latency',
-                           'MAPREDUCE::JobTracker RPC Latency',
-                           'JOBTRACKER::Jobtracker CPU utilization']
+degraded_alert_services = ['HBASEMASTER::HBase Master CPU utilization',
+                           'HDFS::NameNode RPC latency',
+                           'MAPREDUCE::JobTracker RPC latency',
+                           'JOBTRACKER::JobTracker CPU utilization']
 
 # List of services which can result in events at the Fatal severity
-fatal_alert_services = ['NAMENODE::Namenode Process down']
+fatal_alert_services = ['NAMENODE::NameNode process down']
 
 # dictionary of service->msg_id mappings
-msg_ids = {'Host::Ping':'host_down', 'HBASEMASTER::HBaseMaster CPU utilization':'master_cpu_utilization',
-           'HDFS::HDFS Capacity utilization':'hdfs_percent_capacity', 'HDFS::Corrupt/Missing blocks':'hdfs_block',
-           'NAMENODE::Namenode Edit logs directory status':'namenode_edit_log_write', 'HDFS::Percent DataNodes down':'datanode_down',
-           'DATANODE::Process down':'datanode_process_down', 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
-           'NAMENODE::Namenode Process down':'namenode_process_down', 'HDFS::Namenode RPC Latency':'namenode_rpc_latency',
-           'DATANODE::Storage full':'datanodes_storage_full', 'JOBTRACKER::Jobtracker Process down':'jobtracker_process_down',
-           'MAPREDUCE::JobTracker RPC Latency':'jobtracker_rpc_latency', 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
-           'TASKTRACKER::Process down':'tasktracker_process_down', 'HBASEMASTER::HBaseMaster Process down':'hbasemaster_process_down',
-           'REGIONSERVER::Process down':'regionserver_process_down', 'HBASE::Percent region servers down':'regionservers_down',
-           'HIVE-METASTORE::HIVE-METASTORE status check':'hive_metastore_process_down', 'ZOOKEEPER::Percent zookeeper servers down':'zookeepers_down',
-           'ZKSERVERS::ZKSERVERS Process down':'zookeeper_process_down', 'OOZIE::Oozie status check':'oozie_down',
-           'TEMPLETON::Templeton status check':'templeton_down', 'PUPPET::Puppet agent down':'puppet_down',
-           'NAGIOS::Nagios status log staleness':'nagios_status_log_stale', 'GANGLIA::Ganglia [gmetad] Process down':'ganglia_process_down',
-           'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster':'ganglia_collector_process_down',
-           'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker':'ganglia_collector_process_down',
-           'GANGLIA::Ganglia collector [gmond] Process down alert for namenode':'ganglia_collector_process_down',
-           'GANGLIA::Ganglia collector [gmond] Process down alert for slaves':'ganglia_collector_process_down',
-           'NAMENODE::Secondary Namenode Process down':'secondary_namenode_process_down',
-           'JOBTRACKER::Jobtracker CPU utilization':'jobtracker_cpu_utilization',
-           'HBASEMASTER::HBase Web UI down':'hbase_ui_down', 'NAMENODE::Namenode Web UI down':'namenode_ui_down',
+msg_ids = {'Host::Ping':'host_down', 'HBASEMASTER::HBase Master CPU utilization':'master_cpu_utilization',
+           'HDFS::HDFS capacity utilization':'hdfs_percent_capacity', 'HDFS::Corrupt/Missing blocks':'hdfs_block',
+           'NAMENODE::NameNode edit logs directory status':'namenode_edit_log_write', 'HDFS::Percent DataNodes down':'datanode_down',
+           'DATANODE::DataNode process down':'datanode_process_down', 'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
+           'NAMENODE::NameNode process down':'namenode_process_down', 'HDFS::NameNode RPC latency':'namenode_rpc_latency',
+           'DATANODE::DataNode storage full':'datanodes_storage_full', 'JOBTRACKER::JobTracker process down':'jobtracker_process_down',
+           'MAPREDUCE::JobTracker RPC latency':'jobtracker_rpc_latency', 'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
+           'TASKTRACKER::TaskTracker process down':'tasktracker_process_down', 'HBASEMASTER::HBase Master process down':'hbasemaster_process_down',
+           'REGIONSERVER::RegionServer process down':'regionserver_process_down', 'HBASE::Percent RegionServers down':'regionservers_down',
+           'HIVE-METASTORE::Hive Metastore status check':'hive_metastore_process_down', 'ZOOKEEPER::Percent ZooKeeper Servers down':'zookeepers_down',
+           'ZOOKEEPER::ZooKeeper Server process down':'zookeeper_process_down', 'OOZIE::Oozie Server status check':'oozie_down',
+           'WEBHCAT::WebHCat Server status check':'templeton_down', 'PUPPET::Puppet agent down':'puppet_down',
+           'NAGIOS::Nagios status log staleness':'nagios_status_log_stale', 'GANGLIA::Ganglia [gmetad] process down':'ganglia_process_down',
+           'GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master':'ganglia_collector_process_down',
+           'GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker':'ganglia_collector_process_down',
+           'GANGLIA::Ganglia Collector [gmond] process down alert for NameNode':'ganglia_collector_process_down',
+           'GANGLIA::Ganglia Collector [gmond] process down alert for slaves':'ganglia_collector_process_down',
+           'NAMENODE::Secondary NameNode process down':'secondary_namenode_process_down',
+           'JOBTRACKER::JobTracker CPU utilization':'jobtracker_cpu_utilization',
+           'HBASEMASTER::HBase Master Web UI down':'hbase_ui_down', 'NAMENODE::NameNode Web UI down':'namenode_ui_down',
            'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down', 'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down'}
 
 

Modified: incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php
URL: http://svn.apache.org/viewvc/incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php?rev=1443336&r1=1443335&r2=1443336&view=diff
==============================================================================
--- incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php (original)
+++ incubator/ambari/trunk/contrib/addons/src/addOns/nagios/scripts/nagios_alerts.php Thu Feb  7 07:46:45 2013
@@ -91,13 +91,13 @@ function hdp_mon_generate_response( $res
   define ("warn", "1");
   define ("critical", "2");
 
-  define ("HDFS_SERVICE_CHECK", "NAMENODE::Namenode Process down");
-  define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::Jobtracker Process down");
-  define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster Process down");
-  define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent zookeeper servers down");
-  define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::HIVE-METASTORE status check");
-  define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie status check");
-  define ("TEMPLETON_SERVICE_CHECK", "TEMPLETON::Templeton status check");
+  define ("HDFS_SERVICE_CHECK", "NAMENODE::NameNode process down");
+  define ("MAPREDUCE_SERVICE_CHECK", "JOBTRACKER::JobTracker process down");
+  define ("HBASE_SERVICE_CHECK", "HBASEMASTER::HBaseMaster process down");
+  define ("ZOOKEEPER_SERVICE_CHECK", "ZOOKEEPER::Percent ZooKeeper Servers down");
+  define ("HIVE_METASTORE_SERVICE_CHECK", "HIVE-METASTORE::Hive Metastore status check");
+  define ("OOZIE_SERVICE_CHECK", "OOZIE::Oozie Server status check");
+  define ("WEBHCAT_SERVICE_CHECK", "WEBHCAT::WebHCat Server status check");
   define ("PUPPET_SERVICE_CHECK", "PUPPET::Puppet agent down");
 
   /* If SUSE, status file is under /var/lib/nagios */
@@ -196,10 +196,10 @@ function hdp_mon_generate_response( $res
         }
         continue;
       }
-      if (getParameter($object, "service_description") == TEMPLETON_SERVICE_CHECK) {
-        $services_object["TEMPLETON"] = getParameter($object, "last_hard_state");
-        if ($services_object["TEMPLETON"] >= 1) {
-          $services_object["TEMPLETON"] = 1;
+      if (getParameter($object, "service_description") == WEBHCAT_SERVICE_CHECK) {
+        $services_object["WEBHCAT"] = getParameter($object, "last_hard_state");
+        if ($services_object["WEBHCAT"] >= 1) {
+          $services_object["WEBHCAT"] = 1;
         }
         continue;
       }
@@ -302,16 +302,7 @@ function hdp_mon_generate_response( $res
           $servicestatus['service_type'] = get_service_type($servicestatus['service_description']);
           $srv_desc = explode ("::",$servicestatus['service_description'],2);
 
-          switch ($srv_desc[0]) {
-            case "DATANODE":
-            case "TASKTRACKER":
-            case "REGIONSERVER":
-              $servicestatus['service_description'] = $srv_desc[0] . ' ' . $srv_desc[1];
-              break;
-            default:
-              $servicestatus['service_description'] = $srv_desc[1];
-          }
-            $servicestatus['service_description'] = format_description($servicestatus['service_description']);
+          $servicestatus['service_description'] = $srv_desc[1];
         }
         break;
       case "nok":
@@ -368,43 +359,6 @@ function hdp_mon_generate_response( $res
     return $services_objects;
   }
 
-  function format_description ($service_description)
-  {
-      $patterns[0] = "/tasktracker/i";
-      $patterns[1] = "/datanode/i";
-      $patterns[2] = "/namenode/i";
-      $patterns[3] = "/jobtracker/i";
-      $patterns[4] = "/hbaseMaster/i";
-      $patterns[5] = "/hive-metastore/i";
-      $patterns[6] = "/webhcat/i";
-      $patterns[7] = "/zookeeper/i";
-      $patterns[8] = "/zkserver/i";
-      $patterns[9] = "/oozie/i";
-      $patterns[10] = "/region server/i";
-      $patterns[11] = "/region/i";
-      $patterns[12] = "/server/i";
-      $patterns[13] = "/servers/i";
-
-      $replacements[0] = "TaskTracker";
-      $replacements[1] = "DataNode";
-      $replacements[2] = "NameNode";
-      $replacements[3] = "JobTracker";
-      $replacements[4] = "HBase Master";
-      $replacements[5] = "Hive Metastore";
-      $replacements[6] = "WebHCat Server";
-      $replacements[7] = "ZooKeeper";
-      $replacements[8] = "ZooKeeper Server";
-      $replacements[9] = "Oozie Server";
-      $replacements[10] = "RegionServer";
-      $replacements[11] = "Region";
-      $replacements[12] = "Server";
-      $replacements[13] = "Servers";
-
-      $result =  preg_replace($patterns, $replacements, $service_description);
-
-    return $result;
-  }
-
   function get_service_type($service_description)
   {
     $pieces = explode("::", $service_description);



Mime
View raw message