incubator-ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yus...@apache.org
Subject git commit: AMBARI-2920. Rename alert titles and descriptions. (yusaku)
Date Thu, 15 Aug 2013 22:22:19 GMT
Updated Branches:
  refs/heads/trunk d44c1c2ba -> a2b675c6a


AMBARI-2920. Rename alert titles and descriptions. (yusaku)


Project: http://git-wip-us.apache.org/repos/asf/incubator-ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-ambari/commit/a2b675c6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-ambari/tree/a2b675c6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-ambari/diff/a2b675c6

Branch: refs/heads/trunk
Commit: a2b675c6ada585a830ad928f6aefe426acb9dc03
Parents: d44c1c2
Author: Yusaku Sako <yusaku@hortonworks.com>
Authored: Thu Aug 15 14:59:48 2013 -0700
Committer: Yusaku Sako <yusaku@hortonworks.com>
Committed: Thu Aug 15 15:22:02 2013 -0700

----------------------------------------------------------------------
 .../files/check_ambari_agent_status.sh          |  4 +-
 .../files/check_hive_metastore_status.sh        |  4 +-
 .../files/check_mapred_local_dir_used.sh        |  4 +-
 .../hdp-nagios/files/check_name_dir_status.php  |  6 +-
 .../files/check_nodemanager_health.sh           |  4 +-
 .../hdp-nagios/files/check_oozie_status.sh      |  4 +-
 .../hdp-nagios/files/check_templeton_status.sh  |  4 +-
 .../modules/hdp-nagios/files/check_webui.sh     | 10 +-
 .../templates/hadoop-services.cfg.erb           | 96 ++++++++++----------
 9 files changed, 68 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_ambari_agent_status.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_ambari_agent_status.sh
b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_ambari_agent_status.sh
index dd67496..a8b510a 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_ambari_agent_status.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_ambari_agent_status.sh
@@ -31,9 +31,9 @@ fi
 
 if [ $RES -eq "2" ]
 then
-  echo "OK: Ambari agent is running [PID:$AMBARI_AGENT_PID]";
+  echo "OK: Ambari Agent is running [PID:$AMBARI_AGENT_PID]";
   exit 0;
 else
-  echo "CRITICAL: Ambari agent is not running [$AMBARI_AGENT_PID_PATH not found]";
+  echo "CRITICAL: Ambari Agent is not running [$AMBARI_AGENT_PID_PATH not found]";
   exit 2;
 fi
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_hive_metastore_status.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_hive_metastore_status.sh
b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_hive_metastore_status.sh
index 0140958..640c077 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_hive_metastore_status.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_hive_metastore_status.sh
@@ -38,8 +38,8 @@ HCAT_URL=-Dhive.metastore.uris="thrift://$HOST:$PORT"
 export JAVA_HOME=$JAVA_HOME
 out=`hcat $HCAT_URL -e "show databases" 2>&1`
 if [[ "$?" -ne 0 ]]; then
-  echo "CRITICAL: Error accessing hive-metaserver status [$out]";
+  echo "CRITICAL: Error accessing Hive Metastore status [$out]";
   exit 2;
 fi
-echo "OK: Hive metaserver status OK";
+echo "OK: Hive Metastore status OK";
 exit 0;

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_mapred_local_dir_used.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_mapred_local_dir_used.sh
b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_mapred_local_dir_used.sh
index e91cb66..15c85eb 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_mapred_local_dir_used.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_mapred_local_dir_used.sh
@@ -26,9 +26,9 @@ for mapred_dir in $MAPRED_LOCAL_DIRS
 do
   percent=`df -hl $mapred_dir | awk '{percent=$5;} END{print percent}' | cut -d % -f 1`
   if [ $percent -ge $CRITICAL ]; then
-    echo "CRITICAL: Mapreduce local dir is full."
+    echo "CRITICAL: MapReduce local dir is full."
     exit 2
   fi
 done
-echo "OK: Mapreduce local dir space is available."
+echo "OK: MapReduce local dir space is available."
 exit 0

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_name_dir_status.php
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_name_dir_status.php
b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_name_dir_status.php
index 3f38c98..db2b491 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_name_dir_status.php
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_name_dir_status.php
@@ -36,12 +36,12 @@
   $json_array = json_decode($json_string, true);
   $object = $json_array['beans'][0];
   if ($object['NameDirStatuses'] == "") {
-    echo "WARNING: Namenode directory status not available via http://".$host.":".$port."/jmx
url" . "\n";
+    echo "WARNING: NameNode directory status not available via http://".$host.":".$port."/jmx
url" . "\n";
     exit(1);
   }
   $NameDirStatuses = json_decode($object['NameDirStatuses'], true);
   $failed_dir_count = count($NameDirStatuses['failed']);
-  $out_msg = "CRITICAL: Offline Namenode directories: ";
+  $out_msg = "CRITICAL: Offline NameNode directories: ";
   if ($failed_dir_count > 0) {
     foreach ($NameDirStatuses['failed'] as $key => $value) {
       $out_msg = $out_msg . $key . ":" . $value . ", ";
@@ -49,7 +49,7 @@
     echo $out_msg . "\n";
     exit (2);
   }
-  echo "OK: All Namenode directories are active" . "\n";
+  echo "OK: All NameNode directories are active" . "\n";
   exit(0);
 
   /* print usage */

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
index 2a26f4e..82b8a3d 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_nodemanager_health.sh
@@ -25,8 +25,8 @@ NODEMANAGER_URL="http://$HOST:$PORT/ws/v1/node/info"
 export PATH="/usr/bin:$PATH"
 RESPONSE=`curl -s $NODEMANAGER_URL`
 if [[ "$RESPONSE" == *'"nodeHealthy":true'* ]]; then 
-  echo "OK: nodemanager healthy true";
+  echo "OK: NodeManager healthy";
   exit 0;
 fi
-echo "CRITICAL: nodemanager healthy false";
+echo "CRITICAL: NodeManager unhealthy";
 exit 2;

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_oozie_status.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_oozie_status.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_oozie_status.sh
index bfd9d75..820ee99 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_oozie_status.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_oozie_status.sh
@@ -38,8 +38,8 @@ OOZIE_URL="http://$HOST:$PORT/oozie"
 export JAVA_HOME=$JAVA_HOME
 out=`oozie admin -oozie ${OOZIE_URL} -status 2>&1`
 if [[ "$?" -ne 0 ]]; then 
-  echo "CRITICAL: Error accessing oozie server status [$out]";
+  echo "CRITICAL: Error accessing Oozie Server status [$out]";
   exit 2;
 fi
-echo "OK: Oozie server status [$out]";
+echo "OK: Oozie Server status [$out]";
 exit 0;

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_templeton_status.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_templeton_status.sh
b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_templeton_status.sh
index 7190956..79424be 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_templeton_status.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_templeton_status.sh
@@ -37,8 +37,8 @@ fi
 regex="^.*\"status\":\"ok\".*<status_code:200>$"
 out=`curl --negotiate -u : -s -w '<status_code:%{http_code}>' http://$HOST:$PORT/templeton/$VERSION/status
2>&1`
 if [[ $out =~ $regex ]]; then 
-  echo "OK: Templeton server status [$out]";
+  echo "OK: WebHCat Server status [$out]";
   exit 0;
 fi
-echo "CRITICAL: Error accessing Templeton server, status [$out]";
+echo "CRITICAL: Error accessing WebHCat Server, status [$out]";
 exit 2;

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_webui.sh
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_webui.sh b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_webui.sh
index 57381e4..b23045e 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_webui.sh
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/files/check_webui.sh
@@ -46,35 +46,35 @@ jobtracker)
 namenode)
     nnweburl="http://$host:$port"
     if [[ `checkurl "$nnweburl"` -ne 0 ]] ; then 
-      echo "WARNING: NameNode web UI not accessible : $nnweburl";
+      echo "WARNING: NameNode Web UI not accessible : $nnweburl";
       exit 1;
     fi
     ;;
 jobhistory)
     jhweburl="http://$host:$port/jobhistoryhome.jsp"
     if [[ `checkurl "$jhweburl"` -ne 0 ]]; then 
-      echo "WARNING: Jobhistory web UI not accessible : $jhweburl";
+      echo "WARNING: HistoryServer Web UI not accessible : $jhweburl";
       exit 1;
     fi
     ;;
 hbase)
     hbaseweburl="http://$host:$port/master-status"
     if [[ `checkurl "$hbaseweburl"` -ne 0 ]]; then 
-      echo "WARNING: HBase Master web UI not accessible : $hbaseweburl"; 
+      echo "WARNING: HBase Master Web UI not accessible : $hbaseweburl";
       exit 1;
     fi
     ;;
 resourcemanager)
     rmweburl="http://$host:$port/cluster"
     if [[ `checkurl "$rmweburl"` -ne 0 ]]; then 
-      echo "WARNING: ResourceManager web UI not accessible : $rmweburl";
+      echo "WARNING: ResourceManager Web UI not accessible : $rmweburl";
       exit 1;
     fi
     ;;
 historyserver2)
     hsweburl="http://$host:$port/jobhistory"
     if [[ `checkurl "$hsweburl"` -ne 0 ]]; then 
-      echo "WARNING: HistoryServer2 web UI not accessible : $hsweburl";
+      echo "WARNING: HistoryServer Web UI not accessible : $hsweburl";
       exit 1;
     fi
     ;;

http://git-wip-us.apache.org/repos/asf/incubator-ambari/blob/a2b675c6/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
----------------------------------------------------------------------
diff --git a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
index d35b982..3256eb4 100644
--- a/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
+++ b/ambari-agent/src/main/puppet/modules/hdp-nagios/templates/hadoop-services.cfg.erb
@@ -33,7 +33,7 @@ define service {
 define service {        
         hostgroup_name          nagios-server        
         use                     hadoop-service
-        service_description     NAGIOS::Nagios status log staleness
+        service_description     NAGIOS::Nagios status log freshness
         servicegroups           NAGIOS
         check_command           check_nagios!10!/var/nagios/status.dat!<%=scope.function_hdp_template_var("::hdp-nagios::server::config::nagios_lookup_daemon_str")%>
         normal_check_interval   5
@@ -45,9 +45,9 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     hadoop-service
-        service_description     HDFS::Percent DataNodes storage full
+        service_description     HDFS::Percent DataNodes with space available
         servicegroups           HDFS
-        check_command           check_aggregate!"DATANODE::DataNode storage full"!10%!30%
+        check_command           check_aggregate!"DATANODE::DataNode space"!10%!30%
         normal_check_interval   2
         retry_check_interval    1 
         max_check_attempts      1
@@ -56,9 +56,9 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     hadoop-service
-        service_description     HDFS::Percent DataNodes down
+        service_description     HDFS::Percent DataNodes live
         servicegroups           HDFS
-        check_command           check_aggregate!"DATANODE::DataNode process down"!10%!30%
+        check_command           check_aggregate!"DATANODE::DataNode process"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -69,7 +69,7 @@ define service {
 define service {
         hostgroup_name          agent-servers
         use                     hadoop-service
-        service_description     AMBARI::Check ambari-agent process
+        service_description     AMBARI::Ambari Agent process
         servicegroups           AMBARI
         check_command           check_ambari_agent_status
         normal_check_interval   5
@@ -82,9 +82,9 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     hadoop-service
-        service_description     ZOOKEEPER::Percent ZooKeeper Servers down
+        service_description     ZOOKEEPER::Percent ZooKeeper Servers live
         servicegroups           ZOOKEEPER
-        check_command           check_aggregate!"ZOOKEEPER::ZooKeeper Server process down"!35%!70%
+        check_command           check_aggregate!"ZOOKEEPER::ZooKeeper Server process"!35%!70%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -96,9 +96,9 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     hadoop-service
-        service_description     HBASE::Percent RegionServers down
+        service_description     HBASE::Percent RegionServers live
         servicegroups           HBASE
-        check_command           check_aggregate!"REGIONSERVER::RegionServer process down"!10%!30%
+        check_command           check_aggregate!"REGIONSERVER::RegionServer process"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -113,7 +113,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia [gmetad] process down
+        service_description     GANGLIA::Ganglia Server process
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -124,7 +124,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for
slaves
+        service_description     GANGLIA::Ganglia Monitor process for Slaves
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_slaves_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -135,7 +135,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for
NameNode
+        service_description     GANGLIA::Ganglia Monitor process for NameNode
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_namenode_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -147,7 +147,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for
JobTracker
+        service_description     GANGLIA::Ganglia Monitor process for JobTracker
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_jobtracker_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -160,7 +160,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for
HBase Master
+        service_description     GANGLIA::Ganglia Monitor process for HBase Master
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_hbase_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -173,7 +173,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for
ResourceManager
+        service_description     GANGLIA::Ganglia Monitor process for ResourceManager
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_rm_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -186,7 +186,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for
NodeManager
+        service_description     GANGLIA::Ganglia Monitor process for NodeManager
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_nm_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -199,7 +199,7 @@ define service {
 define service {
         hostgroup_name          ganglia-server
         use                     hadoop-service
-        service_description     GANGLIA::Ganglia Collector [gmond] process down alert for
History Server 2
+        service_description     GANGLIA::Ganglia Monitor process for HistoryServer
         servicegroups           GANGLIA
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::ganglia_collector_hs_port")%>!-w
1 -c 1
         normal_check_interval   0.25
@@ -215,7 +215,7 @@ define service {
 define service {
         hostgroup_name          snamenode
         use                     hadoop-service
-        service_description     NAMENODE::Secondary NameNode process down
+        service_description     NAMENODE::Secondary NameNode process
         servicegroups           HDFS
         check_command           check_tcp!<%=scope.function_hdp_template_var("snamenode_port")%>!-w
1 -c 1
         normal_check_interval   0.5
@@ -228,7 +228,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     NAMENODE::NameNode Web UI down
+        service_description     NAMENODE::NameNode Web UI
         servicegroups           HDFS
         check_command           check_webui!namenode!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>
         normal_check_interval   1
@@ -262,7 +262,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     NAMENODE::NameNode process down
+        service_description     NAMENODE::NameNode process
         servicegroups           HDFS
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::namenode_metadata_port")%>!-w
1 -c 1
         normal_check_interval   0.5
@@ -273,7 +273,7 @@ define service {
 define service {
         hostgroup_name          namenode
         use                     hadoop-service
-        service_description     HDFS::Corrupt/Missing blocks
+        service_description     HDFS::Blocks health
         servicegroups           HDFS
         check_command           check_hdfs_blocks!<%=scope.function_hdp_template_var("::hdp::namenode_port")%>!0%!0%
         normal_check_interval   2
@@ -309,7 +309,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     hadoop-service
-        service_description     JOBTRACKER::JobTracker Web UI down
+        service_description     JOBTRACKER::JobTracker Web UI
         servicegroups           MAPREDUCE
         check_command           check_webui!jobtracker!<%=scope.function_hdp_template_var("::hdp::jtnode_port")%>
         normal_check_interval   1
@@ -320,7 +320,7 @@ define service {
 define service {
         hostgroup_name          jobtracker
         use                     hadoop-service
-        service_description     JOBTRACKER::JobHistory Web UI down
+        service_description     JOBTRACKER::HistoryServer Web UI
         servicegroups           MAPREDUCE
         check_command           check_webui!jobhistory!<%=scope.function_hdp_template_var("::hdp::jobhistory_port")%>
         normal_check_interval   1
@@ -344,7 +344,7 @@ define service {
         hostgroup_name          jobtracker
         use                     hadoop-service
         use                     hadoop-service
-        service_description     JOBTRACKER::JobTracker process down
+        service_description     JOBTRACKER::JobTracker process
         servicegroups           MAPREDUCE
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::jtnode_port")%>!-w
1 -c 1
         normal_check_interval   0.5
@@ -369,9 +369,9 @@ define service {
 define service {
         hostgroup_name          nagios-server
         use                     hadoop-service
-        service_description     MAPREDUCE::Percent TaskTrackers down
+        service_description     MAPREDUCE::Percent TaskTrackers live
         servicegroups           MAPREDUCE
-        check_command           check_aggregate!"TASKTRACKER::TaskTracker process down"!10%!30%
+        check_command           check_aggregate!"TASKTRACKER::TaskTracker process"!10%!30%
         normal_check_interval   0.5
         retry_check_interval    0.25
         max_check_attempts      3
@@ -381,7 +381,7 @@ define service {
 define service {
         hostgroup_name          tasktracker-servers
         use                     hadoop-service
-        service_description     TASKTRACKER::TaskTracker process down
+        service_description     TASKTRACKER::TaskTracker process
         servicegroups           MAPREDUCE
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::tasktracker_port")%>!-w
1 -c 1
         normal_check_interval   1
@@ -393,7 +393,7 @@ define service {
 define service {
         hostgroup_name          tasktracker-servers
         use                     hadoop-service
-        service_description     TASKTRACKER::Mapreduce local dir used space
+        service_description     TASKTRACKER::MapReduce local dir space
         servicegroups           MAPREDUCE
         check_command           check_mapred_local_dir_used_space!<%=scope.function_hdp_default("::hdp::mapred-site/mapred.local.dir")%>!85%
         normal_check_interval   0.5
@@ -409,7 +409,7 @@ define service {
 define service {
         hostgroup_name          resourcemanager
         use                     hadoop-service
-        service_description     RESOURCEMANAGER::ResourceManager Web UI down
+        service_description     RESOURCEMANAGER::ResourceManager Web UI
         servicegroups           YARN
         check_command           check_webui!resourcemanager!<%=scope.function_hdp_template_var("::hdp::rm_port")%>
         normal_check_interval   1
@@ -442,7 +442,7 @@ define service {
 define service {
         hostgroup_name          resourcemanager
         use                     hadoop-service
-        service_description     RESOURCEMANAGER::Percent NodeManager down
+        service_description     RESOURCEMANAGER::Percent NodeManager live
         servicegroups           YARN
         check_command           check_resourcemanager_nodes_percentage!<%=scope.function_hdp_template_var("::hdp::rm_port")%>!lost!10!30
         normal_check_interval   1
@@ -453,7 +453,7 @@ define service {
 define service {
         hostgroup_name          resourcemanager
         use                     hadoop-service
-        service_description     RESOURCEMANAGER::Percent NodeManager unhealthy
+        service_description     RESOURCEMANAGER::Percent NodeManager healthy
         servicegroups           YARN
         check_command           check_resourcemanager_nodes_percentage!<%=scope.function_hdp_template_var("::hdp::rm_port")%>!unhealthy!10!30
         normal_check_interval   1
@@ -467,7 +467,7 @@ define service {
 define service {
         hostgroup_name          nodemanagers
         use                     hadoop-service
-        service_description     NODEMANAGER::NodeManager process down
+        service_description     NODEMANAGER::NodeManager process
         servicegroups           YARN
         check_command           check_tcp!<%=scope.function_hdp_template_var("nm_port")%>!-w
1 -c 1
         normal_check_interval   1
@@ -478,7 +478,7 @@ define service {
 define service {
         hostgroup_name          nodemanagers
         use                     hadoop-service
-        service_description     NODEMANAGER::NodeManager unhealthy
+        service_description     NODEMANAGER::NodeManager health
         servicegroups           YARN
         check_command           check_nodemanager_health!<%=scope.function_hdp_template_var("nm_port")%>
         normal_check_interval   1
@@ -492,7 +492,7 @@ define service {
 define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
-        service_description     JOBHISTORY::History Server 2 Web UI down
+        service_description     JOBHISTORY::HistoryServer Web UI
         servicegroups           MAPREDUCE
         check_command           check_webui!historyserver2!<%=scope.function_hdp_template_var("::hdp::hs_port")%>
         normal_check_interval   1
@@ -503,7 +503,7 @@ define service {
 define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
-        service_description     JOBHISTORY::History Server 2 CPU utilization
+        service_description     JOBHISTORY::HistoryServer CPU utilization
         servicegroups           MAPREDUCE
         check_command           check_cpu!200%!250%
         normal_check_interval   5
@@ -514,7 +514,7 @@ define service {
 define service {
         hostgroup_name          historyserver2
         use                     hadoop-service
-        service_description     JOBHISTORY::History Server 2 RPC latency
+        service_description     JOBHISTORY::HistoryServer RPC latency
         servicegroups           MAPREDUCE
         check_command           check_rpcq_latency!JobHistoryServer!<%=scope.function_hdp_template_var("::hdp::hs_port")%>!3000!5000
         normal_check_interval   5
@@ -529,7 +529,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     hadoop-service
-        service_description     DATANODE::DataNode process down
+        service_description     DATANODE::DataNode process
         servicegroups           HDFS
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::datanode_port")%>!-w
1 -c 1
         normal_check_interval   1
@@ -540,7 +540,7 @@ define service {
 define service {
         hostgroup_name          slaves
         use                     hadoop-service
-        service_description     DATANODE::DataNode storage full
+        service_description     DATANODE::DataNode space
         servicegroups           HDFS
         check_command           check_datanode_storage!<%=scope.function_hdp_template_var("::hdp::datanode_port")%>!90%!90%
         normal_check_interval   5
@@ -555,7 +555,7 @@ define service {
 define service {
         hostgroup_name          flume-servers
         use                     hadoop-service
-        service_description     FLUME::Flume Agent process down
+        service_description     FLUME::Flume Agent process
         servicegroups           FLUME
         check_command           check_tcp!<%=scope.function_hdp_template_var("flume_port")%>!-w
1 -c 1
         normal_check_interval   1
@@ -570,7 +570,7 @@ define service {
 define service {
         hostgroup_name          zookeeper-servers
         use                     hadoop-service
-        service_description     ZOOKEEPER::ZooKeeper Server process down
+        service_description     ZOOKEEPER::ZooKeeper Server process
         servicegroups           ZOOKEEPER
         check_command           check_tcp!<%=scope.function_hdp_template_var("::clientPort")%>!-w
1 -c 1
         normal_check_interval   1
@@ -584,7 +584,7 @@ define service {
 define service {
         hostgroup_name          region-servers
         use                     hadoop-service
-        service_description     REGIONSERVER::RegionServer process down
+        service_description     REGIONSERVER::RegionServer process
         servicegroups           HBASE
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::hbase_rs_port")%>!-w
1 -c 1
         normal_check_interval   1
@@ -596,7 +596,7 @@ define service {
 define service {
         hostgroup_name          hbasemasters
         use                     hadoop-service
-        service_description     HBASEMASTER::HBase Master Web UI down
+        service_description     HBASEMASTER::HBase Master Web UI
         servicegroups           HBASE
         check_command           check_webui!hbase!<%=scope.function_hdp_template_var("::hdp::hbase_master_port")%>
         normal_check_interval   1
@@ -618,7 +618,7 @@ define service {
 define service {
         hostgroup_name          hbasemasters
         use                     hadoop-service
-        service_description     HBASEMASTER::HBase Master process down
+        service_description     HBASEMASTER::HBase Master process
         servicegroups           HBASE
         check_command           check_tcp!<%=scope.function_hdp_template_var("::hdp::hbase_master_port")%>!-w
1 -c 1
         normal_check_interval   0.5
@@ -632,7 +632,7 @@ define service {
 define service {
         hostgroup_name          hiveserver
         use                     hadoop-service
-        service_description     HIVE-METASTORE::Hive Metastore status check
+        service_description     HIVE-METASTORE::Hive Metastore status
         servicegroups           HIVE-METASTORE
         <%if scope.function_hdp_template_var("security_enabled")-%>
         check_command           check_hive_metastore_status!<%=scope.function_hdp_template_var("::hive_metastore_port")%>!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("kinit_path_local")%>
@@ -649,7 +649,7 @@ define service {
 define service {
         hostgroup_name          oozie-server
         use                     hadoop-service
-        service_description     OOZIE::Oozie Server status check
+        service_description     OOZIE::Oozie Server status
         servicegroups           OOZIE
         <%if scope.function_hdp_template_var("security_enabled")-%>
         check_command           check_oozie_status!<%=scope.function_hdp_template_var("::hdp::oozie_server_port")%>!<%=scope.function_hdp_template_var("java64_home")%>!true!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("kinit_path_local")%>
@@ -666,7 +666,7 @@ define service {
 define service {
         hostgroup_name          webhcat-server
         use                     hadoop-service
-        service_description     WEBHCAT::WebHCat Server status check
+        service_description     WEBHCAT::WebHCat Server status
         servicegroups           WEBHCAT 
         <%if scope.function_hdp_template_var("security_enabled")-%>
         check_command           check_templeton_status!<%=scope.function_hdp_template_var("::hdp::templeton_port")%>!v1!true!<%=scope.function_hdp_template_var("nagios_keytab_path")%>!<%=scope.function_hdp_template_var("nagios_principal_name")%>!<%=scope.function_hdp_template_var("kinit_path_local")%>
@@ -683,7 +683,7 @@ define service {
 define service {
         hostgroup_name          hue-server
         use                     hadoop-service
-        service_description     HUE::Hue Server status check
+        service_description     HUE::Hue Server status
         servicegroups           HUE
         check_command           check_hue_status
         normal_check_interval   100


Mime
View raw message