ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jonathanhur...@apache.org
Subject git commit: AMBARI-7928 - Alerts: Convert HBase, Falcon, Storm, Kafka, Knox Nagios Alerts Into Ambari (jonathanhurley)
Date Thu, 23 Oct 2014 17:18:03 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk efddf553f -> 7777302b7


AMBARI-7928 - Alerts: Convert HBase, Falcon, Storm, Kafka, Knox Nagios Alerts Into Ambari
(jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/7777302b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/7777302b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/7777302b

Branch: refs/heads/trunk
Commit: 7777302b7f53304bcaafb0c0035bd729eb61628b
Parents: efddf55
Author: Jonathan Hurley <jhurley@hortonworks.com>
Authored: Thu Oct 23 12:35:21 2014 -0400
Committer: Jonathan Hurley <jhurley@hortonworks.com>
Committed: Thu Oct 23 13:17:48 2014 -0400

----------------------------------------------------------------------
 .../HDP/2.0.6/services/GANGLIA/alerts.json      | 107 ++++++++++++
 .../stacks/HDP/2.0.6/services/HBASE/alerts.json |  85 +++++++++-
 .../stacks/HDP/2.0.6/services/HIVE/alerts.json  |  26 +++
 .../stacks/HDP/2.1/services/FALCON/alerts.json  |  51 ++++++
 .../stacks/HDP/2.1/services/STORM/alerts.json   | 164 +++++++++++++++++++
 .../stacks/HDP/2.2/services/KAFKA/alerts.json   |  26 +++
 .../stacks/HDP/2.2/services/KNOX/alerts.json    |  26 +++
 7 files changed, 484 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/7777302b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/GANGLIA/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/GANGLIA/alerts.json
b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/GANGLIA/alerts.json
new file mode 100644
index 0000000..9b115d5
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/GANGLIA/alerts.json
@@ -0,0 +1,107 @@
+{
+  "GANGLIA": {
+    "service": [],
+    "GANGLIA_SERVER": [
+      {
+        "name": "ganglia_server_process",
+        "label": "Ganglia Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8651",
+          "default_port": 8651,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_hdfs_namenode",
+        "label": "Ganglia NameNode Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8661",
+          "default_port": 8661,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_hbase_master",
+        "label": "Ganglia HBase Master Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8663",
+          "default_port": 8663,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_yarn_resourcemanager",
+        "label": "Ganglia ResourceManager Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8664",
+          "default_port": 8664,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_mapreduce_history_server",
+        "label": "Ganglia History Server Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8666",
+          "default_port": 8666,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/7777302b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HBASE/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HBASE/alerts.json
b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HBASE/alerts.json
index 80442a2..9846848 100644
--- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HBASE/alerts.json
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HBASE/alerts.json
@@ -1,6 +1,31 @@
 {
   "HBASE": {
-    "service": [],
+    "service": [
+      {
+        "name": "hbase_regionserver_process_percent",
+        "label": "Percent RegionServers Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "hbase_regionserver_process",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.1
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.3
+            }
+          }
+        }
+      }    
+    ],
     "HBASE_MASTER": [
       {
         "name": "hbase_master_process",
@@ -20,6 +45,64 @@
             }
           }        
         }
+      },
+      {
+        "name": "hbase_master_cpu",
+        "label": "HBase Master CPU Utilization",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hbase-site/hbase.master.info.port}}",
+            "https": "{{hbase-site/hbase.master.info.port}}",
+            "https_property": "{{cluster-env/security_enabled}}",
+            "https_property_value": "true",
+            "default_port": 60010
+          },
+          "reporting": {
+            "ok": {
+              "text": "{1} CPU, load {0:.1%}"
+            },
+            "warning": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 200
+            },
+            "critical": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 250
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "java.lang:type=OperatingSystem/SystemCpuLoad",
+              "java.lang:type=OperatingSystem/AvailableProcessors"
+            ],
+            "value": "{0} * 100"
+          }
+        }
+      }
+    ],
+    "HBASE_REGIONSERVER": [
+      {
+        "name": "hbase_regionserver_process",
+        "label": "HBase RegionServer Process",
+        "interval": 1,
+        "scope": "HOST",
+        "source": {
+          "type": "PORT",
+          "uri": "{{hbase-site/hbase.regionserver.info.port}}",
+          "default_port": 60030,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
       }
     ]
   }

http://git-wip-us.apache.org/repos/asf/ambari/blob/7777302b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HIVE/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HIVE/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HIVE/alerts.json
new file mode 100644
index 0000000..c97775d
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/HIVE/alerts.json
@@ -0,0 +1,26 @@
+{
+  "HIVE": {
+    "service": [],
+    "HIVE_METASTORE": [
+      {
+        "name": "hive_metastore_process",
+        "label": "Hive Metastore Process",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "PORT",
+          "uri": "{{hive-site/hive.metastore.uris}}",
+          "default_port": 9083,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/7777302b/ambari-server/src/main/resources/stacks/HDP/2.1/services/FALCON/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1/services/FALCON/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.1/services/FALCON/alerts.json
new file mode 100644
index 0000000..c53230f
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1/services/FALCON/alerts.json
@@ -0,0 +1,51 @@
+{
+  "FALCON": {
+    "service": [],
+    "FALCON_SERVER": [
+      {
+        "name": "falcon_server_process",
+        "label": "Falcon Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "PORT",
+          "uri": "{{falcon-env/falcon_port}}",
+          "default_port": 15000,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "falcon_server_webui",
+        "label": "Falcon Server Web UI",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{falcon-env/falcon_port}}",
+            "default_port": 15000
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {3:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {3:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}:{2}"
+            }
+          }
+        }
+      }
+    ]
+  }
+}

http://git-wip-us.apache.org/repos/asf/ambari/blob/7777302b/ambari-server/src/main/resources/stacks/HDP/2.1/services/STORM/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.1/services/STORM/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.1/services/STORM/alerts.json
new file mode 100644
index 0000000..df4909b
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.1/services/STORM/alerts.json
@@ -0,0 +1,164 @@
+{
+  "STORM": {
+    "service": [
+      {
+        "name": "storm_supervisor_process_percent",
+        "label": "Percent Supervisors Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "storm_supervisor_process",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.1
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.3
+            }
+          }
+        }
+      }
+    ],
+    "STORM_UI_SERVER": [
+      {
+        "name": "storm_server_process",
+        "label": "Storm Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "{{storm-site/ui.port}}",
+          "default_port": 8744,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      },
+      {
+        "name": "storm_webui",
+        "label": "Storm Web UI",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{storm-site/ui.port}}"
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {3:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {3:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}:{2}"
+            }
+          }
+        }
+      }      
+    ],
+    "NIMBUS": [
+      {
+        "name": "storm_nimbus_process",
+        "label": "Nimbus Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "{{storm-site/nimbus.thrift.port}}",
+          "default_port": 6627,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      }
+    ],
+    "DRPC_SERVER": [
+      {
+        "name": "storm_drpc_server",
+        "label": "DRPC Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "{{storm-site/drpc.port}}",
+          "default_port": 3772,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      }
+    ],
+    "STORM_REST_API": [
+      {
+        "name": "storm_rest_api",
+        "label": "Storm REST API",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8745",
+          "default_port": 8745,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      }
+    ],
+    "SUPERVISOR": [
+      {
+        "name": "storm_supervisor_process",
+        "label": "Supervisor Process",
+        "interval": 1,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "56431",
+          "default_port": 56431,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/7777302b/ambari-server/src/main/resources/stacks/HDP/2.2/services/KAFKA/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/KAFKA/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.2/services/KAFKA/alerts.json
new file mode 100644
index 0000000..a52feac
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/KAFKA/alerts.json
@@ -0,0 +1,26 @@
+{
+  "KAFKA": {
+    "service": [],
+    "KAFKA_BROKER": [
+      {
+        "name": "kafka_broker_process",
+        "label": "Kafka Broker Process",
+        "interval": 1,
+        "scope": "HOST",
+        "source": {
+          "type": "PORT",
+          "uri": "{{kafka-broker/port}}",
+          "default_port": 6667,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }        
+        }
+      }
+    ]
+  }
+}

http://git-wip-us.apache.org/repos/asf/ambari/blob/7777302b/ambari-server/src/main/resources/stacks/HDP/2.2/services/KNOX/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/HDP/2.2/services/KNOX/alerts.json b/ambari-server/src/main/resources/stacks/HDP/2.2/services/KNOX/alerts.json
new file mode 100644
index 0000000..3d2883e
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/HDP/2.2/services/KNOX/alerts.json
@@ -0,0 +1,26 @@
+{
+  "KNOX": {
+    "service": [],
+    "KNOX_GATEWAY": [
+      {
+        "name": "knox_gateway_process",
+        "label": "Knox Gateway Process",
+        "interval": 1,
+        "scope": "HOST",
+        "source": {
+          "type": "PORT",
+          "uri": "{{gateway-site/gateway.port}}",
+          "default_port": 8443,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      }
+    ]
+  }
+}


Mime
View raw message