ambari-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jonathanhur...@apache.org
Subject ambari git commit: AMBARI-17127 - Ambari Stale Alert Triggers For Server-Side Performance Alert (jonathanhurley)
Date Thu, 09 Jun 2016 16:55:56 GMT
Repository: ambari
Updated Branches:
  refs/heads/trunk 4878a881b -> 4aa1639b4


AMBARI-17127 - Ambari Stale Alert Triggers For Server-Side Performance Alert (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/4aa1639b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/4aa1639b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/4aa1639b

Branch: refs/heads/trunk
Commit: 4aa1639b4892b0faeedb4b9df4c6c7ac2b20d299
Parents: 4878a88
Author: Jonathan Hurley <jhurley@hortonworks.com>
Authored: Wed Jun 8 14:20:00 2016 -0400
Committer: Jonathan Hurley <jhurley@hortonworks.com>
Committed: Thu Jun 9 12:55:01 2016 -0400

----------------------------------------------------------------------
 .../ambari/server/alerts/AlertRunnable.java     |  29 ++++
 .../alerts/AmbariPerformanceRunnable.java       |  29 ----
 .../server/alerts/StaleAlertRunnable.java       | 139 ++++++++++++++++---
 ambari-server/src/main/resources/alerts.json    |  13 +-
 .../server/alerts/StaleAlertRunnableTest.java   | 116 +++++++++++++++-
 5 files changed, 276 insertions(+), 50 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/4aa1639b/ambari-server/src/main/java/org/apache/ambari/server/alerts/AlertRunnable.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/alerts/AlertRunnable.java
b/ambari-server/src/main/java/org/apache/ambari/server/alerts/AlertRunnable.java
index 21ae9b0..ea583e4 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/alerts/AlertRunnable.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/alerts/AlertRunnable.java
@@ -29,6 +29,7 @@ import org.apache.ambari.server.orm.entities.AlertDefinitionEntity;
 import org.apache.ambari.server.state.Alert;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.Clusters;
+import org.apache.commons.lang.math.NumberUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -133,4 +134,32 @@ public abstract class AlertRunnable implements Runnable {
       LOG.error("Unable to run the {} alert", m_definitionName, exception);
     }
   }
+
+  /**
+   * Converts the given value to an integer safely. Accepts {@code null},
+   * {@link Number} instances, and any other object whose string
+   * representation is a parseable number.
+   *
+   * @param value
+   *          the raw parameter value to convert; may be {@code null}, a
+   *          {@link Number}, or any object whose {@code toString()} yields a
+   *          numeric string.
+   * @param defaultValue
+   *          the value to return when {@code value} is {@code null} or cannot
+   *          be parsed as a number.
+   * @return the integer form of {@code value}, or {@code defaultValue} when
+   *         conversion is not possible.
+   */
+  int getThresholdValue(Object value, int defaultValue) {
+    if (null == value) {
+      return defaultValue;
+    }
+
+    // fast path: the value is already numeric
+    if (value instanceof Number) {
+      return ((Number) value).intValue();
+    }
+
+    // coerce non-String objects into their string representation so the
+    // NumberUtils checks below can operate on them
+    if (!(value instanceof String)) {
+      value = value.toString();
+    }
+
+    if (!NumberUtils.isNumber((String) value)) {
+      return defaultValue;
+    }
+
+    Number number = NumberUtils.createNumber((String) value);
+    return number.intValue();
+  }
 }

http://git-wip-us.apache.org/repos/asf/ambari/blob/4aa1639b/ambari-server/src/main/java/org/apache/ambari/server/alerts/AmbariPerformanceRunnable.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/alerts/AmbariPerformanceRunnable.java
b/ambari-server/src/main/java/org/apache/ambari/server/alerts/AmbariPerformanceRunnable.java
index 7ed2427..bbb583b 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/alerts/AmbariPerformanceRunnable.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/alerts/AmbariPerformanceRunnable.java
@@ -48,7 +48,6 @@ import org.apache.ambari.server.state.alert.ParameterizedSource.AlertParameter;
 import org.apache.ambari.server.state.alert.ServerSource;
 import org.apache.ambari.server.state.services.AmbariServerAlertService;
 import org.apache.commons.lang.StringUtils;
-import org.apache.commons.lang.math.NumberUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.springframework.security.core.context.SecurityContextHolder;
@@ -388,34 +387,6 @@ public class AmbariPerformanceRunnable extends AlertRunnable {
   }
 
   /**
-   * Converts the given value to an integer safely.
-   *
-   * @param value
-   * @param defaultValue
-   * @return
-   */
-  int getThresholdValue(Object value, int defaultValue) {
-    if (null == value) {
-      return defaultValue;
-    }
-
-    if (value instanceof Number) {
-      return ((Number) value).intValue();
-    }
-
-    if (!(value instanceof String)) {
-      value = value.toString();
-    }
-
-    if (!NumberUtils.isNumber((String) value)) {
-      return defaultValue;
-    }
-
-    Number number = NumberUtils.createNumber((String) value);
-    return number.intValue();
-  }
-
-  /**
    * The {@link PerformanceResult} class is used to wrap the result of a
    * {@link PerformanceArea}.
    */

http://git-wip-us.apache.org/repos/asf/ambari/blob/4aa1639b/ambari-server/src/main/java/org/apache/ambari/server/alerts/StaleAlertRunnable.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/java/org/apache/ambari/server/alerts/StaleAlertRunnable.java
b/ambari-server/src/main/java/org/apache/ambari/server/alerts/StaleAlertRunnable.java
index cf12bbf..7cb679b 100644
--- a/ambari-server/src/main/java/org/apache/ambari/server/alerts/StaleAlertRunnable.java
+++ b/ambari-server/src/main/java/org/apache/ambari/server/alerts/StaleAlertRunnable.java
@@ -17,6 +17,8 @@
  */
 package org.apache.ambari.server.alerts;
 
+import java.lang.management.ManagementFactory;
+import java.lang.management.RuntimeMXBean;
 import java.text.MessageFormat;
 import java.util.Collections;
 import java.util.HashMap;
@@ -33,9 +35,15 @@ import org.apache.ambari.server.state.Alert;
 import org.apache.ambari.server.state.AlertState;
 import org.apache.ambari.server.state.Cluster;
 import org.apache.ambari.server.state.MaintenanceState;
+import org.apache.ambari.server.state.alert.AlertDefinition;
+import org.apache.ambari.server.state.alert.AlertDefinitionFactory;
+import org.apache.ambari.server.state.alert.ParameterizedSource.AlertParameter;
+import org.apache.ambari.server.state.alert.ServerSource;
 import org.apache.ambari.server.state.alert.SourceType;
 import org.apache.ambari.server.state.services.AmbariServerAlertService;
 import org.apache.commons.lang.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.inject.Inject;
 
@@ -48,6 +56,11 @@ import com.google.inject.Inject;
  */
 public class StaleAlertRunnable extends AlertRunnable {
   /**
+   * Logger.
+   */
+  private final static Logger LOG = LoggerFactory.getLogger(StaleAlertRunnable.class);
+
+  /**
    * The message for the alert when all services have run in their designated
    * intervals.
    */
@@ -72,12 +85,33 @@ public class StaleAlertRunnable extends AlertRunnable {
   private static final int MINUTES_PER_HOUR = 60;
 
   /**
+   * The multiplier for the interval of the definition which is being checked
+   * for staleness. If this value is {@code 2}, then alerts are considered stale
+   * if they haven't run in more than 2x their interval value.
+   */
+  private static final int INTERVAL_WAIT_FACTOR_DEFAULT = 2;
+
+  /**
+   * A parameter which exposes the interval multiplier to use for calculating
+   * staleness. If this does not exist, then
+   * {@link #INTERVAL_WAIT_FACTOR_DEFAULT} will be used.
+   */
+  private static final String STALE_INTERVAL_MULTIPLIER_PARAM_KEY = "stale.interval.multiplier";
+
+  /**
    * Used to get the current alerts and the last time they ran.
    */
   @Inject
   private AlertsDAO m_alertsDao;
 
   /**
+   * Used for converting {@link AlertDefinitionEntity} into
+   * {@link AlertDefinition} instances.
+   */
+  @Inject
+  private AlertDefinitionFactory m_definitionFactory;
+
+  /**
    * Constructor.
    *
    * @param definitionName
@@ -91,8 +125,17 @@ public class StaleAlertRunnable extends AlertRunnable {
    */
   @Override
   List<Alert> execute(Cluster cluster, AlertDefinitionEntity myDefinition) {
-    Set<String> staleAlerts = new TreeSet<String>();
-    Map<String, Set<String>> staleHostAlerts = new HashMap<>();
+    // get the multiplier
+    int waitFactor = getWaitFactorMultiplier(myDefinition);
+
+    // use the uptime of the Ambari Server as a way to determine if we need to
+    // give the alert more time to report in
+    RuntimeMXBean rb = ManagementFactory.getRuntimeMXBean();
+    long uptime = rb.getUptime();
+
+    int totalStaleAlerts = 0;
+    Set<String> staleAlertGroupings = new TreeSet<String>();
+    Map<String, Set<String>> staleAlertsByHost = new HashMap<>();
     Set<String> hostsWithStaleAlerts = new TreeSet<>();
 
     // get the cluster's current alerts
@@ -102,7 +145,8 @@ public class StaleAlertRunnable extends AlertRunnable {
     long now = System.currentTimeMillis();
 
     // for each current alert, check to see if the last time it ran is
-    // more than 2x its interval value (indicating it hasn't run)
+    // more than INTERVAL_WAIT_FACTOR * its interval value (indicating it hasn't
+    // run)
     for (AlertCurrentEntity current : currentAlerts) {
       AlertHistoryEntity history = current.getAlertHistory();
       AlertDefinitionEntity currentDefinition = history.getAlertDefinition();
@@ -130,37 +174,52 @@ public class StaleAlertRunnable extends AlertRunnable {
       // convert minutes to milliseconds for the definition's interval
       long intervalInMillis = currentDefinition.getScheduleInterval() * MINUTE_TO_MS_CONVERSION;
 
-      // if the last time it was run is >= 2x the interval, it's stale
+      // if the server hasn't been up long enough to consider this alert stale,
+      // then don't mark it stale - this is to protect against cases where
+      // Ambari was down for a while and after startup it hasn't received the
+      // alert because it has a longer interval than this stale alert check:
+      //
+      // Stale alert check - every 5 minutes
+      // Foo alert check - every 10 minutes
+      // Ambari down for 35 minutes for upgrade
+      if (uptime <= waitFactor * intervalInMillis) {
+        continue;
+      }
+
+      // if the last time it was run is >= INTERVAL_WAIT_FACTOR * the interval,
+      // it's stale
       long timeDifference = now - current.getLatestTimestamp();
-      if (timeDifference >= 2 * intervalInMillis) {
+      if (timeDifference >= waitFactor * intervalInMillis) {
+        // increase the count
+        totalStaleAlerts++;
 
         // it is technically possible to have a null/blank label; if so,
         // default to the name of the definition
         String label = currentDefinition.getLabel();
         if (StringUtils.isEmpty(label)) {
           label = currentDefinition.getDefinitionName();
-          }
+        }
 
         if (null != history.getHostName()) {
           // keep track of the host, if not null
           String hostName = history.getHostName();
           hostsWithStaleAlerts.add(hostName);
-          if (!staleHostAlerts.containsKey(hostName)) {
-            staleHostAlerts.put(hostName, new TreeSet<String>());
-            }
+          if (!staleAlertsByHost.containsKey(hostName)) {
+            staleAlertsByHost.put(hostName, new TreeSet<String>());
+          }
 
-          staleHostAlerts.get(hostName).add(MessageFormat.format(TIMED_LABEL_MSG, label,
+          staleAlertsByHost.get(hostName).add(MessageFormat.format(TIMED_LABEL_MSG, label,
               millisToHumanReadableStr(timeDifference)));
         } else {
           // non host alerts
-          staleAlerts.add(label);
-          }
+          staleAlertGroupings.add(label);
         }
+      }
     }
 
-    for (String host : staleHostAlerts.keySet()) {
-      staleAlerts.add(MessageFormat.format(HOST_LABEL_MSG, host,
-          StringUtils.join(staleHostAlerts.get(host), ",\n  ")));
+    for (String host : staleAlertsByHost.keySet()) {
+      staleAlertGroupings.add(MessageFormat.format(HOST_LABEL_MSG, host,
+          StringUtils.join(staleAlertsByHost.get(host), ",\n  ")));
     }
 
     AlertState alertState = AlertState.OK;
@@ -168,10 +227,10 @@ public class StaleAlertRunnable extends AlertRunnable {
 
     // if there are stale alerts, mark as CRITICAL with the list of
     // alerts
-    if (!staleAlerts.isEmpty()) {
+    if (!staleAlertGroupings.isEmpty()) {
       alertState = AlertState.CRITICAL;
-      alertText = MessageFormat.format(STALE_ALERTS_MSG, staleAlerts.size(),
-          hostsWithStaleAlerts.size(), StringUtils.join(staleAlerts, ",\n"));
+      alertText = MessageFormat.format(STALE_ALERTS_MSG, totalStaleAlerts,
+          hostsWithStaleAlerts.size(), StringUtils.join(staleAlertGroupings, ",\n"));
     }
 
     Alert alert = new Alert(myDefinition.getDefinitionName(), null, myDefinition.getServiceName(),
@@ -209,5 +268,49 @@ public class StaleAlertRunnable extends AlertRunnable {
     }
     return result.trim();
   }
+
+  /**
+   * Gets the wait factor multiplier off of the definition, returning
+   * {@link #INTERVAL_WAIT_FACTOR_DEFAULT} if not specified. This will look for
+   * {@link #STALE_INTERVAL_MULTIPLIER_PARAM_KEY} in the definition parameters.
+   * The value returned from this method will be guaranteed to be in the range
+   * of 2 to 10.
+   *
+   * @param entity
+   *          the definition to read
+   * @return the wait factor interval multiplier
+   */
+  private int getWaitFactorMultiplier(AlertDefinitionEntity entity) {
+    // start with the default
+    int waitFactor = INTERVAL_WAIT_FACTOR_DEFAULT;
+
+    // coerce the entity into a business object so that the list of parameters
+    // can be extracted and used for threshold calculation
+    try {
+      AlertDefinition definition = m_definitionFactory.coerce(entity);
+      ServerSource serverSource = (ServerSource) definition.getSource();
+      List<AlertParameter> parameters = serverSource.getParameters();
+      for (AlertParameter parameter : parameters) {
+        Object value = parameter.getValue();
+
+        if (StringUtils.equals(parameter.getName(), STALE_INTERVAL_MULTIPLIER_PARAM_KEY)) {
+          waitFactor = getThresholdValue(value, INTERVAL_WAIT_FACTOR_DEFAULT);
+        }
+      }
+
+      if (waitFactor < 2 || waitFactor > 10) {
+        LOG.warn(
+            "The interval multiplier of {} is outside the valid range for {} and will be set to 2",
+            waitFactor, entity.getLabel());
+
+        waitFactor = 2;
+      }
+    } catch (Exception exception) {
+      LOG.error("Unable to read the {} parameter for {}", STALE_INTERVAL_MULTIPLIER_PARAM_KEY,
+          StaleAlertRunnable.class.getSimpleName(), exception);
+    }
+
+    return waitFactor;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/4aa1639b/ambari-server/src/main/resources/alerts.json
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/alerts.json b/ambari-server/src/main/resources/alerts.json
index f55211f..9cffff5 100644
--- a/ambari-server/src/main/resources/alerts.json
+++ b/ambari-server/src/main/resources/alerts.json
@@ -25,7 +25,18 @@
         "enabled": true,
         "source": {
           "type": "SERVER",
-          "class": "org.apache.ambari.server.alerts.StaleAlertRunnable"
+          "class": "org.apache.ambari.server.alerts.StaleAlertRunnable",
+          "parameters": [
+            {
+              "name": "stale.interval.multiplier",
+              "display_name": "Interval Multiplier",
+              "value": 2,
+              "type": "NUMERIC",
+              "description": "The number of intervals which must pass before an alert is considered stale. This value is a multiplier for each alert's individual interval.",
+              "units": "Intervals",
+              "threshold": "CRITICAL"
+            }
+          ]
         }
       },
       {

http://git-wip-us.apache.org/repos/asf/ambari/blob/4aa1639b/ambari-server/src/test/java/org/apache/ambari/server/alerts/StaleAlertRunnableTest.java
----------------------------------------------------------------------
diff --git a/ambari-server/src/test/java/org/apache/ambari/server/alerts/StaleAlertRunnableTest.java
b/ambari-server/src/test/java/org/apache/ambari/server/alerts/StaleAlertRunnableTest.java
index 9a9d989..2111beb 100644
--- a/ambari-server/src/test/java/org/apache/ambari/server/alerts/StaleAlertRunnableTest.java
+++ b/ambari-server/src/test/java/org/apache/ambari/server/alerts/StaleAlertRunnableTest.java
@@ -22,8 +22,11 @@ import static junit.framework.Assert.assertEquals;
 import static org.easymock.EasyMock.createNiceMock;
 import static org.easymock.EasyMock.expect;
 import static org.easymock.EasyMock.replay;
+import static org.easymock.EasyMock.reset;
 import static org.easymock.EasyMock.verify;
 
+import java.lang.management.ManagementFactory;
+import java.lang.management.RuntimeMXBean;
 import java.lang.reflect.Field;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -52,6 +55,10 @@ import org.easymock.EasyMock;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.powermock.api.easymock.PowerMock;
+import org.powermock.core.classloader.annotations.PrepareForTest;
+import org.powermock.modules.junit4.PowerMockRunner;
 
 import com.google.common.eventbus.EventBus;
 import com.google.inject.Binder;
@@ -62,6 +69,8 @@ import com.google.inject.Module;
 /**
  * Tests {@link StaleAlertRunnableTest}.
  */
+@RunWith(PowerMockRunner.class)
+@PrepareForTest({ StaleAlertRunnable.class, ManagementFactory.class })
 public class StaleAlertRunnableTest {
 
   private final static long CLUSTER_ID = 1;
@@ -84,6 +93,7 @@ public class StaleAlertRunnableTest {
 
   private AlertEventPublisher m_eventPublisher;
   private EventBus m_synchronizedBus;
+  private RuntimeMXBean m_runtimeMXBean;
 
   /**
    *
@@ -113,12 +123,16 @@ public class StaleAlertRunnableTest {
     clusterMap.put(CLUSTER_NAME, m_cluster);
 
     // mock the definition for the alert
+    expect(m_definition.getDefinitionId()).andReturn(1L).atLeastOnce();
     expect(m_definition.getDefinitionName()).andReturn(DEFINITION_NAME).atLeastOnce();
     expect(m_definition.getServiceName()).andReturn(DEFINITION_SERVICE).atLeastOnce();
     expect(m_definition.getComponentName()).andReturn(DEFINITION_COMPONENT).atLeastOnce();
     expect(m_definition.getLabel()).andReturn(DEFINITION_LABEL).atLeastOnce();
     expect(m_definition.getEnabled()).andReturn(true).atLeastOnce();
     expect(m_definition.getScheduleInterval()).andReturn(DEFINITION_INTERVAL).atLeastOnce();
+    expect(m_definition.getClusterId()).andReturn(CLUSTER_ID).atLeastOnce();
+
+    expect(m_definition.getSource()).andReturn("{\"type\" : \"SERVER\"}").anyTimes();
 
     // mock the cluster
     expect(m_cluster.getClusterId()).andReturn(CLUSTER_ID).atLeastOnce();
@@ -134,8 +148,16 @@ public class StaleAlertRunnableTest {
     expect(m_alertsDao.findCurrentByCluster(CLUSTER_ID)).andReturn(
         m_currentAlerts).atLeastOnce();
 
+    // mock out the uptime to be a while (since most tests are not testing
+    // system uptime)
+    m_runtimeMXBean = EasyMock.createNiceMock(RuntimeMXBean.class);
+    PowerMock.mockStatic(ManagementFactory.class);
+    expect(ManagementFactory.getRuntimeMXBean()).andReturn(m_runtimeMXBean).atLeastOnce();
+    PowerMock.replay(ManagementFactory.class);
+    expect(m_runtimeMXBean.getUptime()).andReturn(360000L);
+
     replay(m_definition, m_cluster, m_clusters,
-        m_definitionDao, m_alertsDao);
+        m_definitionDao, m_alertsDao, m_runtimeMXBean);
     }
 
   /**
@@ -260,7 +282,7 @@ public class StaleAlertRunnableTest {
    */
   @Test
   public void testStaleAlertInMaintenaceMode() {
-    // create current alerts that are not stale
+    // create current alerts that are stale
     AlertDefinitionEntity definition = new AlertDefinitionEntity();
     definition.setClusterId(CLUSTER_ID);
     definition.setDefinitionName("foo-definition");
@@ -322,6 +344,96 @@ public class StaleAlertRunnableTest {
   }
 
   /**
+   * Tests that stale alerts are not reported if the server has not been running
+   * long enough.
+   */
+  @Test
+  public void testStaleAlertWithServerUptime() {
+    // reset the Runtime MX bean to a low value
+    reset(m_runtimeMXBean);
+    expect(m_runtimeMXBean.getUptime()).andReturn(1000L);
+    replay(m_runtimeMXBean);
+
+    // create current alerts that are stale (5 minute interval)
+    AlertDefinitionEntity definition = new AlertDefinitionEntity();
+    definition.setClusterId(CLUSTER_ID);
+    definition.setDefinitionName("foo-definition");
+    definition.setServiceName("HDFS");
+    definition.setComponentName("NAMENODE");
+    definition.setEnabled(true);
+    definition.setScheduleInterval(5);
+
+    // create current alerts that are stale
+    AlertCurrentEntity current1 = createNiceMock(AlertCurrentEntity.class);
+    AlertHistoryEntity history1 = createNiceMock(AlertHistoryEntity.class);
+
+    expect(current1.getAlertHistory()).andReturn(history1).atLeastOnce();
+    expect(history1.getAlertDefinition()).andReturn(definition).atLeastOnce();
+
+    // use a timestamp that would trigger the alert, say 3x the interval ago (so
+    // 15 minutes ago)
+    long now = System.currentTimeMillis();
+    long staleTime = now - (definition.getScheduleInterval() * 60 * 1000 * 3);
+
+    expect(current1.getMaintenanceState()).andReturn(MaintenanceState.OFF).atLeastOnce();
+    expect(current1.getLatestTimestamp()).andReturn(staleTime).atLeastOnce();
+
+    replay(current1, history1);
+
+    m_currentAlerts.add(current1);
+
+    // precondition that no events were fired
+    assertEquals(0, m_listener.getAlertEventReceivedCount(AlertReceivedEvent.class));
+
+    // instantiate and inject mocks
+    StaleAlertRunnable runnable = new StaleAlertRunnable(m_definition.getDefinitionName());
+    m_injector.injectMembers(runnable);
+
+    // run the alert
+    runnable.run();
+
+    // ensure that our mock MX bean was used
+    verify(m_runtimeMXBean);
+
+    // verify that our uptime was too short so nothing should have been
+    // triggered
+    assertEquals(1, m_listener.getAlertEventReceivedCount(AlertReceivedEvent.class));
+    List<AlertEvent> events = m_listener.getAlertEventInstances(AlertReceivedEvent.class);
+    assertEquals(1, events.size());
+
+    AlertReceivedEvent event = (AlertReceivedEvent) events.get(0);
+    Alert alert = event.getAlert();
+    assertEquals("AMBARI", alert.getService());
+    assertEquals("AMBARI_SERVER", alert.getComponent());
+    assertEquals(AlertState.OK, alert.getState());
+    assertEquals(DEFINITION_NAME, alert.getName());
+
+    // now reset the mocks to indicate that Ambari has been up long enough
+    m_listener.reset();
+    long uptime = definition.getScheduleInterval() * 60 * 1000 * 4;
+    reset(m_runtimeMXBean);
+    expect(m_runtimeMXBean.getUptime()).andReturn(uptime);
+    replay(m_runtimeMXBean);
+
+    // run the alert again and verify that the same stale alert caused a
+    // CRITICAL
+    runnable.run();
+
+    // recheck for the stale alert
+    events = m_listener.getAlertEventInstances(AlertReceivedEvent.class);
+    assertEquals(1, events.size());
+
+    event = (AlertReceivedEvent) events.get(0);
+    alert = event.getAlert();
+    assertEquals("AMBARI", alert.getService());
+    assertEquals("AMBARI_SERVER", alert.getComponent());
+    assertEquals(AlertState.CRITICAL, alert.getState());
+    assertEquals(DEFINITION_NAME, alert.getName());
+
+    assertEquals(1, m_listener.getAlertEventReceivedCount(AlertReceivedEvent.class));
+  }
+
+  /**
    *
    */
   private class MockModule implements Module {


Mime
View raw message