brooklyn-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rich...@apache.org
Subject [2/3] git commit: Fix + test HA Manager inferring failed nodes
Date Sat, 07 Jun 2014 19:55:17 GMT
Fix + test HA Manager inferring failed nodes


Project: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/commit/9bf833dd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/tree/9bf833dd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/diff/9bf833dd

Branch: refs/heads/master
Commit: 9bf833ddd5ed626f0d9a4326c747bc589f024cbd
Parents: c1aa990
Author: Aled Sage <aled.sage@gmail.com>
Authored: Fri Jun 6 08:20:30 2014 +0200
Committer: Richard Downer <richard.downer@cloudsoftcorp.com>
Committed: Sat Jun 7 21:43:41 2014 +0200

----------------------------------------------------------------------
 .../ha/HighAvailabilityManagerImpl.java         | 21 ++++++++--------
 .../ha/HighAvailabilityManagerTest.java         | 26 +++++++++++++++++++-
 2 files changed, 35 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9bf833dd/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java b/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
index e3bc6ae..891cb1a 100644
--- a/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
+++ b/core/src/main/java/brooklyn/management/ha/HighAvailabilityManagerImpl.java
@@ -426,12 +426,12 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager
{
     }
 
     /**
-     * @param replaceLocalNodeWithCurrentRecord - if true, the record for this mgmt node will be replaced with the
+     * @param reportCleanedState - if true, the record for this mgmt node will be replaced with the
      * actual current status known in this JVM (may be more recent than what is on disk);
      * normally there is no reason to care because data is persisted to disk immediately
      * after any significant change, but for fringe cases this is perhaps more accurate (perhaps remove in time?)
      */
-    protected ManagementPlaneSyncRecord loadManagementPlaneSyncRecord(boolean replaceLocalNodeWithCurrentRecord)
{
+    protected ManagementPlaneSyncRecord loadManagementPlaneSyncRecord(boolean reportCleanedState)
{
         if (disabled) {
             // if HA is disabled, then we are the only node - no persistence; just load a memento to describe this node
             Builder builder = ManagementPlaneSyncRecordImpl.builder()
@@ -448,17 +448,11 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager
{
             try {
                 ManagementPlaneSyncRecord result = persister.loadSyncRecord();
                 
-                // Detect AWOL nodes
-                result = ManagementPlaneSyncRecordImpl.builder()
-                        .masterNodeId(result.getMasterNodeId())
-                        .nodes(Iterables.transform(result.getManagementNodes().values(),
detectNodesGoneAwolFunction))
-                        .node(createManagementNodeSyncRecord())
-                        .build();
-                
-                if (replaceLocalNodeWithCurrentRecord) {
+                if (reportCleanedState) {
+                    // Report this node's most recent state, and detect AWOL nodes
                     Builder builder = ManagementPlaneSyncRecordImpl.builder()
                         .masterNodeId(result.getMasterNodeId())
-                        .nodes(result.getManagementNodes().values())
+                        .nodes(Iterables.transform(result.getManagementNodes().values(),
detectNodesGoneAwolFunction))
                         .node(createManagementNodeSyncRecord());
                     if (getNodeState() == ManagementNodeState.MASTER) {
                         builder.masterNodeId(ownNodeId);
@@ -494,11 +488,16 @@ public class HighAvailabilityManagerImpl implements HighAvailabilityManager
{
         return ticker.read();
     }
 
+    /**
+     * Infers the health of a node - if it last reported itself as healthy (standby or master), but we haven't heard
+     * from it in a long time then report that node as failed; otherwise report its health as-is.
+     */
     private class DetectNodesGoneAwol implements Function<ManagementNodeSyncRecord, ManagementNodeSyncRecord>
{
         @Nullable
         @Override
         public ManagementNodeSyncRecord apply(@Nullable ManagementNodeSyncRecord input) {
             if (input == null) return null;
+            if (!(input.getStatus() == ManagementNodeState.STANDBY || input.getStatus() ==
ManagementNodeState.MASTER)) return input;
             if (isHeartbeatOk(input, currentTimeMillis())) return input;
             return BasicManagementNodeSyncRecord.builder()
                     .from(input)

http://git-wip-us.apache.org/repos/asf/incubator-brooklyn/blob/9bf833dd/core/src/test/java/brooklyn/management/ha/HighAvailabilityManagerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/brooklyn/management/ha/HighAvailabilityManagerTest.java b/core/src/test/java/brooklyn/management/ha/HighAvailabilityManagerTest.java
index 4373c75..45b6326 100644
--- a/core/src/test/java/brooklyn/management/ha/HighAvailabilityManagerTest.java
+++ b/core/src/test/java/brooklyn/management/ha/HighAvailabilityManagerTest.java
@@ -2,6 +2,7 @@ package brooklyn.management.ha;
 
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
 import static org.testng.Assert.assertTrue;
 
 import java.util.List;
@@ -135,12 +136,35 @@ public class HighAvailabilityManagerTest {
         testGetManagementPlaneStatus();
     }
     
+    @Test
+    public void testGetManagementPlaneSyncStateInfersTimedOutNodeAsFailed() throws Exception
{
+        persister.delta(ManagementPlaneSyncRecordDeltaImpl.builder()
+                .node(newManagerMemento(ownNodeId, ManagementNodeState.STANDBY, currentTimeMillis()))
+                .node(newManagerMemento("node1", ManagementNodeState.MASTER, currentTimeMillis()))
+                .setMaster("node1")
+                .build());
+        
+        manager.start(HighAvailabilityMode.AUTO);
+        
+        ManagementPlaneSyncRecord state = manager.getManagementPlaneSyncState();
+        assertEquals(state.getManagementNodes().get("node1").getStatus(), ManagementNodeState.MASTER);
+        assertEquals(state.getManagementNodes().get(ownNodeId).getStatus(), ManagementNodeState.STANDBY);
+        
+        // Simulate passage of time; ticker used by this HA-manager so it will "correctly" publish
+        // its own heartbeat with the new time; but node1's record is now out-of-date.
+        incrementClock(31, TimeUnit.SECONDS);
+        
+        ManagementPlaneSyncRecord state2 = manager.getManagementPlaneSyncState();
+        assertEquals(state2.getManagementNodes().get("node1").getStatus(), ManagementNodeState.FAILED);
+        assertNotEquals(state.getManagementNodes().get(ownNodeId).getStatus(), ManagementNodeState.FAILED);
+    }
+
     private long currentTimeMillis() {
         return ticker.read();
     }
     
     private long incrementClock(long increment, TimeUnit unit) {
-        currentTime.addAndGet(unit.toNanos(increment));
+        currentTime.addAndGet(unit.toMillis(increment));
         return currentTimeMillis();
     }
     


Mime
View raw message