brooklyn-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From aleds...@apache.org
Subject [1/4] brooklyn-server git commit: BROOKLYN-325: alert if provisioning/termination aborted on rebind
Date Wed, 16 Nov 2016 20:46:15 GMT
Repository: brooklyn-server
Updated Branches:
  refs/heads/master f1d89ad97 -> be89a8dd2


BROOKLYN-325: alert if provisioning/termination aborted on rebind


Project: http://git-wip-us.apache.org/repos/asf/brooklyn-server/repo
Commit: http://git-wip-us.apache.org/repos/asf/brooklyn-server/commit/829ae6aa
Tree: http://git-wip-us.apache.org/repos/asf/brooklyn-server/tree/829ae6aa
Diff: http://git-wip-us.apache.org/repos/asf/brooklyn-server/diff/829ae6aa

Branch: refs/heads/master
Commit: 829ae6aab90217b6d8f702f8bcb1f27af71d5520
Parents: fbfe2fe
Author: Aled Sage <aled.sage@gmail.com>
Authored: Mon Nov 14 17:42:36 2016 +0000
Committer: Aled Sage <aled.sage@gmail.com>
Committed: Wed Nov 16 12:47:50 2016 +0000

----------------------------------------------------------------------
 .../entity/internal/AttributesInternal.java     | 12 +++-
 .../mgmt/rebind/BasicEntityRebindSupport.java   | 56 ++++++++++++++---
 .../MachineLifecycleEffectorTasks.java          | 13 +++-
 ...ftwareProcessRebindNotRunningEntityTest.java | 64 +++++++++++++++++---
 4 files changed, 122 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/829ae6aa/core/src/main/java/org/apache/brooklyn/core/entity/internal/AttributesInternal.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/brooklyn/core/entity/internal/AttributesInternal.java
b/core/src/main/java/org/apache/brooklyn/core/entity/internal/AttributesInternal.java
index 6051e05..7bccb4d 100644
--- a/core/src/main/java/org/apache/brooklyn/core/entity/internal/AttributesInternal.java
+++ b/core/src/main/java/org/apache/brooklyn/core/entity/internal/AttributesInternal.java
@@ -30,12 +30,18 @@ public interface AttributesInternal extends Attributes {
     public static final AttributeSensor<ProvisioningTaskState> INTERNAL_PROVISIONING_TASK_STATE
= new BasicAttributeSensor<ProvisioningTaskState>(
             TypeToken.of(ProvisioningTaskState.class), 
             "internal.provisioning.task.state",
-            "Internal transient sensor (do not use) for tracking the provisioning of a machine
(to better handle aborting/rebind)",
-            AttributeSensor.SensorPersistenceMode.NONE);
+            "Internal transient sensor (do not use) for tracking the provisioning of a machine
(to better handle aborting/rebind)");
     
+    @Beta
+    public static final AttributeSensor<ProvisioningTaskState> INTERNAL_TERMINATION_TASK_STATE
= new BasicAttributeSensor<ProvisioningTaskState>(
+            TypeToken.of(ProvisioningTaskState.class), 
+            "internal.termination.task.state",
+            "Internal transient sensor (do not use) for tracking the termination of a machine
(to better handle aborting/rebind)");
+
     /**
      * Used only internally by {@link org.apache.brooklyn.entity.software.base.lifecycle.MachineLifecycleEffectorTasks}
-     * to track provisioning, so machine can be terminated if stopped while opaque provision
call is being made.
+     * to track provisioning/termination. This is used so the machine can be terminated if
stopped while opaque provision
+     * call is being made; and is used to report if termination was prematurely aborted (e.g.
during Brooklyn restart).
      */
     @Beta
     @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/829ae6aa/core/src/main/java/org/apache/brooklyn/core/mgmt/rebind/BasicEntityRebindSupport.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/brooklyn/core/mgmt/rebind/BasicEntityRebindSupport.java
b/core/src/main/java/org/apache/brooklyn/core/mgmt/rebind/BasicEntityRebindSupport.java
index b85b768..f7b5cd8 100644
--- a/core/src/main/java/org/apache/brooklyn/core/mgmt/rebind/BasicEntityRebindSupport.java
+++ b/core/src/main/java/org/apache/brooklyn/core/mgmt/rebind/BasicEntityRebindSupport.java
@@ -26,6 +26,7 @@ import org.apache.brooklyn.api.effector.Effector;
 import org.apache.brooklyn.api.entity.Entity;
 import org.apache.brooklyn.api.entity.EntityLocal;
 import org.apache.brooklyn.api.location.Location;
+import org.apache.brooklyn.api.location.MachineLocation;
 import org.apache.brooklyn.api.mgmt.rebind.RebindContext;
 import org.apache.brooklyn.api.mgmt.rebind.mementos.EntityMemento;
 import org.apache.brooklyn.api.objs.BrooklynObjectType;
@@ -35,15 +36,18 @@ import org.apache.brooklyn.core.enricher.AbstractEnricher;
 import org.apache.brooklyn.core.entity.AbstractEntity;
 import org.apache.brooklyn.core.entity.Attributes;
 import org.apache.brooklyn.core.entity.EntityInternal;
+import org.apache.brooklyn.core.entity.internal.AttributesInternal;
+import org.apache.brooklyn.core.entity.internal.AttributesInternal.ProvisioningTaskState;
 import org.apache.brooklyn.core.entity.lifecycle.Lifecycle;
 import org.apache.brooklyn.core.entity.lifecycle.ServiceStateLogic;
 import org.apache.brooklyn.core.feed.AbstractFeed;
+import org.apache.brooklyn.core.location.Machines;
 import org.apache.brooklyn.core.mgmt.rebind.dto.MementosGenerators;
 import org.apache.brooklyn.core.objs.AbstractBrooklynObject;
 import org.apache.brooklyn.core.policy.AbstractPolicy;
 import org.apache.brooklyn.entity.group.AbstractGroupImpl;
-import org.apache.brooklyn.entity.stock.BasicApplication;
 import org.apache.brooklyn.util.exceptions.Exceptions;
+import org.apache.brooklyn.util.guava.Maybe;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -245,14 +249,52 @@ public class BasicEntityRebindSupport extends AbstractBrooklynObjectRebindSuppor
         Preconditions.checkState(instance == entity, "Expected %s and %s to match, but different
objects", instance, entity);
         Lifecycle expectedState = ServiceStateLogic.getExpectedState(entity);
         if (expectedState == Lifecycle.STARTING || expectedState == Lifecycle.STOPPING) {
-            LOG.warn("Entity {} goes on-fire because it was in state {} on rebind", entity,
expectedState);
-            LOG.warn("not-up-indicators={}", entity.getAttribute(Attributes.SERVICE_NOT_UP_INDICATORS));
-            ServiceStateLogic.setExpectedState(entity, Lifecycle.ON_FIRE);
+            // If we were previously "starting" or "stopping", then those tasks will have
been 
+            // aborted. We don't want to continue showing that state (e.g. the web-console
would
+            // then show the it as in-progress with the "spinning" icon).
+            // Therefore we set the entity as on-fire, and add the indicator that says why.
+            markTransitioningEntityOnFireOnRebind((EntityInternal) entity, expectedState);
+        }
+        
+        // Clear the provisioning/termination task-state; the task will have been aborted,
so wrong to keep this state.
+        ((EntityInternal)entity).sensors().remove(AttributesInternal.INTERNAL_PROVISIONING_TASK_STATE);
+        ((EntityInternal)entity).sensors().remove(AttributesInternal.INTERNAL_TERMINATION_TASK_STATE);
+        
+        super.instanceRebind(instance);
+    }
+    
+    protected void markTransitioningEntityOnFireOnRebind(EntityInternal entity, Lifecycle
expectedState) {
+        LOG.warn("Entity {} being marked as on-fire because it was in state {} on rebind;
indicators={}", new Object[] {entity, expectedState, entity.getAttribute(Attributes.SERVICE_NOT_UP_INDICATORS)});
+        ServiceStateLogic.setExpectedState(entity, Lifecycle.ON_FIRE);
+        ServiceStateLogic.ServiceNotUpLogic.updateNotUpIndicator(
+                entity, 
+                "Task aborted on rebind", 
+                "Set to on-fire (from previous expected state "+expectedState+") because
tasks aborted on rebind");
+        
+        // Check if we were in the process of provisioning a machine. If so, a VM might have
+        // been left behind. E.g. we might have submitted to jclouds the request to provision

+        // the VM, but not yet received back the id (so have lost details of the VM).
+        // Also see MachineLifecycleEffectorTasks.ProvisionMachineTask.
+        Maybe<MachineLocation> machine = Machines.findUniqueMachineLocation(entity.getLocations());
+        ProvisioningTaskState provisioningState = entity.sensors().get(AttributesInternal.INTERNAL_PROVISIONING_TASK_STATE);
+        if (machine.isAbsent() && provisioningState == ProvisioningTaskState.RUNNING)
{
+            LOG.warn("Entity {} was provisioning; VM may have been left running", entity);
             ServiceStateLogic.ServiceNotUpLogic.updateNotUpIndicator(
                     entity, 
-                    "Task aborted on rebind", 
-                    "Set to on-fire (from previous expected state "+expectedState+") because
tasks aborted on rebind");
+                    "VM may be lost on rebind", 
+                    "VM provisioning may have been in-progress and now lost, because tasks
aborted on rebind");
+        }
+
+        // Similar to the provisioning case, if we were terminating the VM then we may or
may 
+        // not have finished. This means the VM might have been left running.
+        // Also see MachineLifecycleEffectorTasks.stopAnyProvisionedMachines()
+        ProvisioningTaskState terminationState = entity.sensors().get(AttributesInternal.INTERNAL_TERMINATION_TASK_STATE);
+        if (machine.isAbsent() && terminationState == ProvisioningTaskState.RUNNING)
{
+            LOG.warn("Entity {} was terminating; VM may have been left running", entity);
+            ServiceStateLogic.ServiceNotUpLogic.updateNotUpIndicator(
+                    entity, 
+                    "VM may be lost on rebind", 
+                    "VM termination may have been in-progress and now lost, because tasks
aborted on rebind");
         }
-        super.instanceRebind(instance);
     }
 }

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/829ae6aa/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
----------------------------------------------------------------------
diff --git a/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
b/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
index 439770a..8748861 100644
--- a/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
+++ b/software/base/src/main/java/org/apache/brooklyn/entity/software/base/lifecycle/MachineLifecycleEffectorTasks.java
@@ -1001,10 +1001,17 @@ public abstract class MachineLifecycleEffectorTasks {
             log.debug("No decommissioning necessary for "+entity()+" - not a machine location
("+machine+")");
             return new StopMachineDetails<Integer>("No machine decommissioning necessary
- not a machine ("+machine+")", 0);
         }
-        
-        clearEntityLocationAttributes(machine);
-        provisioner.release((MachineLocation)machine);
 
+        entity().sensors().set(AttributesInternal.INTERNAL_TERMINATION_TASK_STATE, ProvisioningTaskState.RUNNING);
+        try {
+            clearEntityLocationAttributes(machine);
+            provisioner.release((MachineLocation)machine);
+        } finally {
+            // TODO On exception, should we add the machine back to the entity (because it
might not really be terminated)?
+            //      Do we need a better exception hierarchy for that?
+            entity().sensors().remove(AttributesInternal.INTERNAL_TERMINATION_TASK_STATE);
+        }
+        
         return new StopMachineDetails<Integer>("Decommissioned "+machine, 1);
     }
 

http://git-wip-us.apache.org/repos/asf/brooklyn-server/blob/829ae6aa/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
----------------------------------------------------------------------
diff --git a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
index e5e7709..89a177a 100644
--- a/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
+++ b/software/base/src/test/java/org/apache/brooklyn/entity/software/base/SoftwareProcessRebindNotRunningEntityTest.java
@@ -43,6 +43,7 @@ import org.apache.brooklyn.config.ConfigKey;
 import org.apache.brooklyn.core.config.ConfigKeys;
 import org.apache.brooklyn.core.entity.Attributes;
 import org.apache.brooklyn.core.entity.EntityAsserts;
+import org.apache.brooklyn.core.entity.internal.AttributesInternal;
 import org.apache.brooklyn.core.entity.lifecycle.Lifecycle;
 import org.apache.brooklyn.core.entity.trait.Startable;
 import org.apache.brooklyn.core.location.AbstractLocation;
@@ -75,11 +76,6 @@ import com.google.common.util.concurrent.MoreExecutors;
 
 public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixtureWithApp {
 
-    // TODO If we fail during provisioningLocation.obtain() or provisioningLocation.release(),
then we
-    // should tell the user that a VM might have started being provisioned but been forgotten
about; or
-    // that termination of the VM may or may not have completed.
-    // We could use the Attributes.SERVICE_NOT_UP_INDICATORS to achieve that.
-
     private static final Logger LOG = LoggerFactory.getLogger(SoftwareProcessRebindNotRunningEntityTest.class);
 
     private ListeningExecutorService executor;
@@ -243,6 +239,11 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
 
         assertMarkedAsOnfire(newEntity, Lifecycle.STARTING);
         assertMarkedAsOnfire(newApp, Lifecycle.STARTING);
+        assertMarkedAsVmLost(newEntity, Lifecycle.STARTING);
+
+        // Expect the marker to have been cleared on rebind (sensible because task is not
running).
+        EntityAsserts.assertAttributeEquals(newEntity, AttributesInternal.INTERNAL_PROVISIONING_TASK_STATE,
null);
+        EntityAsserts.assertAttributeEquals(newEntity, AttributesInternal.INTERNAL_TERMINATION_TASK_STATE,
null);
     }
 
     @Test
@@ -267,8 +268,14 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
 
         TestApplication newApp = rebind();
         final VanillaSoftwareProcess newEntity = (VanillaSoftwareProcess) Iterables.find(newApp.getChildren(),
Predicates.instanceOf(VanillaSoftwareProcess.class));
-
+        
+        
         assertMarkedAsOnfire(newEntity, Lifecycle.STOPPING);
+        assertMarkedAsVmLost(newEntity, Lifecycle.STOPPING);
+
+        // Expect the marker to have been cleared on rebind (sensible because task is not
running).
+        EntityAsserts.assertAttributeEquals(newEntity, AttributesInternal.INTERNAL_PROVISIONING_TASK_STATE,
null);
+        EntityAsserts.assertAttributeEquals(newEntity, AttributesInternal.INTERNAL_TERMINATION_TASK_STATE,
null);
     }
 
     @Test
@@ -299,7 +306,7 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
         assertNotMarkedOnfire(newEntity, Lifecycle.STARTING);
         assertNotMarkedOnfire(newApp, Lifecycle.STARTING);
     }
-
+    
     protected ListenableFuture<Void> startAsync(final Startable entity, final Collection<?
extends Location> locs) {
         return executor.submit(new Callable<Void>() {
             @Override public Void call() throws Exception {
@@ -327,15 +334,24 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
         return result;
     }
 
-    protected void assertMarkedAsOnfire(final Entity entity, final Lifecycle previousState)
throws Exception {
+    protected void assertMarkedAsOnfire(Entity entity, Lifecycle previousState) throws Exception
{
         EntityAsserts.assertAttributeEqualsEventually(entity, Attributes.SERVICE_STATE_ACTUAL,
Lifecycle.ON_FIRE);
         EntityAsserts.assertAttributeEqualsEventually(entity, Attributes.SERVICE_UP, false);
+        assertNotUpIndicatorIncludesEventually(entity, "Task aborted on rebind",
+                "Set to on-fire (from previous expected state "+previousState+") because
tasks aborted on rebind");
+    }
+
+    protected void assertMarkedAsVmLost(Entity entity, Lifecycle previousState) throws Exception
{
+        String expectedReason = "VM " + (previousState == Lifecycle.STARTING ? "provisioning"
: "termination") 
+                + " may have been in-progress and now lost, because tasks aborted on rebind";
+        assertNotUpIndicatorIncludesEventually(entity, "VM may be lost on rebind", expectedReason);
+    }
+
+    protected void assertNotUpIndicatorIncludesEventually(final Entity entity, final String
expectedKey, final String expectedVal) throws Exception {
         EntityAsserts.assertAttributeEventually(entity, Attributes.SERVICE_NOT_UP_INDICATORS,
new Predicate<Map<?,?>>() {
             @Override
             public boolean apply(Map<?, ?> input) {
                 if (input == null) return false;
-                String expectedKey = "Task aborted on rebind";
-                String expectedVal = "Set to on-fire (from previous expected state "+previousState+")
because tasks aborted on rebind";
                 for (Map.Entry<?, ?> entry : input.entrySet()) {
                     boolean keyMatches = expectedKey.equals(entry.getKey());
                     boolean valueMatches = expectedVal.equals(entry.getValue());
@@ -363,6 +379,8 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
                 new TypeToken<LocationSpec<SshMachineLocation>>() {},
                 "machineSpec");
 
+        protected List<CallInfo> callHistory = Collections.synchronizedList(Lists.<CallInfo>newArrayList());
+        
         @Override
         public MachineProvisioningLocation<SshMachineLocation> newSubLocation(Map<?,
?> newFlags) {
             throw new UnsupportedOperationException();
@@ -370,6 +388,8 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
         
         @Override
         public SshMachineLocation obtain(Map<?,?> flags) throws NoMachinesAvailableException
{
+            callHistory.add(new CallInfo("obtain", ImmutableList.of(flags)));
+            
             CountDownLatch calledLatch = config().get(OBTAIN_CALLED_LATCH);
             CountDownLatch blockedLatch = config().get(OBTAIN_BLOCKED_LATCH);
             LocationSpec<SshMachineLocation> machineSpec = config().get(MACHINE_SPEC);
@@ -385,6 +405,8 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
 
         @Override
         public void release(SshMachineLocation machine) {
+            callHistory.add(new CallInfo("release", ImmutableList.of(machine)));
+            
             CountDownLatch calledLatch = config().get(RELEASE_CALLED_LATCH);
             CountDownLatch blockedLatch = config().get(RELEASE_BLOCKED_LATCH);
             
@@ -400,5 +422,27 @@ public class SoftwareProcessRebindNotRunningEntityTest extends RebindTestFixture
         public Map getProvisioningFlags(Collection<String> tags) {
             return Collections.emptyMap();
         }
+        
+        public List<CallInfo> getCallHistory() {
+            synchronized (callHistory) {
+                return ImmutableList.copyOf(callHistory);
+            }
+        }
+        
+        public CallInfo getLastCall() {
+            synchronized (callHistory) {
+                return callHistory.get(callHistory.size()-1);
+            }
+        }
+        
+        static class CallInfo {
+            public final String name;
+            public final List<? extends Object> args;
+            
+            public CallInfo(String name, List<? extends Object> args) {
+                this.name = name;
+                this.args = args;
+            }
+        }
     }
 }


Mime
View raw message