Return-Path: X-Original-To: apmail-cloudstack-commits-archive@www.apache.org Delivered-To: apmail-cloudstack-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4438C1019A for ; Tue, 18 Jun 2013 00:24:48 +0000 (UTC) Received: (qmail 50621 invoked by uid 500); 18 Jun 2013 00:24:46 -0000 Delivered-To: apmail-cloudstack-commits-archive@cloudstack.apache.org Received: (qmail 50551 invoked by uid 500); 18 Jun 2013 00:24:46 -0000 Mailing-List: contact commits-help@cloudstack.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@cloudstack.apache.org Delivered-To: mailing list commits@cloudstack.apache.org Received: (qmail 50443 invoked by uid 99); 18 Jun 2013 00:24:46 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 18 Jun 2013 00:24:46 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 4A4128A68D3; Tue, 18 Jun 2013 00:24:46 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ahuang@apache.org To: commits@cloudstack.apache.org Date: Tue, 18 Jun 2013 00:24:52 -0000 Message-Id: <331e67dac4984ebfaad576fca9f6fc25@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [7/9] git commit: updated refs/heads/vmsync to 309f8da Added outcome Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/fa525a7d Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/fa525a7d Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/fa525a7d Branch: refs/heads/vmsync Commit: fa525a7d3862cac28ba12da37155d80de5d0e9a1 Parents: 8f00c19 Author: Alex Huang Authored: Mon Jun 17 11:40:14 2013 -0700 Committer: Alex Huang Committed: Mon Jun 17 17:25:01 2013 -0700 ---------------------------------------------------------------------- .../src/com/cloud/vm/VirtualMachineManager.java | 34 ++- .../com/cloud/vm/VirtualMachineManagerImpl.java | 240 +++++++++++-------- .../engine/vm/VMEntityManagerImpl.java | 8 +- .../jobs/JobCancellationException.java | 7 +- .../framework/jobs/impl/OutcomeImpl.java | 8 + .../lb/ElasticLoadBalancerManagerImpl.java | 2 +- .../lb/InternalLoadBalancerVMManagerImpl.java | 2 +- .../consoleproxy/ConsoleProxyManagerImpl.java | 4 +- .../cloud/ha/HighAvailabilityManagerImpl.java | 173 +++++++------ .../VirtualNetworkApplianceManagerImpl.java | 2 +- .../com/cloud/resource/ResourceManagerImpl.java | 2 +- .../storage/StoragePoolAutomationImpl.java | 20 +- .../secondary/SecondaryStorageManagerImpl.java | 4 +- .../src/com/cloud/user/AccountManagerImpl.java | 2 +- server/src/com/cloud/vm/UserVmManagerImpl.java | 4 +- .../vm/snapshot/VMSnapshotManagerImpl.java | 2 +- .../vm/VmWorkMockVirtualMachineManagerImpl.java | 15 +- 17 files changed, 312 insertions(+), 217 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java ---------------------------------------------------------------------- diff --git a/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java b/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java index f1ace6c..e1365e2 100644 --- a/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java +++ b/engine/components-api/src/com/cloud/vm/VirtualMachineManager.java @@ -20,6 +20,8 @@ import java.net.URI; import java.util.List; import java.util.Map; +import org.apache.cloudstack.framework.jobs.Outcome; + import com.cloud.agent.api.to.NicTO; import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.deploy.DeployDestination; @@ -88,11 +90,11 @@ public interface VirtualMachineManager extends Manager { HypervisorType hyperType, Account owner); - void start(String vmUuid, Map params); + void easyStart(String vmUuid, Map params); - void start(String vmUuid, Map params, DeploymentPlan planToDeploy); + void easyStart(String vmUuid, Map params, DeploymentPlan planToDeploy); - void stop(String vmUuid); + void easyStop(String vmUuid); void expunge(String vmUuid); @@ -100,8 +102,29 @@ public interface VirtualMachineManager extends Manager { boolean stateTransitTo(VMInstanceVO vm, VirtualMachine.Event e, Long hostId) throws NoTransitionException; - void advanceStart(String vmUuid, Map params, DeploymentPlan planToDeploy) - throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException, OperationTimedoutException; + /** + * Files a start job to start the virtual machine. The caller can use + * the Outcome object to wait for the result. The Outcome throws + * ExecutionException if there's a problem with the job execution. + * The cause of the ExecutionException carries the reason to why + * there is a problem. + * - ConcurrentOperationException: There are multiple operations happening on the same objects. + * - InsufficientCapacityException: Insufficient capacity to start a VM. The exception carries the cause. + * - ResourceUnavailableException: The resource needed to start a VM is not available. + * - OperationTimedoutException: The operation has been sent to the physical resource but we timed out waiting for results. + * + * Most callers should use this method to start VMs. Of the various + * possible exceptions, the worst is OperationTimedoutException. This + * indicates that the operation was sent to the physical resource but + * there was no response. Under these situations, we do not know if the + * operation succeeded or failed and require manual intervention. + * + * @param vmUuid uuid to the VM to start + * @param params parameters passed to be passed down + * @param planToDeploy plan on where to deploy the vm. + * @return Outcome to wait for the result. + */ + Outcome start(String vmUuid, Map params, DeploymentPlan planToDeploy); void advanceStop(String vmUuid, boolean cleanup) throws ResourceUnavailableException, OperationTimedoutException, ConcurrentOperationException; @@ -191,4 +214,5 @@ public interface VirtualMachineManager extends Manager { ManagementServerException, VirtualMachineMigrationException; NicTO toNicTO(NicProfile nic, HypervisorType hypervisorType); + } http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java ---------------------------------------------------------------------- diff --git a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java index 4f308fb..4dd0e3c 100755 --- a/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -31,6 +31,7 @@ import java.util.TimeZone; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import javax.ejb.Local; import javax.inject.Inject; @@ -40,14 +41,18 @@ import org.apache.log4j.Logger; import org.apache.cloudstack.affinity.dao.AffinityGroupVMMapDao; import org.apache.cloudstack.config.ConfigDepot; +import org.apache.cloudstack.config.ConfigKey; import org.apache.cloudstack.config.ConfigValue; +import org.apache.cloudstack.config.Configurable; import org.apache.cloudstack.context.CallContext; -import org.apache.cloudstack.engine.config.Configs; +import org.apache.cloudstack.engine.service.api.OrchestrationService; import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; import org.apache.cloudstack.engine.subsystem.api.storage.StoragePoolAllocator; import org.apache.cloudstack.framework.jobs.AsyncJob; import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext; import org.apache.cloudstack.framework.jobs.AsyncJobManager; +import org.apache.cloudstack.framework.jobs.Outcome; +import org.apache.cloudstack.framework.jobs.impl.OutcomeImpl; import org.apache.cloudstack.framework.messagebus.MessageBus; import org.apache.cloudstack.framework.messagebus.MessageDispatcher; import org.apache.cloudstack.framework.messagebus.MessageHandler; @@ -167,15 +172,45 @@ import com.cloud.vm.dao.VMInstanceDao; import com.cloud.vm.snapshot.VMSnapshotManager; @Local(value = VirtualMachineManager.class) -public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMachineManager, Listener { +public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMachineManager, Listener, Configurable { private static final Logger s_logger = Logger.getLogger(VirtualMachineManagerImpl.class); private static final String VM_SYNC_ALERT_SUBJECT = "VM state sync alert"; + protected static final ConfigKey StartRetry = new ConfigKey( + Integer.class, "start.retry", "Advanced", OrchestrationService.class, "10", "Number of times to retry create and start commands", true, null); + protected static final ConfigKey VmOpWaitInterval = new ConfigKey( + Long.class, "vm.op.wait.interval", "Advanced", OrchestrationService.class, "120", "Time (in seconds) to wait before checking if a previous operation has succeeded", + true, null); + protected static final ConfigKey VmOpLockStateRetry = new ConfigKey( + Integer.class, "vm.op.lock.state.retry", "Advanced", OrchestrationService.class, "5", "Times to retry locking the state of a VM for operations", + true, "-1 means try forever"); + protected static final ConfigKey VmOpCleanupInterval = new ConfigKey( + Long.class, "vm.op.cleanup.interval", "Advanced", OrchestrationService.class, "86400", "Interval to run the thread that cleans up the vm operations (in seconds)", + false, "Seconds"); + protected static final ConfigKey VmOpCleanupWait = new ConfigKey( + Long.class, "vm.op.cleanup.wait", "Advanced", OrchestrationService.class, "3600", "Time (in seconds) to wait before cleanuping up any vm work items", false, "Seconds"); + protected static final ConfigKey VmOpCancelInterval = new ConfigKey( + Integer.class, "vm.op.cancel.interval", "Advanced", OrchestrationService.class, "3600", "Time (in seconds) to wait before cancelling a operation", false, "Seconds"); + protected static final ConfigKey Wait = new ConfigKey( + Integer.class, "wait", "Advanced", OrchestrationService.class, "1800", "Time in seconds to wait for control commands to return", false, null); + protected static final ConfigKey VmDestroyForceStop = new ConfigKey( + Boolean.class, "vm.destroy.forcestop", "Advanced", OrchestrationService.class, "false", "On destroy, force-stop takes this value ", true, null); + + // New + protected static final ConfigKey VmJobCheckInterval = new ConfigKey( + Long.class, "vm.job.check.interval", "VM Orchestration", OrchestrationService.class, "3000", "Interval in milliseconds to check if the job is complete", true, + "Milliseconds"); + protected static final ConfigKey VmJobTimeout = new ConfigKey( + Long.class, "vm.job.timeout", "VM Orchestration", OrchestrationService.class, "600000", "Time in milliseconds to wait before attempting to cancel a job", true, + "Milliseconds"); + public static final ConfigKey PingInterval = new ConfigKey( + Long.class, "ping.interval", "Advanced", OrchestrationService.class, "60", "Ping interval in seconds", false, null); + @Inject protected EntityManager _entityMgr; @Inject - ConfigDepot _configRepo; + ConfigDepot _configDepot; @Inject DataStoreManager _dataStoreMgr; @Inject @@ -255,6 +290,9 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac protected ConfigValue _operationTimeout; protected ConfigValue _forceStop; protected ConfigValue _pingInterval; + protected ConfigValue _jobCheckInterval; + protected ConfigValue _jobTimeout; + protected long _nodeId; SearchBuilder RootVolumeSearch; @@ -437,16 +475,18 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac @Override public boolean configure(String name, Map xmlParams) throws ConfigurationException { - _retry = _configRepo.get(Configs.StartRetry); - - _cancelWait = _configRepo.get(Configs.VmOpCancelInterval); - _cleanupWait = _configRepo.get(Configs.VmOpCleanupWait); - _cleanupInterval = _configRepo.get(Configs.VmOpCleanupInterval).setMultiplier(1000); - _opWaitInterval = _configRepo.get(Configs.VmOpWaitInterval).setMultiplier(1000); - _lockStateRetry = _configRepo.get(Configs.VmOpLockStateRetry); - _operationTimeout = _configRepo.get(Configs.Wait).setMultiplier(2); - _forceStop = _configRepo.get(Configs.VmDestroyForcestop); - _pingInterval = _configRepo.get(Configs.PingInterval).setMultiplier(1000); + _retry = _configDepot.get(StartRetry); + + _pingInterval = _configDepot.get(PingInterval).setMultiplier(1000); + _cancelWait = _configDepot.get(VmOpCancelInterval); + _cleanupWait = _configDepot.get(VmOpCleanupWait); + _cleanupInterval = _configDepot.get(VmOpCleanupInterval).setMultiplier(1000); + _opWaitInterval = _configDepot.get(VmOpWaitInterval).setMultiplier(1000); + _lockStateRetry = _configDepot.get(VmOpLockStateRetry); + _operationTimeout = _configDepot.get(Wait).setMultiplier(2); + _forceStop = _configDepot.get(VmDestroyForceStop); + _jobCheckInterval = _configDepot.get(VmJobCheckInterval); + _jobTimeout = _configDepot.get(VmJobTimeout); ReservationContextImpl.setComponents(_entityMgr); VirtualMachineProfileImpl.setComponents(_entityMgr); @@ -474,20 +514,21 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } @Override - public void start(String vmUuid, Map params) { - start(vmUuid, params, null); + public void easyStart(String vmUuid, Map params) { + easyStart(vmUuid, params, null); } @Override - public void start(String vmUuid, Map params, DeploymentPlan planToDeploy) { + public void easyStart(String vmUuid, Map params, DeploymentPlan planToDeploy) { + Outcome outcome = start(vmUuid, params, planToDeploy); try { - advanceStart(vmUuid, params, planToDeploy); - } catch (ConcurrentOperationException e) { - throw new CloudRuntimeException(e).add(VirtualMachine.class, vmUuid); - } catch (InsufficientCapacityException e) { - throw new CloudRuntimeException(e).add(VirtualMachine.class, vmUuid); - } catch (ResourceUnavailableException e) { - throw new CloudRuntimeException(e).add(VirtualMachine.class, vmUuid); + outcome.get(_jobTimeout.value(), TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // FIXME: What to do + } catch (java.util.concurrent.ExecutionException e) { + // FIXME: What to do + } catch (TimeoutException e) { + // FIXME: What to do } } @@ -653,8 +694,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac @Override @DB - public void advanceStart(String vmUuid, Map params, DeploymentPlan planToDeploy) - throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException { + public Outcome start(String vmUuid, Map params, DeploymentPlan planToDeploy) { CallContext context = CallContext.current(); User callingUser = context.getCallingUser(); Account callingAccount = context.getCallingAccount(); @@ -663,77 +703,43 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac VmWorkJobVO workJob = null; Transaction txn = Transaction.currentTxn(); - try { - txn.start(); - - _vmDao.lockRow(vm.getId(), true); - - List pendingWorkJobs = _workJobDao.listPendingWorkJobs( - VirtualMachine.Type.Instance, vm.getId(), VmWorkJobDispatcher.Start); - - if(pendingWorkJobs != null && pendingWorkJobs.size() > 0) { - assert(pendingWorkJobs.size() == 1); - workJob = pendingWorkJobs.get(0); - } else { - workJob = new VmWorkJobVO(context.getContextId()); - - workJob.setDispatcher(VmWorkJobDispatcher.VM_WORK_JOB_DISPATCHER); - workJob.setCmd(VmWorkJobDispatcher.Start); - - workJob.setAccountId(callingAccount.getId()); - workJob.setUserId(callingUser.getId()); - workJob.setStep(VmWorkJobVO.Step.Starting); - workJob.setVmType(vm.getType()); - workJob.setVmInstanceId(vm.getId()); + txn.start(); - // save work context info (there are some duplications) - VmWorkStart workInfo = new VmWorkStart(); - workInfo.setAccountId(callingAccount.getId()); - workInfo.setUserId(callingUser.getId()); - workInfo.setVmId(vm.getId()); - workInfo.setPlan(planToDeploy); - workInfo.setParams(params); - workJob.setCmdInfo(VmWorkJobDispatcher.serialize(workInfo)); - - _jobMgr.submitAsyncJob(workJob, VmWorkJobDispatcher.VM_WORK_QUEUE, vm.getId()); - } - - txn.commit(); - } catch(Throwable e) { - s_logger.error("Unexpected exception", e); - throw new ConcurrentOperationException("Unhandled exception, converted to ConcurrentOperationException", e); + _vmDao.lockRow(vm.getId(), true); + + List pendingWorkJobs = _workJobDao.listPendingWorkJobs(VirtualMachine.Type.Instance, vm.getId(), VmWorkJobDispatcher.Start); + + if (pendingWorkJobs != null && pendingWorkJobs.size() > 0) { + assert (pendingWorkJobs.size() == 1); + workJob = pendingWorkJobs.get(0); + } else { + workJob = new VmWorkJobVO(context.getContextId()); + + workJob.setDispatcher(VmWorkJobDispatcher.VM_WORK_JOB_DISPATCHER); + workJob.setCmd(VmWorkJobDispatcher.Start); + + workJob.setAccountId(callingAccount.getId()); + workJob.setUserId(callingUser.getId()); + workJob.setStep(VmWorkJobVO.Step.Starting); + workJob.setVmType(vm.getType()); + workJob.setVmInstanceId(vm.getId()); + + // save work context info (there are some duplications) + VmWorkStart workInfo = new VmWorkStart(); + workInfo.setAccountId(callingAccount.getId()); + workInfo.setUserId(callingUser.getId()); + workInfo.setVmId(vm.getId()); + workInfo.setPlan(planToDeploy); + workInfo.setParams(params); + workJob.setCmdInfo(VmWorkJobDispatcher.serialize(workInfo)); + + _jobMgr.submitAsyncJob(workJob, VmWorkJobDispatcher.VM_WORK_QUEUE, vm.getId()); } + txn.commit(); final long jobId = workJob.getId(); - AsyncJobExecutionContext.getCurrentExecutionContext().joinJob(jobId); - - // - // TODO : this will be replaced with fully-asynchronous way later so that we don't need - // to wait here. The reason we do synchronous-wait here is that callers of advanceStart is expecting - // synchronous semantics - // - // - _jobMgr.waitAndCheck( - new String[] {Topics.VM_POWER_STATE, AsyncJob.Topics.JOB_STATE}, - 3000L, 600000L, new Predicate() { - - @Override - public boolean checkCondition() { - VMInstanceVO instance = _vmDao.findById(vm.getId()); - if(instance.getPowerState() == VirtualMachine.PowerState.PowerOn) - return true; - - VmWorkJobVO workJob = _workJobDao.findById(jobId); - if(workJob.getStatus() != JobInfo.Status.IN_PROGRESS) - return true; - - return false; - } - }); - AsyncJobExecutionContext.getCurrentExecutionContext().disjoinJob(jobId); - - return; + return new VmOutcome(workJob, VirtualMachine.PowerState.PowerOn); } private Pair findDestination(VirtualMachineProfileImpl profile, DeploymentPlan planRequested, boolean reuseVolume, @@ -988,7 +994,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } @Override - public void stop(String vmUuid) { + public void easyStop(String vmUuid) { try { advanceStop(vmUuid, false); } catch (OperationTimedoutException e) { @@ -1169,8 +1175,6 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac public void orchestrateStop(String vmUuid, boolean forced) throws AgentUnavailableException, OperationTimedoutException, ConcurrentOperationException { CallContext context = CallContext.current(); - User user = context.getCallingUser(); - Account account = context.getCallingAccount(); VmWorkJobVO work = _workJobDao.findById(AsyncJobExecutionContext.getCurrentExecutionContext().getJob().getId()); @@ -2730,7 +2734,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } try { -/* +/* lock.addRef(); List instances = _vmDao.findVMInTransition(new Date(new Date().getTime() - (_operationTimeout.value() * 1000)), State.Starting, State.Stopping); for (VMInstanceVO instance : instances) { @@ -2741,7 +2745,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac _haMgr.scheduleRestart(instance, true); } } -*/ +*/ scanStalledVMInTransitionStateOnDisconnectedHosts(); } catch (Exception e) { @@ -3448,7 +3452,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } // we need to alert admin or user about this risky state transition - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (Starting -> Running) from out-of-context transition. VM network environment may need to be reset"); break; @@ -3469,7 +3473,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } catch(NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); } - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState() + " -> Running) from out-of-context transition. VM network environment may need to be reset"); break; @@ -3541,7 +3545,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac // or from designed behave of XS/KVM), the VM may not get a chance to run the state-sync logic // // Therefor, we will scan thoses VMs on UP host based on last update timestamp, if the host is UP - // and a VM stalls for status update, we will consider them to be powered off + // and a VM stalls for status update, we will consider them to be powered off // (which is relatively safe to do so) long stallThresholdInMs = _pingInterval.value() + (_pingInterval.value() >> 1); @@ -3565,13 +3569,13 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } private void scanStalledVMInTransitionStateOnDisconnectedHosts() { - Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - this._operationTimeout.value()*1000); + Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - _operationTimeout.value()*1000); List stuckAndUncontrollableVMs = listStalledVMInTransitionStateOnDisconnectedHosts(cutTime); for(Long vmId : stuckAndUncontrollableVMs) { VMInstanceVO vm = _vmDao.findById(vmId); // We now only alert administrator about this situation - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") is stuck in " + vm.getState() + " state and its host is unreachable for too long"); } } @@ -3606,9 +3610,9 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac // VMs that in transitional state and recently have power state update @DB private List listVMInTransitionStateWithRecentReportOnUpHost(long hostId, Date cutTime) { - String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " + + String sql = "SELECT i.* FROM vm_instance as i, host as h WHERE h.status = 'UP' " + "AND h.id = ? AND i.power_state_update_time > ? AND i.host_id = h.id " + - "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " + + "AND (i.state ='Starting' OR i.state='Stopping' OR i.state='Migrating') " + "AND i.id NOT IN (SELECT vm_instance_id FROM vm_work_job)"; List l = new ArrayList(); @@ -3652,4 +3656,34 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac } return l; } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[] {StartRetry, VmOpWaitInterval, VmOpLockStateRetry, VmOpCleanupInterval, VmOpCleanupWait, VmOpCancelInterval, VmDestroyForceStop, + VmJobCheckInterval, VmJobTimeout, PingInterval}; + } + + public class VmOutcome extends OutcomeImpl { + public VmOutcome(final AsyncJob job, final PowerState desiredPowerState) { + super(VirtualMachine.class, job, _jobCheckInterval.value(), new Predicate() { + @Override + public boolean checkCondition() { + VMInstanceVO instance = _vmDao.findById(job.getInstanceId()); + if (instance.getPowerState() == desiredPowerState) + return true; + + VmWorkJobVO workJob = _workJobDao.findById(job.getId()); + if (workJob.getStatus() != JobInfo.Status.IN_PROGRESS) + return true; + + return false; + } + }, Topics.VM_POWER_STATE, AsyncJob.Topics.JOB_STATE); + } + + @Override + protected VirtualMachine retrieve() { + return _vmDao.findById(_job.getInstanceId()); + } + } } http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/engine/orchestration/src/org/apache/cloudstack/engine/vm/VMEntityManagerImpl.java ---------------------------------------------------------------------- diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/vm/VMEntityManagerImpl.java b/engine/orchestration/src/org/apache/cloudstack/engine/vm/VMEntityManagerImpl.java index 77a842e..3b25dfe 100755 --- a/engine/orchestration/src/org/apache/cloudstack/engine/vm/VMEntityManagerImpl.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/vm/VMEntityManagerImpl.java @@ -242,14 +242,14 @@ public class VMEntityManagerImpl implements VMEntityManager { DataCenterDeployment reservedPlan = new DataCenterDeployment(vm.getDataCenterId(), vmReservation.getPodId(), vmReservation.getClusterId(), vmReservation.getHostId(), null, null); try { - _itMgr.start(vm.getUuid(), params, reservedPlan); + _itMgr.easyStart(vm.getUuid(), params, reservedPlan); } catch (CloudRuntimeException ex) { // Retry the deployment without using the reservation plan - _itMgr.start(vm.getUuid(), params, null); + _itMgr.easyStart(vm.getUuid(), params, null); } } else { // no reservation found. Let VirtualMachineManager retry - _itMgr.start(vm.getUuid(), params, null); + _itMgr.easyStart(vm.getUuid(), params, null); } } @@ -257,7 +257,7 @@ public class VMEntityManagerImpl implements VMEntityManager { @Override public boolean stop(VMEntityVO vm, String caller) throws ResourceUnavailableException { try { - _itMgr.stop(vm.getUuid()); + _itMgr.easyStop(vm.getUuid()); return true; } catch (CloudRuntimeException e) { s_logger.warn("Unable to stop " + vm, e); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/framework/jobs/src/org/apache/cloudstack/framework/jobs/JobCancellationException.java ---------------------------------------------------------------------- diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/JobCancellationException.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/JobCancellationException.java index a433b2b..28c1e5b 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/JobCancellationException.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/JobCancellationException.java @@ -16,14 +16,16 @@ // under the License. package org.apache.cloudstack.framework.jobs; +import java.util.concurrent.CancellationException; + import com.cloud.utils.SerialVersionUID; -import com.cloud.utils.exception.CloudRuntimeException; + /** * This exception is fired when the job has been cancelled * */ -public class JobCancellationException extends CloudRuntimeException { +public class JobCancellationException extends CancellationException { private static final long serialVersionUID = SerialVersionUID.AffinityConflictException; @@ -33,7 +35,6 @@ public class JobCancellationException extends CloudRuntimeException { TimedOut; } - Reason reason; public JobCancellationException(Reason reason) { http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/OutcomeImpl.java ---------------------------------------------------------------------- diff --git a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/OutcomeImpl.java b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/OutcomeImpl.java index f1e4f4b..020029d 100644 --- a/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/OutcomeImpl.java +++ b/framework/jobs/src/org/apache/cloudstack/framework/jobs/impl/OutcomeImpl.java @@ -21,6 +21,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import org.apache.cloudstack.framework.jobs.AsyncJob; +import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext; import org.apache.cloudstack.framework.jobs.AsyncJobManager; import org.apache.cloudstack.framework.jobs.Outcome; @@ -62,12 +63,19 @@ public class OutcomeImpl implements Outcome { @Override public T get() throws InterruptedException, ExecutionException { s_jobMgr.waitAndCheck(_topics, _checkIntervalInMs, -1, _predicate); + s_jobMgr.disjoinJob(AsyncJobExecutionContext.getCurrentExecutionContext().getJob().getId(), _job.getId()); + return retrieve(); } @Override public T get(long timeToWait, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { s_jobMgr.waitAndCheck(_topics, _checkIntervalInMs, unit.toMillis(timeToWait), _predicate); + try { + AsyncJobExecutionContext.getCurrentExecutionContext().disjoinJob(_job.getId()); + } catch (Throwable e) { + throw new ExecutionException("Job task has trouble executing", e); + } return retrieve(); } http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/plugins/network-elements/elastic-loadbalancer/src/com/cloud/network/lb/ElasticLoadBalancerManagerImpl.java ---------------------------------------------------------------------- diff --git a/plugins/network-elements/elastic-loadbalancer/src/com/cloud/network/lb/ElasticLoadBalancerManagerImpl.java b/plugins/network-elements/elastic-loadbalancer/src/com/cloud/network/lb/ElasticLoadBalancerManagerImpl.java index d8daeae..2bb3ba3 100644 --- a/plugins/network-elements/elastic-loadbalancer/src/com/cloud/network/lb/ElasticLoadBalancerManagerImpl.java +++ b/plugins/network-elements/elastic-loadbalancer/src/com/cloud/network/lb/ElasticLoadBalancerManagerImpl.java @@ -546,7 +546,7 @@ public class ElasticLoadBalancerManagerImpl extends ManagerBase implements ConcurrentOperationException, ResourceUnavailableException { s_logger.debug("Starting ELB VM " + elbVm); try { - _itMgr.start(elbVm.getUuid(), params); + _itMgr.easyStart(elbVm.getUuid(), params); return _routerDao.findById(elbVm.getId()); } catch (CloudRuntimeException e) { s_logger.warn("Unable to start " + elbVm, e); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/plugins/network-elements/internal-loadbalancer/src/org/apache/cloudstack/network/lb/InternalLoadBalancerVMManagerImpl.java ---------------------------------------------------------------------- diff --git a/plugins/network-elements/internal-loadbalancer/src/org/apache/cloudstack/network/lb/InternalLoadBalancerVMManagerImpl.java b/plugins/network-elements/internal-loadbalancer/src/org/apache/cloudstack/network/lb/InternalLoadBalancerVMManagerImpl.java index 6754103..2a91c95 100644 --- a/plugins/network-elements/internal-loadbalancer/src/org/apache/cloudstack/network/lb/InternalLoadBalancerVMManagerImpl.java +++ b/plugins/network-elements/internal-loadbalancer/src/org/apache/cloudstack/network/lb/InternalLoadBalancerVMManagerImpl.java @@ -830,7 +830,7 @@ public class InternalLoadBalancerVMManagerImpl extends ManagerBase implements ConcurrentOperationException, ResourceUnavailableException { s_logger.debug("Starting Internal LB VM " + internalLbVm); try { - _itMgr.start(internalLbVm.getUuid(), params, null); + _itMgr.easyStart(internalLbVm.getUuid(), params, null); internalLbVm = _internalLbVmDao.findById(internalLbVm.getId()); if (internalLbVm.isStopPending()) { s_logger.info("Clear the stop pending flag of Internal LB VM " + internalLbVm.getHostName() + " after start router successfully!"); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java b/server/src/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java index 8f0c096..59e3901 100755 --- a/server/src/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java +++ b/server/src/com/cloud/consoleproxy/ConsoleProxyManagerImpl.java @@ -558,7 +558,7 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy } if (proxy.getState() == VirtualMachine.State.Stopped) { - _itMgr.start(proxy.getUuid(), null); + _itMgr.easyStart(proxy.getUuid(), null); proxy = _consoleProxyDao.findById(proxyVmId); if (proxy.getState() == State.Running) { return null; @@ -1036,7 +1036,7 @@ public class ConsoleProxyManagerImpl extends ManagerBase implements ConsoleProxy } try { - _itMgr.stop(proxy.getUuid()); + _itMgr.easyStop(proxy.getUuid()); return true; } catch (CloudRuntimeException e) { s_logger.warn("Unable to stop " + proxy, e); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java index 2385b16..fb20711 100755 --- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java +++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java @@ -21,6 +21,8 @@ import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -33,6 +35,7 @@ import org.apache.log4j.Logger; import org.apache.log4j.NDC; import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.framework.jobs.Outcome; import com.cloud.agent.AgentManager; import com.cloud.alert.AlertManager; @@ -97,7 +100,7 @@ import com.cloud.vm.dao.VMInstanceDao; * ha.retry.wait | time to wait before retrying the work item | seconds | 120 || || stop.retry.wait | time to wait * before retrying the stop | seconds | 120 || * } **/ -@Local(value = { HighAvailabilityManager.class }) +@Local(value = {HighAvailabilityManager.class}) public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvailabilityManager, ClusterManagerListener { protected static final Logger s_logger = Logger.getLogger(HighAvailabilityManagerImpl.class); WorkerThread[] _workers; @@ -118,22 +121,26 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai long _serverId; List _investigators; + public List getInvestigators() { - return _investigators; - } - public void setInvestigators(List _investigators) { - this._investigators = _investigators; - } + return _investigators; + } + + public void setInvestigators(List _investigators) { + this._investigators = _investigators; + } + + List _fenceBuilders; - List _fenceBuilders; public List getFenceBuilders() { - return _fenceBuilders; - } - public void setFenceBuilders(List _fenceBuilders) { - this._fenceBuilders = _fenceBuilders; - } + return _fenceBuilders; + } - @Inject + public void setFenceBuilders(List _fenceBuilders) { + this._fenceBuilders = _fenceBuilders; + } + + @Inject AgentManager _agentMgr; @Inject AlertManager _alertMgr; @@ -180,7 +187,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai } Status hostState = null; - for(Investigator investigator : _investigators) { + for (Investigator investigator : _investigators) { hostState = investigator.isAgentAlive(host); if (hostState != null) { if (s_logger.isDebugEnabled()) { @@ -203,7 +210,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai return; } - if(host.getHypervisorType() == HypervisorType.VMware) { + if (host.getHypervisorType() == HypervisorType.VMware) { s_logger.info("Don't restart for VMs on host " + host.getId() + " as the host is VMware host"); return; } @@ -288,18 +295,18 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } return; } - if(vm.getHypervisorType() == HypervisorType.VMware) { + if (vm.getHypervisorType() == HypervisorType.VMware) { s_logger.info("Skip HA for VMware VM " + vm.getInstanceName()); return; } @@ -320,7 +327,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai if (!(_forceHA || vm.isHaEnabled())) { String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterId() + ", pod id:" + vm.getPodIdToDeployIn(); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getHostName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + hostDesc, + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getHostName() + ", id: " + vm.getId() + + ") stopped unexpectedly on host " + hostDesc, "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly."); if (s_logger.isDebugEnabled()) { @@ -332,13 +340,13 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } @@ -351,7 +359,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai } } - HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId, vm.getState(), maxRetries + 1, vm.getUpdated()); + HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId, vm.getState(), maxRetries + 1, + vm.getUpdated()); _haDao.persist(work); if (s_logger.isInfoEnabled()) { @@ -395,7 +404,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai s_logger.info("HA on " + vm); if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) { - s_logger.info("VM " + vm + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + s_logger.info("VM " + vm + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + + vm.getUpdated() + " previous updated = " + work.getUpdateTime()); return null; } @@ -432,7 +442,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai } Investigator investigator = null; - for(Investigator it : _investigators) { + for (Investigator it : _investigators) { investigator = it; alive = investigator.isVmAlive(vm, host); s_logger.info(investigator.getName() + " found " + vm + "to be alive? " + alive); @@ -444,7 +454,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai boolean fenced = false; if (alive == null) { s_logger.debug("Fencing off VM that we don't know the state of"); - for(FenceBuilder fb : _fenceBuilders) { + for (FenceBuilder fb : _fenceBuilders) { Boolean result = fb.fenceOff(vm, host); s_logger.info("Fencer " + fb.getName() + " returned " + result); if (result != null && result) { @@ -468,7 +478,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai if (!fenced) { s_logger.debug("We were unable to fence off the VM " + vm); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); return (System.currentTimeMillis() >> 10) + _restartRetryInterval; } @@ -477,13 +488,13 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } work.setStep(Step.Scheduled); @@ -494,13 +505,13 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; - throw new CloudRuntimeException("Caught exception even though it should be handled.", e); + throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } } @@ -526,45 +537,59 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai return null; } - try { - HashMap params = new HashMap(); - if (_haTag != null) { - params.put(VirtualMachineProfile.Param.HaTag, _haTag); - } - _itMgr.advanceStart(vm.getUuid(), params, null); - - VMInstanceVO started = _instanceDao.findById(vm.getId()); + HashMap params = new HashMap(); + if (_haTag != null) { + params.put(VirtualMachineProfile.Param.HaTag, _haTag); + } + Outcome outcome = _itMgr.start(vm.getUuid(), params, null); - if (started.getState() == State.Running) { - s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId()); + VirtualMachine started = null; + try { + started = outcome.get(); + } catch (InterruptedException e) { + s_logger.warn("Received interrupted exception ", e); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, + "Interrupted while attempting to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); + return null; + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + if (cause instanceof InsufficientCapacityException) { + s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, + "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); + } else if (cause instanceof ResourceUnavailableException) { + s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, + "Resource is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); + } else if (cause instanceof ConcurrentOperationException) { + s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, + "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); + } else if (cause instanceof OperationTimedoutException) { + s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, + "Operations on the resource timed out for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); + } else { + s_logger.warn("Unable to restart " + vm, e); + _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, + "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); return null; } - - if (s_logger.isDebugEnabled()) { - s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval); - } - } catch (final InsufficientCapacityException e) { - s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, - "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); - } catch (final ResourceUnavailableException e) { - s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, - "Resource is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); - } catch (ConcurrentOperationException e) { - s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, - "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); - } catch (OperationTimedoutException e) { - s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); + } catch (CancellationException e) { + s_logger.warn("Received interrupted exception ", e); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, - "Operations on the resource timed out for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); - } catch (Exception e) { - s_logger.warn("Unable to restart " + vm, e); - _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, - "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); + "Interrupted while attempting to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); return null; } + + if (started != null && started.getState() == State.Running) { + s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId()); + return null; + } + + if (s_logger.isDebugEnabled()) { + s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval); + } vm = _instanceDao.findById(vm.getId()); work.setUpdateTime(vm.getUpdated()); work.setPreviousState(vm.getState()); @@ -650,16 +675,20 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai s_logger.info("Successfully stopped " + vm); return null; } else if (work.getWorkType() == WorkType.CheckStop) { - if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { - s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState()); + if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null + || vm.getHostId().longValue() != work.getHostId()) { + s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + + " State: " + vm.getState()); return null; } _itMgr.advanceStop(vm.getUuid(), false); s_logger.info("Stop for " + vm + " was successful"); return null; } else if (work.getWorkType() == WorkType.ForceStop) { - if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { - s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState()); + if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null + || vm.getHostId().longValue() != work.getHostId()) { + s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + + " State: " + vm.getState()); return null; } _itMgr.advanceStop(vm.getUuid(), true); @@ -834,7 +863,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvai nextTime = destroyVM(work); } else { assert false : "How did we get here with " + wt.toString(); - continue; + continue; } if (nextTime == null) { http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java index a1bb1ff..ab66bb0 100755 --- a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java +++ b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java @@ -2717,7 +2717,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V ConcurrentOperationException, ResourceUnavailableException { s_logger.debug("Starting router " + router); try { - _itMgr.start(router.getUuid(), params, planToDeploy); + _itMgr.easyStart(router.getUuid(), params, planToDeploy); router = _routerDao.findById(router.getId()); if (router.isStopPending()) { s_logger.info("Clear the stop pending flag of router " + router.getHostName() + " after start router successfully!"); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/resource/ResourceManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/resource/ResourceManagerImpl.java b/server/src/com/cloud/resource/ResourceManagerImpl.java index b429e01..effc4e0 100755 --- a/server/src/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/com/cloud/resource/ResourceManagerImpl.java @@ -2375,7 +2375,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager, + " as a part of deleteHost id=" + host.getId()); try { - _vmMgr.stop(vm.getUuid()); + _vmMgr.easyStop(vm.getUuid()); } catch (Exception e) { String errorMsg = "There was an error stopping the vm: " + vm http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/storage/StoragePoolAutomationImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/storage/StoragePoolAutomationImpl.java b/server/src/com/cloud/storage/StoragePoolAutomationImpl.java index 0e6d53b..e1550b8 100644 --- a/server/src/com/cloud/storage/StoragePoolAutomationImpl.java +++ b/server/src/com/cloud/storage/StoragePoolAutomationImpl.java @@ -203,32 +203,32 @@ public class StoragePoolAutomationImpl implements StoragePoolAutomation { // call the consoleproxymanager ConsoleProxyVO consoleProxy = _consoleProxyDao .findById(vmInstance.getId()); - vmMgr.stop(consoleProxy.getUuid()); + vmMgr.easyStop(consoleProxy.getUuid()); // update work status work.setStoppedForMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); if (restart) { - vmMgr.start(consoleProxy.getUuid(), null); + vmMgr.easyStart(consoleProxy.getUuid(), null); work.setStartedAfterMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } } else if (vmInstance.getType().equals(VirtualMachine.Type.User)) { UserVmVO userVm = userVmDao.findById(vmInstance.getId()); - vmMgr.stop(userVm.getUuid()); + vmMgr.easyStop(userVm.getUuid()); // update work status work.setStoppedForMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } else if (vmInstance.getType().equals(VirtualMachine.Type.SecondaryStorageVm)) { SecondaryStorageVmVO secStrgVm = _secStrgDao .findById(vmInstance.getId()); - vmMgr.stop(secStrgVm.getUuid()); + vmMgr.easyStop(secStrgVm.getUuid()); work.setStoppedForMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); if (restart) { - vmMgr.start(secStrgVm.getUuid(), null); + vmMgr.easyStart(secStrgVm.getUuid(), null); work.setStartedAfterMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } @@ -238,7 +238,7 @@ public class StoragePoolAutomationImpl implements StoragePoolAutomation { work.setStoppedForMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); if (restart) { - vmMgr.start(domR.getUuid(), null); + vmMgr.easyStart(domR.getUuid(), null); work.setStartedAfterMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } @@ -302,23 +302,23 @@ public class StoragePoolAutomationImpl implements StoragePoolAutomation { if (vmInstance.getType().equals(VirtualMachine.Type.ConsoleProxy)) { ConsoleProxyVO consoleProxy = _consoleProxyDao.findById(vmInstance.getId()); - vmMgr.start(consoleProxy.getUuid(), null); + vmMgr.easyStart(consoleProxy.getUuid(), null); work.setStartedAfterMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } else if (vmInstance.getType().equals(VirtualMachine.Type.SecondaryStorageVm)) { SecondaryStorageVmVO ssVm = _secStrgDao.findById(vmInstance.getId()); - vmMgr.advanceStart(ssVm.getUuid(), null, null); + vmMgr.start(ssVm.getUuid(), null, null); work.setStartedAfterMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } else if (vmInstance.getType().equals(VirtualMachine.Type.DomainRouter)) { DomainRouterVO domR = _domrDao.findById(vmInstance.getId()); - vmMgr.start(domR.getUuid(), null); + vmMgr.easyStart(domR.getUuid(), null); work.setStartedAfterMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } else if (vmInstance.getType().equals(VirtualMachine.Type.User)) { UserVmVO userVm = userVmDao.findById(vmInstance.getId()); - vmMgr.start(userVm.getUuid(), null); + vmMgr.easyStart(userVm.getUuid(), null); work.setStartedAfterMaintenance(true); _storagePoolWorkDao.update(work.getId(), work); } http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/storage/secondary/SecondaryStorageManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/storage/secondary/SecondaryStorageManagerImpl.java b/server/src/com/cloud/storage/secondary/SecondaryStorageManagerImpl.java index 65e76f6..d4e360a 100755 --- a/server/src/com/cloud/storage/secondary/SecondaryStorageManagerImpl.java +++ b/server/src/com/cloud/storage/secondary/SecondaryStorageManagerImpl.java @@ -258,7 +258,7 @@ public class SecondaryStorageManagerImpl extends ManagerBase implements Secondar SecondaryStorageVmVO secStorageVm = _secStorageVmDao.findById(secStorageVmId); Account systemAcct = _accountMgr.getSystemAccount(); User systemUser = _accountMgr.getSystemUser(); - _itMgr.start(secStorageVm.getUuid(), null); + _itMgr.easyStart(secStorageVm.getUuid(), null); return _secStorageVmDao.findById(secStorageVmId); } catch (Exception e) { s_logger.warn("Exception while trying to start secondary storage vm", e); @@ -921,7 +921,7 @@ public class SecondaryStorageManagerImpl extends ManagerBase implements Secondar try { if (secStorageVmLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_SYNC)) { try { - _itMgr.stop(secStorageVm.getUuid()); + _itMgr.easyStop(secStorageVm.getUuid()); return true; } catch (Exception e) { s_logger.warn("Unable to stop " + secStorageVm, e); http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/user/AccountManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/user/AccountManagerImpl.java b/server/src/com/cloud/user/AccountManagerImpl.java index 3e4520c..ad2654b 100755 --- a/server/src/com/cloud/user/AccountManagerImpl.java +++ b/server/src/com/cloud/user/AccountManagerImpl.java @@ -815,7 +815,7 @@ public class AccountManagerImpl extends ManagerBase implements AccountManager, M boolean success = true; for (VMInstanceVO vm : vms) { try { - _itMgr.stop(vm.getUuid()); + _itMgr.easyStop(vm.getUuid()); } catch (CloudRuntimeException ote) { s_logger.warn("Operation for stopping vm timed out, unable to stop vm " + vm.getHostName(), ote); success = false; http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/vm/UserVmManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/vm/UserVmManagerImpl.java b/server/src/com/cloud/vm/UserVmManagerImpl.java index 7a79fc6..d37ef95 100755 --- a/server/src/com/cloud/vm/UserVmManagerImpl.java +++ b/server/src/com/cloud/vm/UserVmManagerImpl.java @@ -4552,7 +4552,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Use if (needRestart) { try { - _itMgr.stop(vm.getUuid()); + _itMgr.easyStop(vm.getUuid()); } catch (CloudRuntimeException e) { s_logger.debug("Stop vm " + vm.getUuid() + " failed"); CloudRuntimeException ex = new CloudRuntimeException("Stop vm failed for specified vmId"); @@ -4612,7 +4612,7 @@ public class UserVmManagerImpl extends ManagerBase implements UserVmManager, Use if (needRestart) { try { - _itMgr.start(vm.getUuid(), null); + _itMgr.easyStart(vm.getUuid(), null); } catch (Exception e) { s_logger.debug("Unable to start VM " + vm.getUuid(), e); CloudRuntimeException ex = new CloudRuntimeException( http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java b/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java index 8d1c628..64adc92 100644 --- a/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java +++ b/server/src/com/cloud/vm/snapshot/VMSnapshotManagerImpl.java @@ -673,7 +673,7 @@ public class VMSnapshotManagerImpl extends ManagerBase implements VMSnapshotMana // start or stop VM first, if revert from stopped state to running state, or from running to stopped if(userVm.getState() == VirtualMachine.State.Stopped && vmSnapshotVo.getType() == VMSnapshot.Type.DiskAndMemory){ try { - _itMgr.start(userVm.getUuid(), new HashMap()); + _itMgr.easyStart(userVm.getUuid(), new HashMap()); userVm = _userVMDao.findById(userVm.getId()); hostId = userVm.getHostId(); } catch (CloudRuntimeException e) { http://git-wip-us.apache.org/repos/asf/cloudstack/blob/fa525a7d/server/test/com/cloud/vm/VmWorkMockVirtualMachineManagerImpl.java ---------------------------------------------------------------------- diff --git a/server/test/com/cloud/vm/VmWorkMockVirtualMachineManagerImpl.java b/server/test/com/cloud/vm/VmWorkMockVirtualMachineManagerImpl.java index 3f35a42..0c23220 100644 --- a/server/test/com/cloud/vm/VmWorkMockVirtualMachineManagerImpl.java +++ b/server/test/com/cloud/vm/VmWorkMockVirtualMachineManagerImpl.java @@ -26,6 +26,7 @@ import javax.naming.ConfigurationException; import org.apache.log4j.Logger; import org.apache.cloudstack.framework.jobs.AsyncJobExecutionContext; +import org.apache.cloudstack.framework.jobs.Outcome; import org.apache.cloudstack.framework.messagebus.MessageBus; import org.apache.cloudstack.jobs.JobInfo; @@ -139,17 +140,17 @@ public class VmWorkMockVirtualMachineManagerImpl implements VirtualMachineManage } @Override - public void start(String vmUuid, Map params) { + public void easyStart(String vmUuid, Map params) { // TODO Auto-generated method stub } @Override - public void start(String vmUuid, Map params, DeploymentPlan planToDeploy) { + public void easyStart(String vmUuid, Map params, DeploymentPlan planToDeploy) { // TODO Auto-generated method stub } @Override - public void stop(String vmUuid) { + public void easyStop(String vmUuid) { // TODO Auto-generated method stub } @@ -172,11 +173,9 @@ public class VmWorkMockVirtualMachineManagerImpl implements VirtualMachineManage } @Override - public void advanceStart(String vmUuid, Map params, - DeploymentPlan planToDeploy) throws InsufficientCapacityException, - ResourceUnavailableException, ConcurrentOperationException, - OperationTimedoutException { - // TODO Auto-generated method stub + public Outcome start(String vmUuid, Map params, + DeploymentPlan planToDeploy) { + return null; } @Override