cloudstack-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kous...@apache.org
Subject [1/2] git commit: updated refs/heads/master to f732c7d
Date Tue, 01 Sep 2015 12:31:51 GMT
Repository: cloudstack
Updated Branches:
  refs/heads/master c8bfeb88c -> f732c7d1e


CLOUDSTACK-8785: Proper enforcement of retry count (max.retries) for all work type handled
by HighAvailability manager
Retry count is properly enforced for all work types in HA manager. Also reorganized some of
the code for easy testing.


Project: http://git-wip-us.apache.org/repos/asf/cloudstack/repo
Commit: http://git-wip-us.apache.org/repos/asf/cloudstack/commit/cbf2c3bb
Tree: http://git-wip-us.apache.org/repos/asf/cloudstack/tree/cbf2c3bb
Diff: http://git-wip-us.apache.org/repos/asf/cloudstack/diff/cbf2c3bb

Branch: refs/heads/master
Commit: cbf2c3bbf66214ec4a6b6f0666657185446c88ce
Parents: cab57b2
Author: Koushik Das <koushik@apache.org>
Authored: Fri Aug 28 17:59:17 2015 +0530
Committer: Koushik Das <koushik@apache.org>
Committed: Fri Aug 28 17:59:17 2015 +0530

----------------------------------------------------------------------
 .../cloud/ha/HighAvailabilityManagerImpl.java   | 126 +++++++++----------
 .../ha/HighAvailabilityManagerImplTest.java     |  44 +++++++
 2 files changed, 103 insertions(+), 67 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cbf2c3bb/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
----------------------------------------------------------------------
diff --git a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
index bec1fd5..cce4457 100644
--- a/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
+++ b/server/src/com/cloud/ha/HighAvailabilityManagerImpl.java
@@ -383,10 +383,10 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
         }
 
         List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId());
-        int maxRetries = 0;
+        int timesTried = 0;
         for (HaWorkVO item : items) {
-            if (maxRetries < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures))
{
-                maxRetries = item.getTimesTried();
+            if (timesTried < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures))
{
+                timesTried = item.getTimesTried();
                 break;
             }
         }
@@ -396,7 +396,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
         }
 
         HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ?
Step.Investigating : Step.Scheduled,
-                hostId != null ? hostId : 0L, vm.getState(), maxRetries + 1, vm.getUpdated());
+                hostId != null ? hostId : 0L, vm.getState(), timesTried, vm.getUpdated());
         _haDao.persist(work);
 
         if (s_logger.isInfoEnabled()) {
@@ -407,7 +407,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
 
     }
 
-    protected Long restart(HaWorkVO work) {
+    protected Long restart(final HaWorkVO work) {
         List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId());
         if (items.size() > 0) {
             StringBuilder str = new StringBuilder("Cancelling this work item because newer
ones have been scheduled.  Work Ids = [");
@@ -571,11 +571,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
             return null;
         }
 
-        if (work.getTimesTried() > _maxRetries) {
-            s_logger.warn("Retried to max times so deleting: " + vmId);
-            return null;
-        }
-
         try {
             HashMap<VirtualMachineProfile.Param, Object> params = new HashMap<VirtualMachineProfile.Param,
Object>();
             if (_haTag != null) {
@@ -663,7 +658,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
         _haDao.delete(vm.getId(), WorkType.Destroy);
     }
 
-    protected Long destroyVM(HaWorkVO work) {
+    protected Long destroyVM(final HaWorkVO work) {
         final VirtualMachine vm = _itMgr.findById(work.getInstanceId());
         s_logger.info("Destroying " + vm.toString());
         try {
@@ -690,7 +685,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
             s_logger.debug("concurrent operation: " + e.getMessage());
         }
 
-        work.setTimesTried(work.getTimesTried() + 1);
         return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
     }
 
@@ -738,10 +732,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
             s_logger.debug("operation timed out: " + e.getMessage());
         }
 
-        work.setTimesTried(work.getTimesTried() + 1);
-        if (s_logger.isDebugEnabled()) {
-            s_logger.debug("Stop was unsuccessful.  Rescheduling");
-        }
         return (System.currentTimeMillis() >> 10) + _stopRetryInterval;
     }
 
@@ -765,6 +755,56 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
         return vms;
     }
 
+    private void rescheduleWork(final HaWorkVO work, final long nextTime) {
+        s_logger.info("Rescheduling work " + work + " to try again at " + new Date(nextTime
<< 10));
+        work.setTimeToTry(nextTime);
+        work.setTimesTried(work.getTimesTried() + 1);
+        work.setServerId(null);
+        work.setDateTaken(null);
+    }
+
+    private void processWork(final HaWorkVO work) {
+        try {
+            final WorkType wt = work.getWorkType();
+            Long nextTime = null;
+            if (wt == WorkType.Migration) {
+                nextTime = migrate(work);
+            } else if (wt == WorkType.HA) {
+                nextTime = restart(work);
+            } else if (wt == WorkType.Stop || wt == WorkType.CheckStop || wt == WorkType.ForceStop)
{
+                nextTime = stopVM(work);
+            } else if (wt == WorkType.Destroy) {
+                nextTime = destroyVM(work);
+            } else {
+                assert false : "How did we get here with " + wt.toString();
+                return;
+            }
+
+            if (nextTime == null) {
+                s_logger.info("Completed work " + work);
+                work.setStep(Step.Done);
+            } else {
+                rescheduleWork(work, nextTime.longValue());
+            }
+        } catch (Exception e) {
+            s_logger.warn("Encountered unhandled exception during HA process, reschedule
work", e);
+
+            long nextTime = (System.currentTimeMillis() >> 10) + _restartRetryInterval;
+            rescheduleWork(work, nextTime);
+
+            // if restart failed in the middle due to exception, VM state may has been changed
+            // recapture into the HA worker so that it can really continue in it next turn
+            VMInstanceVO vm = _instanceDao.findById(work.getInstanceId());
+            work.setUpdateTime(vm.getUpdated());
+            work.setPreviousState(vm.getState());
+        }
+        if (!Step.Done.equals(work.getStep()) && work.getTimesTried() >= _maxRetries)
{
+            s_logger.warn("Giving up, retried max. times for work: " + work);
+            work.setStep(Step.Done);
+        }
+        _haDao.update(work.getId(), work);
+    }
+
     @Override
     public boolean configure(final String name, final Map<String, Object> xmlParams)
throws ConfigurationException {
         _serverId = _msServer.getId();
@@ -881,7 +921,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
         private void runWithContext() {
             HaWorkVO work = null;
             try {
-                s_logger.trace("Checking the database");
+                s_logger.trace("Checking the database for work");
                 work = _haDao.take(_serverId);
                 if (work == null) {
                     try {
@@ -896,56 +936,8 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements
HighAvai
                 }
 
                 NDC.push("work-" + work.getId());
-                s_logger.info("Processing " + work);
-
-                try {
-                    final WorkType wt = work.getWorkType();
-                    Long nextTime = null;
-                    if (wt == WorkType.Migration) {
-                        nextTime = migrate(work);
-                    } else if (wt == WorkType.HA) {
-                        nextTime = restart(work);
-                    } else if (wt == WorkType.Stop || wt == WorkType.CheckStop || wt == WorkType.ForceStop)
{
-                        nextTime = stopVM(work);
-                    } else if (wt == WorkType.Destroy) {
-                        nextTime = destroyVM(work);
-                    } else {
-                        assert false : "How did we get here with " + wt.toString();
-                        return;
-                    }
-
-                    if (nextTime == null) {
-                        s_logger.info("Completed " + work);
-                        work.setStep(Step.Done);
-                    } else {
-                        s_logger.info("Rescheduling " + work + " to try again at " + new
Date(nextTime << 10));
-                        work.setTimeToTry(nextTime);
-                        work.setTimesTried(work.getTimesTried() + 1);
-                        work.setServerId(null);
-                        work.setDateTaken(null);
-                    }
-                } catch (Exception e) {
-                    s_logger.warn("Encountered unhandled exception during HA process, reschedule
retry", e);
-
-                    long nextTime = (System.currentTimeMillis() >> 10) + _restartRetryInterval;
-
-                    s_logger.info("Rescheduling " + work + " to try again at " + new Date(nextTime
<< 10));
-                    work.setTimeToTry(nextTime);
-                    work.setTimesTried(work.getTimesTried() + 1);
-                    work.setServerId(null);
-                    work.setDateTaken(null);
-
-                    // if restart failed in the middle due to exception, VM state may has
been changed
-                    // recapture into the HA worker so that it can really continue in it
next turn
-                    VMInstanceVO vm = _instanceDao.findById(work.getInstanceId());
-                    work.setUpdateTime(vm.getUpdated());
-                    work.setPreviousState(vm.getState());
-                    if (!Step.Done.equals(work.getStep()) && work.getTimesTried()
>= _maxRetries) {
-                        s_logger.warn("Giving up, retries max times for work: " + work);
-                        work.setStep(Step.Done);
-                    }
-                }
-                _haDao.update(work.getId(), work);
+                s_logger.info("Processing work " + work);
+                processWork(work);
             } catch (final Throwable th) {
                 s_logger.error("Caught this throwable, ", th);
             } finally {

http://git-wip-us.apache.org/repos/asf/cloudstack/blob/cbf2c3bb/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
----------------------------------------------------------------------
diff --git a/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java b/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
index 087c71a..3102c9a 100644
--- a/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
+++ b/server/test/com/cloud/ha/HighAvailabilityManagerImplTest.java
@@ -16,11 +16,14 @@
 // under the License.
 package com.cloud.ha;
 
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
 import java.lang.reflect.Array;
 import java.lang.reflect.Field;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -31,6 +34,7 @@ import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationSer
 import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
 import org.apache.cloudstack.managed.context.ManagedContext;
 import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.mockito.Mock;
@@ -44,6 +48,8 @@ import com.cloud.dc.DataCenterVO;
 import com.cloud.dc.HostPodVO;
 import com.cloud.dc.dao.DataCenterDao;
 import com.cloud.dc.dao.HostPodDao;
+import com.cloud.ha.HighAvailabilityManager.Step;
+import com.cloud.ha.HighAvailabilityManager.WorkType;
 import com.cloud.ha.dao.HighAvailabilityDao;
 import com.cloud.host.Host;
 import com.cloud.host.HostVO;
@@ -106,6 +112,17 @@ public class HighAvailabilityManagerImplTest {
     HostVO hostVO;
 
     HighAvailabilityManagerImpl highAvailabilityManager;
+    HighAvailabilityManagerImpl highAvailabilityManagerSpy;
+    static Method processWorkMethod = null;
+
+    @BeforeClass
+    public static void initOnce() {
+        try {
+            processWorkMethod = HighAvailabilityManagerImpl.class.getDeclaredMethod("processWork",
HaWorkVO.class);
+            processWorkMethod.setAccessible(true);
+        } catch (NoSuchMethodException e) {
+        }
+    }
 
     @Before
     public void setup() throws IllegalArgumentException,
@@ -123,8 +140,12 @@ public class HighAvailabilityManagerImplTest {
                         injectField.set(highAvailabilityManager, obj);
                     }
                 }
+            } else if (injectField.getName().equals("_maxRetries")) {
+                injectField.setAccessible(true);
+                injectField.set(highAvailabilityManager, 5);
             }
         }
+        highAvailabilityManagerSpy = Mockito.spy(highAvailabilityManager);
     }
 
     @Test
@@ -201,4 +222,27 @@ public class HighAvailabilityManagerImplTest {
 
         assertNull(highAvailabilityManager.investigate(1l));
     }
+
+    private void processWorkWithRetryCount(int count, Step expectedStep) {
+        assertNotNull(processWorkMethod);
+        HaWorkVO work = new HaWorkVO(1l, VirtualMachine.Type.User, WorkType.Migration, Step.Scheduled,
1l, VirtualMachine.State.Running, count, 12345678l);
+        Mockito.doReturn(12345678l).when(highAvailabilityManagerSpy).migrate(work);
+        try {
+            processWorkMethod.invoke(highAvailabilityManagerSpy, work);
+        } catch (IllegalAccessException e) {
+        } catch (IllegalArgumentException e) {
+        } catch (InvocationTargetException e) {
+        }
+        assertTrue(work.getStep() == expectedStep);
+    }
+
+    @Test
+    public void processWorkWithRetryCountExceeded() {
+        processWorkWithRetryCount(5, Step.Done); // max retry count is 5
+    }
+
+    @Test
+    public void processWorkWithRetryCountNotExceeded() {
+        processWorkWithRetryCount(3, Step.Scheduled);
+    }
 }


Mime
View raw message