libcloud-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From quent...@apache.org
Subject [2/4] libcloud git commit: destroy_node() will return True only when all node's resources have been cleaned up successfully. Can be called multiple times to retry when partial errors happen.
Date Tue, 10 Oct 2017 05:12:54 GMT
destroy_node() will return True only when all node's resources have been cleaned up successfully.
Can be called multiple times to retry when partial errors happen.

Signed-off-by: Quentin Pradet <quentinp@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/libcloud/repo
Commit: http://git-wip-us.apache.org/repos/asf/libcloud/commit/fbf015d0
Tree: http://git-wip-us.apache.org/repos/asf/libcloud/tree/fbf015d0
Diff: http://git-wip-us.apache.org/repos/asf/libcloud/diff/fbf015d0

Branch: refs/heads/trunk
Commit: fbf015d0d42f515d41142dc7c8aa03a41f6b61c9
Parents: cd68110
Author: Lucas Di Pentima <ldipentima@veritasgenetics.com>
Authored: Fri Oct 6 19:15:29 2017 -0300
Committer: Quentin Pradet <quentinp@apache.org>
Committed: Tue Oct 10 09:01:17 2017 +0400

----------------------------------------------------------------------
 libcloud/compute/drivers/azure_arm.py   | 19 +++++++++++++++----
 libcloud/test/compute/test_azure_arm.py | 22 ++++++++++++++++++++--
 2 files changed, 35 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/libcloud/blob/fbf015d0/libcloud/compute/drivers/azure_arm.py
----------------------------------------------------------------------
diff --git a/libcloud/compute/drivers/azure_arm.py b/libcloud/compute/drivers/azure_arm.py
index 913c461..3a5b648 100644
--- a/libcloud/compute/drivers/azure_arm.py
+++ b/libcloud/compute/drivers/azure_arm.py
@@ -699,8 +699,14 @@ class AzureNodeDriver(NodeDriver):
         :rtype: ``bool``
         """
 
+        do_node_polling = True
+        success = True
+
         # This returns a 202 (Accepted) which means that the delete happens
         # asynchronously.
+        # If returns 404, we may be retrying a previous destroy_node call that
+        # failed to clean up its related resources, so it isn't taken as a
+        # failure.
         try:
             self.connection.request(node.id,
                                     params={"api-version": "2015-06-15"},
@@ -708,11 +714,14 @@ class AzureNodeDriver(NodeDriver):
         except BaseHTTPError as h:
             if h.code == 202:
                 pass
+            elif h.code == 404:
+                # No need to ask again, node already down.
+                do_node_polling = False
             else:
                 return False
 
         # Need to poll until the node actually goes away.
-        while True:
+        while do_node_polling:
             try:
                 time.sleep(10)
                 self.connection.request(
@@ -738,13 +747,14 @@ class AzureNodeDriver(NodeDriver):
                             method='DELETE')
                         break
                     except BaseHTTPError as h:
-                        if h.code == 202:
+                        if h.code == 202 or h.code == 404:
                             break
                         inuse = h.message.startswith("[NicInUse]")
                         if h.code == 400 and inuse:
                             time.sleep(10)
                         else:
                             # NIC cleanup failed, try cleaning up the VHD.
+                            success = False
                             break
 
         # Optionally clean up OS disk VHD.
@@ -766,9 +776,10 @@ class AzureNodeDriver(NodeDriver):
                         # LibcloudError.  Wait a bit and try again.
                         time.sleep(10)
                     else:
-                        raise
+                        success = False
+                        break
 
-        return True
+        return success
 
     def create_volume(self, size, name, location=None, snapshot=None,
                       ex_resource_group=None, ex_account_type=None,

http://git-wip-us.apache.org/repos/asf/libcloud/blob/fbf015d0/libcloud/test/compute/test_azure_arm.py
----------------------------------------------------------------------
diff --git a/libcloud/test/compute/test_azure_arm.py b/libcloud/test/compute/test_azure_arm.py
index af1776e..b12ee93 100644
--- a/libcloud/test/compute/test_azure_arm.py
+++ b/libcloud/test/compute/test_azure_arm.py
@@ -21,6 +21,7 @@ from datetime import datetime
 import mock
 
 from libcloud.common.exceptions import BaseHTTPError
+from libcloud.common.types import LibcloudError
 from libcloud.compute.base import (NodeLocation, NodeSize, VolumeSnapshot,
                                    StorageVolume)
 from libcloud.compute.drivers.azure_arm import AzureImage, NodeAuthPassword
@@ -147,6 +148,23 @@ class AzureNodeDriverTests(LibcloudTestCase):
         ret = self.driver.destroy_node(node)
         self.assertTrue(ret)
 
+    def test_destroy_node__node_not_found(self):
+        """
+        This simulates the case when destroy_node is being called for the 2nd
+        time because some related resource failed to clean up, so the DELETE
+        operation on the node will return 404 (because it was already deleted)
+        but the method should return success.
+        """
+        def error(e, **kwargs):
+            raise e(**kwargs)
+        node = self.driver.list_nodes()[0]
+        AzureMockHttp.responses = [
+            # 404 (Not Found) to the DELETE request
+            lambda f: error(BaseHTTPError, code=404, message='Not found'),
+        ]
+        ret = self.driver.destroy_node(node)
+        self.assertTrue(ret)
+
     @mock.patch('time.sleep', return_value=None)
     def test_destroy_node__async(self, time_sleep_mock):
         def error(e, **kwargs):
@@ -172,10 +190,10 @@ class AzureNodeDriverTests(LibcloudTestCase):
             # 404 means node destroyed successfully
             lambda f: error(BaseHTTPError, code=404, message='Not found'),
             # 500 - transient error when trying to clean up the NIC
-            lambda f: error(BaseHTTPError, code=500, message="Cloud weather")
+            lambda f: error(BaseHTTPError, code=500, message="Cloud weather"),
         ]
         ret = self.driver.destroy_node(node)
-        self.assertTrue(ret)
+        self.assertFalse(ret)
 
     def test_destroy_node__failed(self):
         def error(e, **kwargs):


Mime
View raw message