aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ma...@apache.org
Subject git commit: Adding support for transient storage errors.
Date Thu, 25 Sep 2014 18:34:51 GMT
Repository: incubator-aurora
Updated Branches:
  refs/heads/master caf7e1702 -> a64bd057c


Adding support for transient storage errors.

Bugs closed: AURORA-187

Reviewed at https://reviews.apache.org/r/25970/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/a64bd057
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/a64bd057
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/a64bd057

Branch: refs/heads/master
Commit: a64bd057c408dfaac5f12cd1441b32decfcb4099
Parents: caf7e17
Author: Maxim Khutornenko <maxim@apache.org>
Authored: Thu Sep 25 11:34:14 2014 -0700
Committer: Maxim Khutornenko <maxim@apache.org>
Committed: Thu Sep 25 11:34:14 2014 -0700

----------------------------------------------------------------------
 .../storage/CallOrderEnforcingStorage.java      |  2 +-
 .../aurora/scheduler/storage/Storage.java       |  9 ++++++++
 .../thrift/aop/LoggingInterceptor.java          |  4 ++++
 .../aurora/client/api/scheduler_client.py       | 11 ++++++++--
 .../thrift/org/apache/aurora/gen/api.thrift     |  4 +++-
 .../thrift/SchedulerThriftInterfaceTest.java    | 23 ++++++++++++++++++++
 .../aurora/client/api/test_scheduler_client.py  | 22 +++++++++++++++++++
 7 files changed, 71 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/a64bd057/src/main/java/org/apache/aurora/scheduler/storage/CallOrderEnforcingStorage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/storage/CallOrderEnforcingStorage.java b/src/main/java/org/apache/aurora/scheduler/storage/CallOrderEnforcingStorage.java
index 671dbd1..0d02207 100644
--- a/src/main/java/org/apache/aurora/scheduler/storage/CallOrderEnforcingStorage.java
+++ b/src/main/java/org/apache/aurora/scheduler/storage/CallOrderEnforcingStorage.java
@@ -75,7 +75,7 @@ public class CallOrderEnforcingStorage implements NonVolatileStorage {
 
   private void checkInState(State state) throws StorageException {
     if (stateMachine.getState() != state) {
-      throw new StorageException("Storage is not " + state);
+      throw new TransientStorageException("Storage is not " + state);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/a64bd057/src/main/java/org/apache/aurora/scheduler/storage/Storage.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/storage/Storage.java b/src/main/java/org/apache/aurora/scheduler/storage/Storage.java
index 775564e..682bca8 100644
--- a/src/main/java/org/apache/aurora/scheduler/storage/Storage.java
+++ b/src/main/java/org/apache/aurora/scheduler/storage/Storage.java
@@ -166,6 +166,15 @@ public interface Storage {
   }
 
   /**
+   * Indicates that stable storage is temporarily unavailable.
+   */
+  class TransientStorageException extends StorageException {
+    public TransientStorageException(String message) {
+      super(message);
+    }
+  }
+
+  /**
    * Executes the unit of read-only {@code work}.  All data in the stores may be expected to be
    * consistent, as the invocation is mutually exclusive of any writes.
    *

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/a64bd057/src/main/java/org/apache/aurora/scheduler/thrift/aop/LoggingInterceptor.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/thrift/aop/LoggingInterceptor.java b/src/main/java/org/apache/aurora/scheduler/thrift/aop/LoggingInterceptor.java
index a21ab90..cad63c7 100644
--- a/src/main/java/org/apache/aurora/scheduler/thrift/aop/LoggingInterceptor.java
+++ b/src/main/java/org/apache/aurora/scheduler/thrift/aop/LoggingInterceptor.java
@@ -32,6 +32,7 @@ import org.apache.aurora.gen.ExecutorConfig;
 import org.apache.aurora.gen.JobConfiguration;
 import org.apache.aurora.gen.ResponseCode;
 import org.apache.aurora.gen.SessionKey;
+import org.apache.aurora.scheduler.storage.Storage;
 import org.apache.aurora.scheduler.thrift.Util;
 
 /**
@@ -85,6 +86,9 @@ class LoggingInterceptor implements MethodInterceptor {
     LOG.info(message);
     try {
       return invocation.proceed();
+    } catch (Storage.TransientStorageException e) {
+      LOG.log(Level.WARNING, "Uncaught transient exception while handling " + message, e);
+      return Util.addMessage(Util.emptyResponse(), ResponseCode.ERROR_TRANSIENT, e);
     } catch (RuntimeException e) {
       LOG.log(Level.WARNING, "Uncaught exception while handling " + message, e);
       return Util.addMessage(Util.emptyResponse(), ResponseCode.ERROR, e);

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/a64bd057/src/main/python/apache/aurora/client/api/scheduler_client.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/scheduler_client.py b/src/main/python/apache/aurora/client/api/scheduler_client.py
index ec63251..b400cb2 100644
--- a/src/main/python/apache/aurora/client/api/scheduler_client.py
+++ b/src/main/python/apache/aurora/client/api/scheduler_client.py
@@ -31,6 +31,7 @@ from apache.aurora.common.transport import TRequestsTransport
 
 from gen.apache.aurora.api import AuroraAdmin, ReadOnlyScheduler
 from gen.apache.aurora.api.constants import CURRENT_API_VERSION
+from gen.apache.aurora.api.ttypes import ResponseCode
 
 try:
   from urlparse import urljoin
@@ -192,6 +193,7 @@ class SchedulerProxy(object):
 
   class Error(Exception): pass
   class TimeoutError(Error): pass
+  class TransientError(Error): pass
   class AuthenticationError(Error): pass
   class APIVersionError(Error): pass
   class ThriftInternalError(Error): pass
@@ -288,8 +290,13 @@ class SchedulerProxy(object):
             method = getattr(self.client(), method_name)
             if not callable(method):
               return method
-            return method(*(args + auth_args))
-          except (TTransport.TTransportException, self.TimeoutError) as e:
+
+            resp = method(*(args + auth_args))
+            if resp is not None and resp.responseCode == ResponseCode.ERROR_TRANSIENT:
+              raise self.TransientError(", ".join(
+                  [m for m in resp.details] if resp.details else []))
+            return resp
+          except (TTransport.TTransportException, self.TimeoutError, self.TransientError) as e:
             if not self._terminating:
               log.warning('Connection error with scheduler: %s, reconnecting...' % e)
               self.invalidate()

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/a64bd057/src/main/thrift/org/apache/aurora/gen/api.thrift
----------------------------------------------------------------------
diff --git a/src/main/thrift/org/apache/aurora/gen/api.thrift b/src/main/thrift/org/apache/aurora/gen/api.thrift
index 85d591b..cebd2c3 100644
--- a/src/main/thrift/org/apache/aurora/gen/api.thrift
+++ b/src/main/thrift/org/apache/aurora/gen/api.thrift
@@ -28,7 +28,9 @@ enum ResponseCode {
   WARNING         = 3,
   AUTH_FAILED     = 4,
   /** Raised when a Lock-protected operation failed due to lock validation. */
-  LOCK_ERROR      = 5
+  LOCK_ERROR      = 5,
+  /** Raised when a scheduler is transiently unavailable and later retry is recommended. */
+  ERROR_TRANSIENT = 6
 }
 
 const i32 THRIFT_API_VERSION = 3

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/a64bd057/src/test/java/org/apache/aurora/scheduler/thrift/SchedulerThriftInterfaceTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/thrift/SchedulerThriftInterfaceTest.java b/src/test/java/org/apache/aurora/scheduler/thrift/SchedulerThriftInterfaceTest.java
index 4ef78db..a894a3a 100644
--- a/src/test/java/org/apache/aurora/scheduler/thrift/SchedulerThriftInterfaceTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/thrift/SchedulerThriftInterfaceTest.java
@@ -152,6 +152,7 @@ import static org.apache.aurora.gen.MaintenanceMode.NONE;
 import static org.apache.aurora.gen.MaintenanceMode.SCHEDULED;
 import static org.apache.aurora.gen.ResponseCode.AUTH_FAILED;
 import static org.apache.aurora.gen.ResponseCode.ERROR;
+import static org.apache.aurora.gen.ResponseCode.ERROR_TRANSIENT;
 import static org.apache.aurora.gen.ResponseCode.INVALID_REQUEST;
 import static org.apache.aurora.gen.ResponseCode.LOCK_ERROR;
 import static org.apache.aurora.gen.ResponseCode.OK;
@@ -1847,6 +1848,28 @@ public class SchedulerThriftInterfaceTest extends EasyMockTest {
   }
 
   @Test
+  public void testAddInstancesFailsWithTransient() throws Exception {
+    AddInstancesConfig config = createInstanceConfig(defaultTask(true));
+    expectAuth(ROLE, true);
+    expect(cronJobManager.hasJob(JOB_KEY)).andThrow(new Storage.TransientStorageException("retry"));
+
+    control.replay();
+
+    assertResponse(ERROR_TRANSIENT, thrift.addInstances(config, LOCK.newBuilder(), SESSION));
+  }
+
+  @Test
+  public void testAddInstancesFailsWithNonTransient() throws Exception {
+    AddInstancesConfig config = createInstanceConfig(defaultTask(true));
+    expectAuth(ROLE, true);
+    expect(cronJobManager.hasJob(JOB_KEY)).andThrow(new Storage.StorageException("no retry"));
+
+    control.replay();
+
+    assertResponse(ERROR, thrift.addInstances(config, LOCK.newBuilder(), SESSION));
+  }
+
+  @Test
   public void testAddInstancesLockCheckFails() throws Exception {
     AddInstancesConfig config = createInstanceConfig(defaultTask(true));
     expectAuth(ROLE, true);

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/a64bd057/src/test/python/apache/aurora/client/api/test_scheduler_client.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/api/test_scheduler_client.py b/src/test/python/apache/aurora/client/api/test_scheduler_client.py
index 527448f..1cbfbf8 100644
--- a/src/test/python/apache/aurora/client/api/test_scheduler_client.py
+++ b/src/test/python/apache/aurora/client/api/test_scheduler_client.py
@@ -343,3 +343,25 @@ def test_connect_scheduler(mock_client):
   assert mock_client.return_value.open.call_count == 2
   mock_time.sleep.assert_called_once_with(
       scheduler_client.SchedulerClient.RETRY_TIMEOUT.as_(Time.SECONDS))
+
+
+@mock.patch('apache.aurora.client.api.scheduler_client.SchedulerClient',
+            spec=scheduler_client.SchedulerClient)
+def test_transient_error(client):
+  mock_scheduler_client = mock.Mock(spec=scheduler_client.SchedulerClient)
+  mock_thrift_client = mock.Mock(spec=AuroraAdmin.Client)
+  mock_thrift_client.killTasks.side_effect = [
+      Response(responseCode=ResponseCode.ERROR_TRANSIENT, details=["message1", "message2"]),
+      Response(responseCode=ResponseCode.ERROR_TRANSIENT),
+      Response(responseCode=ResponseCode.OK)]
+
+  mock_thrift_client.killTasks.return_value = Response(
+      responseCode=ResponseCode.OK, messageDEPRECATED="ok")
+
+  mock_scheduler_client.get_thrift_client.return_value = mock_thrift_client
+  client.get.return_value = mock_scheduler_client
+
+  proxy = TestSchedulerProxy('local')
+  proxy.killTasks(TaskQuery())
+
+  assert mock_thrift_client.killTasks.call_count == 3


Mime
View raw message