aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wfar...@apache.org
Subject git commit: Add updater support for disabling rollback and batched update settings.
Date Sat, 27 Sep 2014 07:37:05 GMT
Repository: incubator-aurora
Updated Branches:
  refs/heads/master fffd7c227 -> e7f050003


Add updater support for disabling rollback and batched update settings.

Bugs closed: AURORA-744, AURORA-747

Reviewed at https://reviews.apache.org/r/26106/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/e7f05000
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/e7f05000
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/e7f05000

Branch: refs/heads/master
Commit: e7f0500037c5496693d99d624ab22daded411ac6
Parents: fffd7c2
Author: Bill Farner <wfarner@apache.org>
Authored: Sat Sep 27 00:37:00 2014 -0700
Committer: Bill Farner <wfarner@apache.org>
Committed: Sat Sep 27 00:37:00 2014 -0700

----------------------------------------------------------------------
 .../updater/JobUpdateStateMachine.java          |  1 +
 .../aurora/scheduler/updater/UpdateFactory.java | 32 ++++---
 .../apache/aurora/client/api/updater_util.py    |  1 +
 .../storage/db/JobUpdateDetailsMapper.xml       |  8 +-
 .../aurora/scheduler/storage/db/schema.sql      |  1 +
 .../thrift/org/apache/aurora/gen/api.thrift     |  6 ++
 .../updater/JobUpdateStateMachineTest.java      |  1 +
 .../aurora/scheduler/updater/JobUpdaterIT.java  | 95 ++++++++++++++++++++
 .../python/apache/aurora/client/api/test_api.py |  3 +-
 9 files changed, 135 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/main/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachine.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachine.java b/src/main/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachine.java
index 5dec82a..76460f9 100644
--- a/src/main/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachine.java
+++ b/src/main/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachine.java
@@ -57,6 +57,7 @@ final class JobUpdateStateMachine {
               ROLL_FORWARD_PAUSED,
               ROLLED_FORWARD,
               ABORTED,
+              FAILED,
               ERROR)
           .putAll(ROLLING_BACK, ROLL_BACK_PAUSED, ROLLED_BACK, ABORTED, ERROR, FAILED)
           .putAll(ROLL_FORWARD_PAUSED, ROLLING_FORWARD, ABORTED, ERROR)

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java b/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java
index f4aefb2..62d08ad 100644
--- a/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java
+++ b/src/main/java/org/apache/aurora/scheduler/updater/UpdateFactory.java
@@ -36,6 +36,7 @@ import org.apache.aurora.scheduler.storage.entities.IJobUpdateSettings;
 import org.apache.aurora.scheduler.storage.entities.IRange;
 import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
 import org.apache.aurora.scheduler.storage.entities.ITaskConfig;
+import org.apache.aurora.scheduler.updater.strategy.BatchStrategy;
 import org.apache.aurora.scheduler.updater.strategy.QueueStrategy;
 import org.apache.aurora.scheduler.updater.strategy.UpdateStrategy;
 
@@ -128,17 +129,22 @@ interface UpdateFactory {
           ? Ordering.<Integer>natural()
           : Ordering.<Integer>natural().reverse();
 
-      // TODO(wfarner): Add the batch_completion flag to JobUpdateSettings and pick correct
-      // strategy.
-      UpdateStrategy<Integer> strategy =
-          new QueueStrategy<>(updateOrder, settings.getUpdateGroupSize());
+      UpdateStrategy<Integer> strategy = settings.isWaitForBatchCompletion()
+          ? new BatchStrategy<>(updateOrder, settings.getUpdateGroupSize())
+          : new QueueStrategy<>(updateOrder, settings.getUpdateGroupSize());
+      JobUpdateStatus successStatus =
+          rollingForward ? JobUpdateStatus.ROLLED_FORWARD : JobUpdateStatus.ROLLED_BACK;
+      JobUpdateStatus failureStatus = rollingForward && settings.isRollbackOnFailure()
+          ? JobUpdateStatus.ROLLING_BACK
+          : JobUpdateStatus.FAILED;
 
       return new Update(
           new OneWayJobUpdater<>(
               strategy,
               settings.getMaxFailedInstances(),
               evaluators.build()),
-          rollingForward);
+          successStatus,
+          failureStatus);
     }
 
     @VisibleForTesting
@@ -171,11 +177,17 @@ interface UpdateFactory {
 
   class Update {
     private final OneWayJobUpdater<Integer, Optional<IScheduledTask>> updater;
-    private final boolean rollingForward;
+    private final JobUpdateStatus successStatus;
+    private final JobUpdateStatus failureStatus;
+
+    Update(
+        OneWayJobUpdater<Integer, Optional<IScheduledTask>> updater,
+        JobUpdateStatus successStatus,
+        JobUpdateStatus failureStatus) {
 
-    Update(OneWayJobUpdater<Integer, Optional<IScheduledTask>> updater, boolean rollingForward) {
       this.updater = requireNonNull(updater);
-      this.rollingForward = rollingForward;
+      this.successStatus = requireNonNull(successStatus);
+      this.failureStatus = requireNonNull(failureStatus);
     }
 
     OneWayJobUpdater<Integer, Optional<IScheduledTask>> getUpdater() {
@@ -183,11 +195,11 @@ interface UpdateFactory {
     }
 
     JobUpdateStatus getSuccessStatus() {
-      return rollingForward ? JobUpdateStatus.ROLLED_FORWARD : JobUpdateStatus.ROLLED_BACK;
+      return successStatus;
     }
 
     JobUpdateStatus getFailureStatus() {
-      return rollingForward ? JobUpdateStatus.ROLLING_BACK : JobUpdateStatus.FAILED;
+      return failureStatus;
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/main/python/apache/aurora/client/api/updater_util.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/aurora/client/api/updater_util.py b/src/main/python/apache/aurora/client/api/updater_util.py
index 06eaf3d..9bfc27d 100644
--- a/src/main/python/apache/aurora/client/api/updater_util.py
+++ b/src/main/python/apache/aurora/client/api/updater_util.py
@@ -94,6 +94,7 @@ class UpdaterConfig(object):
         maxWaitToInstanceRunningMs=self.restart_threshold*1000,
         minWaitInInstanceRunningMs=self.watch_secs*1000,
         rollbackOnFailure=self.rollback_on_failure,
+        waitForBatchCompletion=self.wait_for_batch_completion,
         updateOnlyTheseInstances=self.instances_to_ranges(instances) if instances else None)
 
 

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/main/resources/org/apache/aurora/scheduler/storage/db/JobUpdateDetailsMapper.xml
----------------------------------------------------------------------
diff --git a/src/main/resources/org/apache/aurora/scheduler/storage/db/JobUpdateDetailsMapper.xml b/src/main/resources/org/apache/aurora/scheduler/storage/db/JobUpdateDetailsMapper.xml
index 2794242..631ab25 100644
--- a/src/main/resources/org/apache/aurora/scheduler/storage/db/JobUpdateDetailsMapper.xml
+++ b/src/main/resources/org/apache/aurora/scheduler/storage/db/JobUpdateDetailsMapper.xml
@@ -35,7 +35,8 @@
       max_failed_instances,
       max_wait_to_instance_running_ms,
       min_wait_in_instance_running_ms,
-      rollback_on_failure
+      rollback_on_failure,
+      wait_for_batch_completion
     ) VALUES (
       (
         SELECT ID
@@ -51,7 +52,8 @@
       #{instructions.settings.maxFailedInstances},
       #{instructions.settings.maxWaitToInstanceRunningMs},
       #{instructions.settings.minWaitInInstanceRunningMs},
-      #{instructions.settings.rollbackOnFailure}
+      #{instructions.settings.rollbackOnFailure},
+      #{instructions.settings.waitForBatchCompletion},
     )
   </insert>
 
@@ -301,6 +303,7 @@
       u.max_wait_to_instance_running_ms AS jui_juse_max_wait_to_instance_running_ms,
       u.min_wait_in_instance_running_ms AS jui_juse_min_wait_in_instance_running_ms,
       u.rollback_on_failure AS jui_juse_rollback_on_failure,
+      u.wait_for_batch_completion AS jui_juse_wait_for_batch_completion,
       u.id AS jui_id,
       cn.id AS jui_ditc_id,
       cn.task_config AS jui_ditc_task,
@@ -361,6 +364,7 @@
       u.max_wait_to_instance_running_ms AS juse_max_wait_to_instance_running_ms,
       u.min_wait_in_instance_running_ms AS juse_min_wait_in_instance_running_ms,
       u.rollback_on_failure AS juse_rollback_on_failure,
+      u.wait_for_batch_completion AS juse_wait_for_batch_completion,
       u.id AS id,
       cn.id AS ditc_id,
       cn.task_config AS ditc_task,

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/main/resources/org/apache/aurora/scheduler/storage/db/schema.sql
----------------------------------------------------------------------
diff --git a/src/main/resources/org/apache/aurora/scheduler/storage/db/schema.sql b/src/main/resources/org/apache/aurora/scheduler/storage/db/schema.sql
index 866e658..2894b61 100644
--- a/src/main/resources/org/apache/aurora/scheduler/storage/db/schema.sql
+++ b/src/main/resources/org/apache/aurora/scheduler/storage/db/schema.sql
@@ -105,6 +105,7 @@ CREATE TABLE job_updates(
   max_wait_to_instance_running_ms INT NOT NULL,
   min_wait_in_instance_running_ms INT NOT NULL,
   rollback_on_failure BOOLEAN NOT NULL,
+  wait_for_batch_completion BOOLEAN NOT NULL,
 
   UNIQUE(update_id)
 );

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/main/thrift/org/apache/aurora/gen/api.thrift
----------------------------------------------------------------------
diff --git a/src/main/thrift/org/apache/aurora/gen/api.thrift b/src/main/thrift/org/apache/aurora/gen/api.thrift
index cebd2c3..a1217ed 100644
--- a/src/main/thrift/org/apache/aurora/gen/api.thrift
+++ b/src/main/thrift/org/apache/aurora/gen/api.thrift
@@ -606,6 +606,12 @@ struct JobUpdateSettings {
 
   /** Instance IDs to act on. All instances will be affected if this is not set. */
   7: set<Range> updateOnlyTheseInstances
+
+  /**
+   * If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another
+   * batch until the preceding batch finishes updating.
+   */
+  8: bool waitForBatchCompletion
 }
 
 /** Event marking a state transition in job update lifecycle. */

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/test/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachineTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachineTest.java b/src/test/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachineTest.java
index 7e41843..89765ac 100644
--- a/src/test/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachineTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/updater/JobUpdateStateMachineTest.java
@@ -46,6 +46,7 @@ public class JobUpdateStateMachineTest {
           .put(Pair.of(ROLLING_FORWARD, ROLLED_FORWARD), STOP_WATCHING)
           .put(Pair.of(ROLLING_FORWARD, ABORTED), STOP_WATCHING)
           .put(Pair.of(ROLLING_FORWARD, ERROR), STOP_WATCHING)
+          .put(Pair.of(ROLLING_FORWARD, FAILED), STOP_WATCHING)
           .put(Pair.of(ROLLING_BACK, ROLL_BACK_PAUSED), STOP_WATCHING)
           .put(Pair.of(ROLLING_BACK, ROLLED_BACK), STOP_WATCHING)
           .put(Pair.of(ROLLING_BACK, ABORTED), STOP_WATCHING)

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java b/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java
index 5b00d3c..4934cbb 100644
--- a/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java
+++ b/src/test/java/org/apache/aurora/scheduler/updater/JobUpdaterIT.java
@@ -112,6 +112,7 @@ import static org.apache.aurora.gen.JobUpdateStatus.ROLL_BACK_PAUSED;
 import static org.apache.aurora.gen.JobUpdateStatus.ROLL_FORWARD_PAUSED;
 import static org.apache.aurora.gen.ScheduleStatus.ASSIGNED;
 import static org.apache.aurora.gen.ScheduleStatus.FAILED;
+import static org.apache.aurora.gen.ScheduleStatus.FINISHED;
 import static org.apache.aurora.gen.ScheduleStatus.KILLED;
 import static org.apache.aurora.gen.ScheduleStatus.RUNNING;
 import static org.apache.aurora.gen.ScheduleStatus.STARTING;
@@ -353,6 +354,54 @@ public class JobUpdaterIT extends EasyMockTest {
   }
 
   @Test
+  public void testSuccessfulBatchedUpdate() throws Exception {
+    expectTaskKilled().times(3);
+
+    control.replay();
+
+    JobUpdate builder = makeJobUpdate(makeInstanceConfig(0, 2, OLD_CONFIG)).newBuilder();
+    builder.getInstructions().getSettings()
+        .setWaitForBatchCompletion(true)
+        .setUpdateGroupSize(2);
+    IJobUpdate update = IJobUpdate.build(builder);
+    insertInitialTasks(update);
+
+    changeState(JOB, 0, ASSIGNED, STARTING, RUNNING);
+    changeState(JOB, 1, ASSIGNED, STARTING, RUNNING);
+    changeState(JOB, 2, ASSIGNED, STARTING, RUNNING);
+    clock.advance(WATCH_TIMEOUT);
+
+    ImmutableMultimap.Builder<Integer, JobUpdateAction> actions = ImmutableMultimap.builder();
+
+    // Instances 0 and 1 are updated.
+    updater.start(update, USER);
+    actions.putAll(0, INSTANCE_UPDATING)
+        .putAll(1, INSTANCE_UPDATING);
+    assertState(ROLLING_FORWARD, actions.build());
+    changeState(JOB, 1, FINISHED, ASSIGNED, STARTING, RUNNING);
+    clock.advance(Amount.of(RUNNING_TIMEOUT.getValue() / 2, Time.MILLISECONDS));
+    changeState(JOB, 0, FINISHED, ASSIGNED, STARTING, RUNNING);
+    clock.advance(
+        Amount.of(WATCH_TIMEOUT.getValue() - (RUNNING_TIMEOUT.getValue() / 2), Time.MILLISECONDS));
+
+    // Instance 1 finished first, but update does not yet proceed until 0 finishes.
+    actions.putAll(1, INSTANCE_UPDATED);
+    assertState(ROLLING_FORWARD, actions.build());
+    clock.advance(WATCH_TIMEOUT);
+    actions.putAll(0, INSTANCE_UPDATED);
+
+    // Instance 2 is updated.
+    changeState(JOB, 2, FINISHED, ASSIGNED, STARTING, RUNNING);
+    clock.advance(WATCH_TIMEOUT);
+    actions.putAll(2, INSTANCE_UPDATING, INSTANCE_UPDATED);
+    assertState(ROLLED_FORWARD, actions.build());
+
+    assertJobState(
+        JOB,
+        ImmutableMap.of(0, NEW_CONFIG, 1, NEW_CONFIG, 2, NEW_CONFIG));
+  }
+
+  @Test
   public void testUpdateSpecificInstances() throws Exception {
     expectTaskKilled();
 
@@ -451,6 +500,51 @@ public class JobUpdaterIT extends EasyMockTest {
   }
 
   @Test
+  public void testRollbackDisabled() throws Exception {
+    expectTaskKilled().times(2);
+
+    control.replay();
+
+    JobUpdate builder = makeJobUpdate(
+        makeInstanceConfig(0, 0, OLD_CONFIG),
+        makeInstanceConfig(2, 3, OLD_CONFIG))
+        .newBuilder();
+    builder.getInstructions().getSettings().setRollbackOnFailure(false);
+    IJobUpdate update = IJobUpdate.build(builder);
+    insertInitialTasks(update);
+
+    changeState(JOB, 0, ASSIGNED, STARTING, RUNNING);
+    changeState(JOB, 2, ASSIGNED, STARTING, RUNNING);
+    changeState(JOB, 3, ASSIGNED, STARTING, RUNNING);
+    clock.advance(WATCH_TIMEOUT);
+
+    ImmutableMultimap.Builder<Integer, JobUpdateAction> actions = ImmutableMultimap.builder();
+
+    // Instance 0 is updated.
+    updater.start(update, USER);
+    actions.putAll(0, INSTANCE_UPDATING);
+    assertState(ROLLING_FORWARD, actions.build());
+    changeState(JOB, 0, KILLED, ASSIGNED, STARTING, RUNNING);
+    clock.advance(WATCH_TIMEOUT);
+
+    // Instance 1 is added.
+    changeState(JOB, 1, ASSIGNED, STARTING, RUNNING);
+    actions.putAll(0, INSTANCE_UPDATED)
+        .putAll(1, INSTANCE_UPDATING, INSTANCE_UPDATED);
+    clock.advance(WATCH_TIMEOUT);
+
+    // Instance 2 is updated, but fails.
+    changeState(JOB, 2, KILLED, ASSIGNED, STARTING, RUNNING);
+    actions.putAll(2, INSTANCE_UPDATING, INSTANCE_UPDATE_FAILED);
+    clock.advance(FLAPPING_THRESHOLD);
+    changeState(JOB, 2, FAILED);
+    clock.advance(WATCH_TIMEOUT);
+
+    // Rollback is disabled, update fails.
+    assertState(JobUpdateStatus.FAILED, actions.build());
+  }
+
+  @Test
   public void testAbort() throws Exception {
     expectTaskKilled();
 
@@ -824,6 +918,7 @@ public class JobUpdaterIT extends EasyMockTest {
                 .setInstances(ImmutableSet.of(new Range(0, 2))))
             .setSettings(new JobUpdateSettings()
                 .setUpdateGroupSize(1)
+                .setRollbackOnFailure(true)
                 .setMaxWaitToInstanceRunningMs(RUNNING_TIMEOUT.as(Time.MILLISECONDS).intValue())
                 .setMinWaitInInstanceRunningMs(WATCH_TIMEOUT.as(Time.MILLISECONDS).intValue())
                 .setUpdateOnlyTheseInstances(ImmutableSet.<Range>of())));

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/e7f05000/src/test/python/apache/aurora/client/api/test_api.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/aurora/client/api/test_api.py b/src/test/python/apache/aurora/client/api/test_api.py
index e1885f8..43370d8 100644
--- a/src/test/python/apache/aurora/client/api/test_api.py
+++ b/src/test/python/apache/aurora/client/api/test_api.py
@@ -79,7 +79,8 @@ class TestJobUpdateApis(unittest.TestCase):
         maxFailedInstances=1,
         maxWaitToInstanceRunningMs=50 * 1000,
         minWaitInInstanceRunningMs=50 * 1000,
-        rollbackOnFailure=True)
+        rollbackOnFailure=True,
+        waitForBatchCompletion=False)
 
   @classmethod
   def create_update_request(cls, task_config):


Mime
View raw message