aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wfar...@apache.org
Subject git commit: Instrument task scheduling code to give visibility into failed searches and backoffs.
Date Mon, 22 Sep 2014 18:47:53 GMT
Repository: incubator-aurora
Updated Branches:
  refs/heads/master 8984fce34 -> 2b2c9910a


Instrument task scheduling code to give visibility into failed searches and backoffs.

Reviewed at https://reviews.apache.org/r/25872/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/2b2c9910
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/2b2c9910
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/2b2c9910

Branch: refs/heads/master
Commit: 2b2c9910a92a4bad02078aeeb7f3d532ac0f1c5d
Parents: 8984fce
Author: Bill Farner <wfarner@apache.org>
Authored: Mon Sep 22 11:45:36 2014 -0700
Committer: Bill Farner <wfarner@apache.org>
Committed: Mon Sep 22 11:45:36 2014 -0700

----------------------------------------------------------------------
 .../org/apache/aurora/scheduler/async/TaskGroups.java   |  7 +++++++
 .../apache/aurora/scheduler/async/TaskScheduler.java    | 12 +++++++-----
 2 files changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/2b2c9910/src/main/java/org/apache/aurora/scheduler/async/TaskGroups.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/async/TaskGroups.java b/src/main/java/org/apache/aurora/scheduler/async/TaskGroups.java
index f64497f..8fb75b0 100644
--- a/src/main/java/org/apache/aurora/scheduler/async/TaskGroups.java
+++ b/src/main/java/org/apache/aurora/scheduler/async/TaskGroups.java
@@ -32,6 +32,7 @@ import com.twitter.common.application.ShutdownRegistry;
 import com.twitter.common.base.Command;
 import com.twitter.common.quantity.Amount;
 import com.twitter.common.quantity.Time;
+import com.twitter.common.stats.SlidingStats;
 import com.twitter.common.stats.Stats;
 import com.twitter.common.util.BackoffStrategy;
 import com.twitter.common.util.concurrent.ExecutorServiceShutdown;
@@ -70,6 +71,11 @@ public class TaskGroups implements EventSubscriber {
   private final BackoffStrategy backoff;
   private final RescheduleCalculator rescheduleCalculator;
 
+  // Track the penalties of tasks at the time they were scheduled. This is to provide data that
+  // may influence the selection of a different backoff strategy.
+  private final SlidingStats scheduledTaskPenalties =
+      new SlidingStats("scheduled_task_penalty", "ms");
+
   public static class TaskGroupsSettings {
     private final BackoffStrategy taskGroupBackoff;
     private final RateLimiter rateLimiter;
@@ -136,6 +142,7 @@ public class TaskGroups implements EventSubscriber {
         long penaltyMs = 0;
         if (taskId.isPresent()) {
           if (taskScheduler.schedule(taskId.get())) {
+            scheduledTaskPenalties.accumulate(group.getPenaltyMs());
             group.remove(taskId.get());
             if (group.hasMore()) {
               penaltyMs = backoff.calculateBackoffMs(0);

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/2b2c9910/src/main/java/org/apache/aurora/scheduler/async/TaskScheduler.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/async/TaskScheduler.java b/src/main/java/org/apache/aurora/scheduler/async/TaskScheduler.java
index 882cdfd..d9c4c02 100644
--- a/src/main/java/org/apache/aurora/scheduler/async/TaskScheduler.java
+++ b/src/main/java/org/apache/aurora/scheduler/async/TaskScheduler.java
@@ -104,8 +104,9 @@ public interface TaskScheduler extends EventSubscriber {
     private final Preemptor preemptor;
     private final Reservations reservations;
 
-    private final AtomicLong scheduleAttemptsFired = Stats.exportLong("schedule_attempts_fired");
-    private final AtomicLong scheduleAttemptsFailed = Stats.exportLong("schedule_attempts_failed");
+    private final AtomicLong attemptsFired = Stats.exportLong("schedule_attempts_fired");
+    private final AtomicLong attemptsFailed = Stats.exportLong("schedule_attempts_failed");
+    private final AtomicLong attemptsNoMatch = Stats.exportLong("schedule_attempts_no_match");
 
     @Inject
     TaskSchedulerImpl(
@@ -176,7 +177,7 @@ public interface TaskScheduler extends EventSubscriber {
     @Timed("task_schedule_attempt")
     @Override
     public boolean schedule(final String taskId) {
-      scheduleAttemptsFired.incrementAndGet();
+      attemptsFired.incrementAndGet();
       try {
         return storage.write(new MutateWork.Quiet<Boolean>() {
           @Override
@@ -194,11 +195,12 @@ public interface TaskScheduler extends EventSubscriber {
                 if (!offerQueue.launchFirst(getAssignerFunction(aggregate, taskId, task))) {
                   // Task could not be scheduled.
                   maybePreemptFor(taskId, aggregate);
+                  attemptsNoMatch.incrementAndGet();
                   return false;
                 }
               } catch (OfferQueue.LaunchException e) {
                 LOG.log(Level.WARNING, "Failed to launch task.", e);
-                scheduleAttemptsFailed.incrementAndGet();
+                attemptsFailed.incrementAndGet();
 
                 // The attempt to schedule the task failed, so we need to backpedal on the
                 // assignment.
@@ -216,7 +218,7 @@ public interface TaskScheduler extends EventSubscriber {
         // We catch the generic unchecked exception here to ensure tasks are not abandoned
         // if there is a transient issue resulting in an unchecked exception.
         LOG.log(Level.WARNING, "Task scheduling unexpectedly failed, will be retried", e);
-        scheduleAttemptsFailed.incrementAndGet();
+        attemptsFailed.incrementAndGet();
         return false;
       }
     }


Mime
View raw message