aurora-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wfar...@apache.org
Subject incubator-aurora git commit: Remove shard uniqueness check from scheduler recovery phase.
Date Tue, 03 Feb 2015 01:00:24 GMT
Repository: incubator-aurora
Updated Branches:
  refs/heads/master a67458136 -> 2742120e2


Remove shard uniqueness check from scheduler recovery phase.

Bugs closed: AURORA-1090

Reviewed at https://reviews.apache.org/r/30535/


Project: http://git-wip-us.apache.org/repos/asf/incubator-aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-aurora/commit/2742120e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-aurora/tree/2742120e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-aurora/diff/2742120e

Branch: refs/heads/master
Commit: 2742120e2a8bfecd233cc08979d440129dda3496
Parents: a674581
Author: Bill Farner <wfarner@apache.org>
Authored: Mon Feb 2 16:58:25 2015 -0800
Committer: Bill Farner <wfarner@apache.org>
Committed: Mon Feb 2 16:58:25 2015 -0800

----------------------------------------------------------------------
 .../scheduler/storage/StorageBackfill.java      | 51 --------------------
 .../scheduler/storage/StorageBackfillTest.java  | 36 --------------
 2 files changed, 87 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/2742120e/src/main/java/org/apache/aurora/scheduler/storage/StorageBackfill.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/aurora/scheduler/storage/StorageBackfill.java b/src/main/java/org/apache/aurora/scheduler/storage/StorageBackfill.java
index 1814658..52377bc 100644
--- a/src/main/java/org/apache/aurora/scheduler/storage/StorageBackfill.java
+++ b/src/main/java/org/apache/aurora/scheduler/storage/StorageBackfill.java
@@ -13,25 +13,18 @@
  */
 package org.apache.aurora.scheduler.storage;
 
-import java.util.Set;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.logging.Logger;
 
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Sets;
 import com.twitter.common.stats.Stats;
 import com.twitter.common.util.Clock;
 
 import org.apache.aurora.gen.JobConfiguration;
 import org.apache.aurora.gen.JobKey;
-import org.apache.aurora.gen.ScheduleStatus;
 import org.apache.aurora.gen.ScheduledTask;
 import org.apache.aurora.gen.TaskConfig;
-import org.apache.aurora.gen.TaskEvent;
 import org.apache.aurora.scheduler.base.JobKeys;
 import org.apache.aurora.scheduler.base.Query;
-import org.apache.aurora.scheduler.base.Tasks;
 import org.apache.aurora.scheduler.configuration.ConfigurationManager;
 import org.apache.aurora.scheduler.storage.Storage.MutableStoreProvider;
 import org.apache.aurora.scheduler.storage.TaskStore.Mutable.TaskMutation;
@@ -45,9 +38,6 @@ public final class StorageBackfill {
 
   private static final Logger LOG = Logger.getLogger(StorageBackfill.class.getName());
 
-  private static final AtomicLong SHARD_SANITY_CHECK_FAILS =
-      Stats.exportLong("shard_sanity_check_failures");
-
   private static final AtomicLong BACKFILLED_TASK_CONFIG_KEYS =
       Stats.exportLong("task_config_keys_backfilled");
 
@@ -68,44 +58,6 @@ public final class StorageBackfill {
     }
   }
 
-  private static void guaranteeShardUniqueness(
-      ScheduledTask task,
-      TaskStore.Mutable taskStore,
-      Clock clock) {
-
-    if (Tasks.isActive(task.getStatus())) {
-      // Perform a sanity check on the number of active shards.
-      Query.Builder query = Query.instanceScoped(
-          IJobKey.build(task.getAssignedTask().getTask().getJob()),
-          task.getAssignedTask().getInstanceId())
-          .active();
-      Set<String> activeTasksInShard = FluentIterable.from(taskStore.fetchTasks(query))
-          .transform(Tasks.SCHEDULED_TO_ID)
-          .toSet();
-
-      if (activeTasksInShard.size() > 1) {
-        SHARD_SANITY_CHECK_FAILS.incrementAndGet();
-        LOG.severe("Active shard sanity check failed when loading " + Tasks.id(task)
-            + ", active tasks found: " + activeTasksInShard);
-
-        // We want to keep exactly one task from this shard, so sort the IDs and keep the
-        // highest (newest) in the hopes that it is legitimately running.
-        String newestTask = Iterables.getLast(Sets.newTreeSet(activeTasksInShard));
-        if (Tasks.id(task).equals(newestTask)) {
-          LOG.info("Retaining task " + Tasks.id(task));
-        } else {
-          task.setStatus(ScheduleStatus.KILLED);
-          task.addToTaskEvents(new TaskEvent(clock.nowMillis(), ScheduleStatus.KILLED)
-              .setMessage("Killed duplicate shard."));
-          // TODO(wfarner); Circle back if this is necessary.  Currently there's a race
-          // condition between the time the scheduler is actually available without hitting
-          // IllegalStateException (see DriverImpl).
-          // driver.killTask(Tasks.id(task));
-        }
-      }
-    }
-  }
-
   private static void populateJobKey(TaskConfig config, AtomicLong counter) {
     if (!config.isSetJob() || !JobKeys.isValid(IJobKey.build(config.getJob()))) {
       config.setJob(new JobKey()
@@ -145,9 +97,6 @@ public final class StorageBackfill {
       public IScheduledTask apply(final IScheduledTask task) {
         ScheduledTask builder = task.newBuilder();
         ConfigurationManager.applyDefaultsIfUnset(builder.getAssignedTask().getTask());
-        // TODO(ksweeney): Guarantee tasks pass current validation code here and quarantine if they
-        // don't.
-        guaranteeShardUniqueness(builder, storeProvider.getUnsafeTaskStore(), clock);
         return IScheduledTask.build(builder);
       }
     });

http://git-wip-us.apache.org/repos/asf/incubator-aurora/blob/2742120e/src/test/java/org/apache/aurora/scheduler/storage/StorageBackfillTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/aurora/scheduler/storage/StorageBackfillTest.java b/src/test/java/org/apache/aurora/scheduler/storage/StorageBackfillTest.java
index 93773eb..493150b 100644
--- a/src/test/java/org/apache/aurora/scheduler/storage/StorageBackfillTest.java
+++ b/src/test/java/org/apache/aurora/scheduler/storage/StorageBackfillTest.java
@@ -14,9 +14,7 @@
 package org.apache.aurora.scheduler.storage;
 
 import java.util.Set;
-import java.util.concurrent.atomic.AtomicInteger;
 
-import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
@@ -49,7 +47,6 @@ import org.apache.aurora.scheduler.storage.mem.MemStorage;
 import org.junit.Before;
 import org.junit.Test;
 
-import static org.apache.aurora.gen.ScheduleStatus.KILLED;
 import static org.apache.aurora.gen.ScheduleStatus.PENDING;
 import static org.apache.aurora.gen.ScheduleStatus.RUNNING;
 import static org.junit.Assert.assertEquals;
@@ -108,39 +105,6 @@ public class StorageBackfillTest {
   }
 
   @Test
-  public void testShardUniquenessCorrection() throws Exception {
-    final AtomicInteger taskId = new AtomicInteger();
-
-    final TaskConfig task = defaultTask();
-    SanitizedConfiguration job = makeJob(JOB_KEY, task, 10);
-    final Set<IScheduledTask> badTasks = ImmutableSet.copyOf(Iterables.transform(
-        job.getInstanceIds(),
-        new Function<Integer, IScheduledTask>() {
-          @Override
-          public IScheduledTask apply(Integer instanceId) {
-            return IScheduledTask.build(new ScheduledTask()
-                .setStatus(RUNNING)
-                .setAssignedTask(new AssignedTask()
-                    .setInstanceId(0)
-                    .setTaskId("task-" + taskId.incrementAndGet())
-                    .setTask(task)));
-          }
-        }));
-
-    storage.write(new Storage.MutateWork.NoResult.Quiet() {
-      @Override
-      protected void execute(Storage.MutableStoreProvider storeProvider) {
-        storeProvider.getUnsafeTaskStore().saveTasks(badTasks);
-      }
-    });
-
-    backfill();
-
-    assertEquals(1, getTasksByStatus(RUNNING).size());
-    assertEquals(9, getTasksByStatus(KILLED).size());
-  }
-
-  @Test
   public void testJobConfigurationBackfill() throws Exception {
     final JobConfiguration config = makeJobConfig(JOB_KEY, defaultTask(), 1);
     SanitizedConfiguration expected =


Mime
View raw message