hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wan...@apache.org
Subject [6/9] hadoop git commit: YARN-3388. Allocation in LeafQueue could get stuck because DRF calculator isn't well supported when computing user-limit. (Nathan Roberts via wangda)
Date Sat, 20 Aug 2016 00:04:11 GMT
YARN-3388. Allocation in LeafQueue could get stuck because DRF calculator isn't well supported
when computing user-limit. (Nathan Roberts via wangda)

(cherry picked from commit 444b2ea7afebf9f6c3d356154b71abfd0ea95b23)
(cherry picked from commit c7d782d2f6e61103f7a009c48b11dcd64e07c2ce)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3bf2e16f
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3bf2e16f
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3bf2e16f

Branch: refs/heads/branch-2.8
Commit: 3bf2e16f76a985b97fa5ee61266236e4212a4610
Parents: e2bd490
Author: Wangda Tan <wangda@apache.org>
Authored: Fri Aug 19 16:28:32 2016 -0700
Committer: Wangda Tan <wangda@apache.org>
Committed: Fri Aug 19 16:54:03 2016 -0700

----------------------------------------------------------------------
 .../scheduler/capacity/LeafQueue.java           | 230 ++++++++++++++++---
 .../scheduler/capacity/TestLeafQueue.java       | 198 +++++++++++++++-
 .../scheduler/capacity/TestUtils.java           |  24 +-
 3 files changed, 407 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/3bf2e16f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
index 006d840..8870be5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java
@@ -18,17 +18,8 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Sets;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -58,7 +49,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
-
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest;
@@ -71,9 +61,22 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.FifoOrderi
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.OrderingPolicy;
 import org.apache.hadoop.yarn.server.utils.Lock;
 import org.apache.hadoop.yarn.server.utils.Lock.NoLock;
+import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.Resources;
 
-import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 
 @Private
 @Unstable
@@ -112,7 +115,7 @@ public class LeafQueue extends AbstractCSQueue {
 
   // cache last cluster resource to compute actual capacity
   private Resource lastClusterResource = Resources.none();
-  
+
   private final QueueResourceLimitsInfo queueResourceLimitsInfo =
       new QueueResourceLimitsInfo();
 
@@ -120,6 +123,10 @@ public class LeafQueue extends AbstractCSQueue {
 
   private OrderingPolicy<FiCaSchedulerApp> orderingPolicy = null;
 
+  // Summation of consumed ratios for all users in queue
+  private float totalUserConsumedRatio = 0;
+  private UsageRatios qUsageRatios;
+
   // record all ignore partition exclusivityRMContainer, this will be used to do
   // preemption, key is the partition of the RMContainer allocated on
   private Map<String, TreeSet<RMContainer>> ignorePartitionExclusivityRMContainers
=
@@ -136,6 +143,8 @@ public class LeafQueue extends AbstractCSQueue {
     // One time initialization is enough since it is static ordering policy
     this.pendingOrderingPolicy = new FifoOrderingPolicyForPendingApps();
 
+    qUsageRatios = new UsageRatios();
+
     if(LOG.isDebugEnabled()) {
       LOG.debug("LeafQueue:" + " name=" + queueName
         + ", fullname=" + getQueuePath());
@@ -160,7 +169,7 @@ public class LeafQueue extends AbstractCSQueue {
     setQueueResourceLimitsInfo(clusterResource);
 
     CapacitySchedulerConfiguration conf = csContext.getConfiguration();
-    
+
     setOrderingPolicy(conf.<FiCaSchedulerApp>getOrderingPolicy(getQueuePath()));
 
     userLimit = conf.getUserLimit(getQueuePath());
@@ -1071,6 +1080,9 @@ public class LeafQueue extends AbstractCSQueue {
   private Resource computeUserLimit(FiCaSchedulerApp application,
       Resource clusterResource, User user,
       String nodePartition, SchedulingMode schedulingMode) {
+    Resource partitionResource = labelManager.getResourceByLabel(nodePartition,
+        clusterResource);
+
     // What is our current capacity? 
     // * It is equal to the max(required, queue-capacity) if
     //   we're running below capacity. The 'max' ensures that jobs in queues
@@ -1079,7 +1091,7 @@ public class LeafQueue extends AbstractCSQueue {
     //   (usedResources + required) (which extra resources we are allocating)
     Resource queueCapacity =
         Resources.multiplyAndNormalizeUp(resourceCalculator,
-            labelManager.getResourceByLabel(nodePartition, clusterResource),
+            partitionResource,
             queueCapacities.getAbsoluteCapacity(nodePartition),
             minimumAllocation);
 
@@ -1091,15 +1103,30 @@ public class LeafQueue extends AbstractCSQueue {
     // Allow progress for queues with miniscule capacity
     queueCapacity =
         Resources.max(
-            resourceCalculator, clusterResource, 
+            resourceCalculator, partitionResource,
             queueCapacity, 
             required);
 
+
+    /* We want to base the userLimit calculation on
+     * max(queueCapacity, usedResources+required). However, we want
+     * usedResources to be based on the combined ratios of all the users in the
+     * queue so we use consumedRatio to calculate such.
+     * The calculation is dependent on how the resourceCalculator calculates the
+     * ratio between two Resources. DRF Example: If usedResources is
+     * greater than queueCapacity and users have the following [mem,cpu] usages:
+     * User1: [10%,20%] - Dominant resource is 20%
+     * User2: [30%,10%] - Dominant resource is 30%
+     * Then total consumedRatio is then 20+30=50%. Yes, this value can be
+     * larger than 100% but for the purposes of making sure all users are
+     * getting their fair share, it works.
+     */
+    Resource consumed = Resources.multiplyAndNormalizeUp(resourceCalculator,
+        partitionResource, qUsageRatios.getUsageRatio(nodePartition),
+        minimumAllocation);
     Resource currentCapacity =
-        Resources.lessThan(resourceCalculator, clusterResource,
-            queueUsage.getUsed(nodePartition), queueCapacity) ? queueCapacity
-            : Resources.add(queueUsage.getUsed(nodePartition), required);
-    
+        Resources.lessThan(resourceCalculator, partitionResource, consumed,
+            queueCapacity) ? queueCapacity : Resources.add(consumed, required);
     // Never allow a single user to take more than the 
     // queue's configured capacity * user-limit-factor.
     // Also, the queue's configured capacity should be higher than 
@@ -1108,9 +1135,10 @@ public class LeafQueue extends AbstractCSQueue {
     final int activeUsers = activeUsersManager.getNumActiveUsers();
     
     // User limit resource is determined by:
-    // max{currentCapacity / #activeUsers, currentCapacity * user-limit-percentage%)
+    // max{currentCapacity / #activeUsers, currentCapacity *
+    // user-limit-percentage%)
     Resource userLimitResource = Resources.max(
-        resourceCalculator, clusterResource, 
+        resourceCalculator, partitionResource,
         Resources.divideAndCeil(
             resourceCalculator, currentCapacity, activeUsers),
         Resources.divideAndCeil(
@@ -1134,8 +1162,7 @@ public class LeafQueue extends AbstractCSQueue {
       maxUserLimit =
           Resources.multiplyAndRoundDown(queueCapacity, userLimitFactor);
     } else if (schedulingMode == SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY) {
-      maxUserLimit =
-          labelManager.getResourceByLabel(nodePartition, clusterResource);
+      maxUserLimit = partitionResource;
     }
     
     // Cap final user limit with maxUserLimit
@@ -1143,7 +1170,7 @@ public class LeafQueue extends AbstractCSQueue {
         Resources.roundUp(
             resourceCalculator, 
             Resources.min(
-                resourceCalculator, clusterResource,   
+                resourceCalculator, partitionResource,
                   userLimitResource,
                   maxUserLimit
                 ), 
@@ -1151,18 +1178,22 @@ public class LeafQueue extends AbstractCSQueue {
 
     if (LOG.isDebugEnabled()) {
       String userName = application.getUser();
-      LOG.debug("User limit computation for " + userName + 
+      LOG.debug("User limit computation for " + userName +
           " in queue " + getQueueName() +
           " userLimitPercent=" + userLimit +
           " userLimitFactor=" + userLimitFactor +
-          " required: " + required + 
-          " consumed: " + user.getUsed() + 
+          " required: " + required +
+          " consumed: " + consumed +
           " user-limit-resource: " + userLimitResource +
-          " queueCapacity: " + queueCapacity + 
+          " queueCapacity: " + queueCapacity +
           " qconsumed: " + queueUsage.getUsed() +
+          " consumedRatio: " + totalUserConsumedRatio +
           " currentCapacity: " + currentCapacity +
           " activeUsers: " + activeUsers +
-          " clusterCapacity: " + clusterResource
+          " clusterCapacity: " + clusterResource +
+          " resourceByLabel: " + partitionResource +
+          " usageratio: " + qUsageRatios.getUsageRatio(nodePartition) +
+          " Partition: " + nodePartition
       );
     }
     user.setUserResourceLimit(userLimitResource);
@@ -1248,6 +1279,42 @@ public class LeafQueue extends AbstractCSQueue {
     }
   }
 
+  private synchronized float calculateUserUsageRatio(Resource clusterResource,
+      String nodePartition) {
+    Resource resourceByLabel =
+        labelManager.getResourceByLabel(nodePartition, clusterResource);
+    float consumed = 0;
+    User user;
+    for (Map.Entry<String, User> entry : users.entrySet()) {
+      user = entry.getValue();
+      consumed += user.resetAndUpdateUsageRatio(resourceCalculator,
+          resourceByLabel, nodePartition);
+    }
+    return consumed;
+  }
+
+  private synchronized void recalculateQueueUsageRatio(Resource clusterResource,
+      String nodePartition) {
+    ResourceUsage queueResourceUsage = this.getQueueResourceUsage();
+
+    if (nodePartition == null) {
+      for (String partition : Sets.union(queueCapacities.getNodePartitionsSet(),
+          queueResourceUsage.getNodePartitionsSet())) {
+        qUsageRatios.setUsageRatio(partition,
+            calculateUserUsageRatio(clusterResource, partition));
+      }
+    } else {
+      qUsageRatios.setUsageRatio(nodePartition,
+          calculateUserUsageRatio(clusterResource, nodePartition));
+    }
+  }
+
+  private synchronized void updateQueueUsageRatio(String nodePartition,
+      float delta) {
+    qUsageRatios.incUsageRatio(nodePartition, delta);
+  }
+
+
   @Override
   public void completedContainer(Resource clusterResource, 
       FiCaSchedulerApp application, FiCaSchedulerNode node, RMContainer rmContainer, 
@@ -1282,7 +1349,7 @@ public class LeafQueue extends AbstractCSQueue {
           removed =
               application.containerCompleted(rmContainer, containerStatus,
                   event, node.getPartition());
-          
+
           node.releaseContainer(container);
         }
 
@@ -1311,6 +1378,8 @@ public class LeafQueue extends AbstractCSQueue {
       boolean isIncreasedAllocation) {
     super.allocateResource(clusterResource, resource, nodePartition,
         isIncreasedAllocation);
+    Resource resourceByLabel = labelManager.getResourceByLabel(nodePartition,
+        clusterResource);
     
     // handle ignore exclusivity container
     if (null != rmContainer && rmContainer.getNodeLabelExpression().equals(
@@ -1329,6 +1398,12 @@ public class LeafQueue extends AbstractCSQueue {
     String userName = application.getUser();
     User user = getUser(userName);
     user.assignContainer(resource, nodePartition);
+
+    // Update usage ratios
+    updateQueueUsageRatio(nodePartition,
+        user.updateUsageRatio(resourceCalculator, resourceByLabel,
+            nodePartition));
+
     // Note this is a bit unconventional since it gets the object and modifies
     // it here, rather then using set routine
     Resources.subtractFrom(application.getHeadroom(), resource); // headroom
@@ -1349,6 +1424,8 @@ public class LeafQueue extends AbstractCSQueue {
       RMContainer rmContainer, boolean isChangeResource) {
     super.releaseResource(clusterResource, resource, nodePartition,
         isChangeResource);
+    Resource resourceByLabel = labelManager.getResourceByLabel(nodePartition,
+        clusterResource);
     
     // handle ignore exclusivity container
     if (null != rmContainer && rmContainer.getNodeLabelExpression().equals(
@@ -1368,6 +1445,12 @@ public class LeafQueue extends AbstractCSQueue {
     String userName = application.getUser();
     User user = getUser(userName);
     user.releaseContainer(resource, nodePartition);
+
+    // Update usage ratios
+    updateQueueUsageRatio(nodePartition,
+        user.updateUsageRatio(resourceCalculator, resourceByLabel,
+            nodePartition));
+
     metrics.setAvailableResourcesToUser(userName, application.getHeadroom());
 
     if (LOG.isDebugEnabled()) {
@@ -1407,7 +1490,10 @@ public class LeafQueue extends AbstractCSQueue {
     // absoluteMaxCapacity now,  will be replaced with absoluteMaxAvailCapacity
     // during allocation
     setQueueResourceLimitsInfo(clusterResource);
-    
+
+    // Update user consumedRatios
+    recalculateQueueUsageRatio(clusterResource, null);
+
     // Update metrics
     CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource,
         minimumAllocation, this, labelManager, null);
@@ -1459,17 +1545,93 @@ public class LeafQueue extends AbstractCSQueue {
     queueUsage.decAMUsed(nodeLabel, resourceToDec);
   }
 
+  /*
+   * Usage Ratio
+   */
+  static private class UsageRatios {
+    private Map<String, Float> usageRatios;
+    private ReadLock readLock;
+    private WriteLock writeLock;
+
+    public UsageRatios() {
+      ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+      readLock = lock.readLock();
+      writeLock = lock.writeLock();
+      usageRatios = new HashMap<String, Float>();
+    }
+
+    private void incUsageRatio(String label, float delta) {
+      try {
+        writeLock.lock();
+        Float fl = usageRatios.get(label);
+        if (null == fl) {
+          fl = new Float(0.0);
+        }
+        fl += delta;
+        usageRatios.put(label, new Float(fl));
+      } finally {
+        writeLock.unlock();
+      }
+    }
+
+    float getUsageRatio(String label) {
+      try {
+        readLock.lock();
+        Float f = usageRatios.get(label);
+        if (null == f) {
+          return 0.0f;
+        }
+        return f;
+      } finally {
+        readLock.unlock();
+      }
+    }
+
+    private void setUsageRatio(String label, float ratio) {
+      try {
+        writeLock.lock();
+        usageRatios.put(label, new Float(ratio));
+      } finally {
+        writeLock.unlock();
+      }
+    }
+  }
+
+  @VisibleForTesting
+  public float getUsageRatio(String label) {
+    return qUsageRatios.getUsageRatio(label);
+  }
+
   @VisibleForTesting
   public static class User {
     ResourceUsage userResourceUsage = new ResourceUsage();
     volatile Resource userResourceLimit = Resource.newInstance(0, 0);
     int pendingApplications = 0;
     int activeApplications = 0;
+    private UsageRatios userUsageRatios = new UsageRatios();
 
     public ResourceUsage getResourceUsage() {
       return userResourceUsage;
     }
     
+    public synchronized float resetAndUpdateUsageRatio(
+        ResourceCalculator resourceCalculator,
+        Resource resource, String nodePartition) {
+      userUsageRatios.setUsageRatio(nodePartition, 0);
+      return updateUsageRatio(resourceCalculator, resource, nodePartition);
+    }
+
+    public synchronized float updateUsageRatio(
+        ResourceCalculator resourceCalculator,
+        Resource resource, String nodePartition) {
+      float delta;
+      float newRatio =
+          Resources.ratio(resourceCalculator, getUsed(nodePartition), resource);
+      delta = newRatio - userUsageRatios.getUsageRatio(nodePartition);
+      userUsageRatios.setUsageRatio(nodePartition, newRatio);
+      return delta;
+    }
+
     public Resource getUsed() {
       return userResourceUsage.getUsed();
     }
@@ -1607,7 +1769,7 @@ public class LeafQueue extends AbstractCSQueue {
         .getSchedulableEntities()) {
       apps.add(pendingApp.getApplicationAttemptId());
     }
-    for (FiCaSchedulerApp app : 
+    for (FiCaSchedulerApp app :
       orderingPolicy.getSchedulableEntities()) {
       apps.add(app.getApplicationAttemptId());
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/3bf2e16f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
index 9572bf5..272a950 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -45,6 +47,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue.User;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
@@ -56,6 +59,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.FifoOrderi
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.OrderingPolicy;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
 import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
+import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
 import org.apache.hadoop.yarn.util.resource.Resources;
 import org.junit.After;
@@ -91,6 +95,7 @@ import static org.mockito.Mockito.when;
 public class TestLeafQueue {  
   private final RecordFactory recordFactory = 
       RecordFactoryProvider.getRecordFactory(null);
+  private static final Log LOG = LogFactory.getLog(TestLeafQueue.class);
 
   RMContext rmContext;
   RMContext spyRMContext;
@@ -100,16 +105,29 @@ public class TestLeafQueue {
   CapacitySchedulerContext csContext;
   
   CSQueue root;
-  Map<String, CSQueue> queues = new HashMap<String, CSQueue>();
+  Map<String, CSQueue> queues;
   
   final static int GB = 1024;
   final static String DEFAULT_RACK = "/default";
 
-  private final ResourceCalculator resourceCalculator = new DefaultResourceCalculator();
+  private final ResourceCalculator resourceCalculator =
+      new DefaultResourceCalculator();
   
+  private final ResourceCalculator dominantResourceCalculator =
+      new DominantResourceCalculator();
+
   @Before
   public void setUp() throws Exception {
+    setUpInternal(resourceCalculator);
+  }
+
+  private void setUpWithDominantResourceCalculator() throws Exception {
+    setUpInternal(dominantResourceCalculator);
+  }
+
+  private void setUpInternal(ResourceCalculator rC) throws Exception {
     CapacityScheduler spyCs = new CapacityScheduler();
+    queues = new HashMap<String, CSQueue>();
     cs = spy(spyCs);
     rmContext = TestUtils.getMockRMContext();
     spyRMContext = spy(rmContext);
@@ -128,6 +146,8 @@ public class TestLeafQueue {
     csConf = 
         new CapacitySchedulerConfiguration();
     csConf.setBoolean("yarn.scheduler.capacity.user-metrics.enable", true);
+    csConf.setBoolean(
+        "yarn.scheduler.capacity.reservations-continue-look-all-nodes", false);
     final String newRoot = "root" + System.currentTimeMillis();
     setupQueueConfiguration(csConf, newRoot);
     YarnConfiguration conf = new YarnConfiguration();
@@ -146,6 +166,7 @@ public class TestLeafQueue {
         thenReturn(CapacityScheduler.nonPartitionedQueueComparator);
     when(csContext.getResourceCalculator()).
         thenReturn(resourceCalculator);
+    when(csContext.getResourceCalculator()).thenReturn(rC);
     when(csContext.getRMContext()).thenReturn(rmContext);
     RMContainerTokenSecretManager containerTokenSecretManager =
         new RMContainerTokenSecretManager(conf);
@@ -172,7 +193,8 @@ public class TestLeafQueue {
         .thenReturn(new YarnConfiguration());
     when(cs.getNumClusterNodes()).thenReturn(3);
   }
-  
+
+
   private static final String A = "a";
   private static final String B = "b";
   private static final String C = "c";
@@ -601,14 +623,180 @@ public class TestLeafQueue {
     assertEquals((int)(a.getCapacity() * node_0.getTotalResource().getMemorySize()),
         a.getMetrics().getAvailableMB());
   }
-  
+  @Test
+  public void testDRFUsageRatioRounding() throws Exception {
+    CSAssignment assign;
+    setUpWithDominantResourceCalculator();
+    // Mock the queue
+    LeafQueue b = stubLeafQueue((LeafQueue) queues.get(E));
+
+    // Users
+    final String user0 = "user_0";
+
+    // Submit applications
+    final ApplicationAttemptId appAttemptId0 =
+        TestUtils.getMockApplicationAttemptId(0, 0);
+    FiCaSchedulerApp app0 =
+        new FiCaSchedulerApp(appAttemptId0, user0, b,
+            b.getActiveUsersManager(), spyRMContext);
+    b.submitApplicationAttempt(app0, user0);
+
+    // Setup some nodes
+    String host0 = "127.0.0.1";
+    FiCaSchedulerNode node0 =
+        TestUtils.getMockNode(host0, DEFAULT_RACK, 0, 80 * GB, 100);
+
+    // Make cluster relatively large so usageRatios are small
+    int numNodes = 1000;
+    Resource clusterResource =
+        Resources.createResource(numNodes * (80 * GB), numNodes * 100);
+    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
+
+    // Set user-limit. Need a small queue within a large cluster.
+    b.setUserLimit(50);
+    b.setUserLimitFactor(1000000);
+    b.setMaxCapacity(1.0f);
+    b.setAbsoluteCapacity(0.00001f);
+
+    // First allocation is larger than second but is still vcore dominant
+    // so usage ratio will be based on vcores. If consumedRatio doesn't round
+    // in our favor then new limit calculation will actually be less than
+    // what is currently consumed and we will fail to allocate
+    Priority priority = TestUtils.createMockPriority(1);
+    app0.updateResourceRequests(Collections.singletonList(TestUtils
+        .createResourceRequest(ResourceRequest.ANY, 20 * GB, 29, 1, true,
+            priority, recordFactory, RMNodeLabelsManager.NO_LABEL)));
+    assign = b.assignContainers(clusterResource, node0, new ResourceLimits(
+        clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+    app0.updateResourceRequests(Collections.singletonList(TestUtils
+        .createResourceRequest(ResourceRequest.ANY, 10 * GB, 29, 2, true,
+            priority, recordFactory, RMNodeLabelsManager.NO_LABEL)));
+    assign = b.assignContainers(clusterResource, node0, new ResourceLimits(
+        clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+    assertTrue("Still within limits, should assign",
+        assign.getResource().getMemorySize() > 0);
+  }
+
+  @Test
+  public void testDRFUserLimits() throws Exception {
+    setUpWithDominantResourceCalculator();
+
+    // Mock the queue
+    LeafQueue b = stubLeafQueue((LeafQueue) queues.get(B));
+    // unset maxCapacity
+    b.setMaxCapacity(1.0f);
+
+    // Users
+    final String user0 = "user_0";
+    final String user1 = "user_1";
+
+    // Submit applications
+    final ApplicationAttemptId appAttemptId0 =
+        TestUtils.getMockApplicationAttemptId(0, 0);
+    FiCaSchedulerApp app0 =
+        new FiCaSchedulerApp(appAttemptId0, user0, b,
+            b.getActiveUsersManager(), spyRMContext);
+    b.submitApplicationAttempt(app0, user0);
+
+    final ApplicationAttemptId appAttemptId2 =
+        TestUtils.getMockApplicationAttemptId(2, 0);
+    FiCaSchedulerApp app2 =
+        new FiCaSchedulerApp(appAttemptId2, user1, b,
+            b.getActiveUsersManager(), spyRMContext);
+    b.submitApplicationAttempt(app2, user1);
+
+    // Setup some nodes
+    String host0 = "127.0.0.1";
+    FiCaSchedulerNode node0 =
+        TestUtils.getMockNode(host0, DEFAULT_RACK, 0, 8 * GB, 100);
+    String host1 = "127.0.0.2";
+    FiCaSchedulerNode node1 =
+        TestUtils.getMockNode(host1, DEFAULT_RACK, 0, 8 * GB, 100);
+
+    int numNodes = 2;
+    Resource clusterResource =
+        Resources.createResource(numNodes * (8 * GB), numNodes * 100);
+    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
+
+    // Setup resource-requests so that one application is memory dominant
+    // and other application is vcores dominant
+    Priority priority = TestUtils.createMockPriority(1);
+    app0.updateResourceRequests(Collections.singletonList(TestUtils
+        .createResourceRequest(ResourceRequest.ANY, 1 * GB, 40, 10, true,
+            priority, recordFactory, RMNodeLabelsManager.NO_LABEL)));
+
+    app2.updateResourceRequests(Collections.singletonList(TestUtils
+        .createResourceRequest(ResourceRequest.ANY, 2 * GB, 10, 10, true,
+            priority, recordFactory, RMNodeLabelsManager.NO_LABEL)));
+
+    /**
+     * Start testing...
+     */
+
+    // Set user-limit
+    b.setUserLimit(50);
+    b.setUserLimitFactor(2);
+    User queueUser0 = b.getUser(user0);
+    User queueUser1 = b.getUser(user1);
+
+    assertEquals("There should 2 active users!", 2, b
+        .getActiveUsersManager().getNumActiveUsers());
+    // Fill both Nodes as far as we can
+    CSAssignment assign;
+    do {
+      assign =
+          b.assignContainers(clusterResource, node0, new ResourceLimits(
+              clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+      LOG.info(assign.toString());
+    } while (assign.getResource().getMemorySize() > 0 &&
+        assign.getAssignmentInformation().getNumReservations() == 0);
+    do {
+      assign =
+          b.assignContainers(clusterResource, node1, new ResourceLimits(
+              clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY);
+    } while (assign.getResource().getMemorySize() > 0 &&
+        assign.getAssignmentInformation().getNumReservations() == 0);
+    //LOG.info("user_0: " + queueUser0.getUsed());
+    //LOG.info("user_1: " + queueUser1.getUsed());
+
+    assertTrue("Verify user_0 got resources ", queueUser0.getUsed()
+        .getMemorySize() > 0);
+    assertTrue("Verify user_1 got resources ", queueUser1.getUsed()
+        .getMemorySize() > 0);
+    assertTrue(
+        "Exepected AbsoluteUsedCapacity > 0.95, got: "
+            + b.getAbsoluteUsedCapacity(), b.getAbsoluteUsedCapacity() > 0.95);
+
+    // Verify consumedRatio is based on dominant resources
+    float expectedRatio =
+        queueUser0.getUsed().getVirtualCores()
+            / (numNodes * 100.0f)
+            + queueUser1.getUsed().getMemorySize()
+            / (numNodes * 8.0f * GB);
+    assertEquals(expectedRatio, b.getUsageRatio(""), 0.001);
+    // Add another node and make sure consumedRatio is adjusted
+    // accordingly.
+    numNodes = 3;
+    clusterResource =
+        Resources.createResource(numNodes * (8 * GB), numNodes * 100);
+    when(csContext.getNumClusterNodes()).thenReturn(numNodes);
+    root.updateClusterResource(clusterResource, new ResourceLimits(
+        clusterResource));
+    expectedRatio =
+        queueUser0.getUsed().getVirtualCores()
+            / (numNodes * 100.0f)
+            + queueUser1.getUsed().getMemorySize()
+            / (numNodes * 8.0f * GB);
+    assertEquals(expectedRatio, b.getUsageRatio(""), 0.001);
+  }
+
   @Test
   public void testUserLimits() throws Exception {
     // Mock the queue
     LeafQueue a = stubLeafQueue((LeafQueue)queues.get(A));
     //unset maxCapacity
     a.setMaxCapacity(1.0f);
-    
+
     // Users
     final String user_0 = "user_0";
     final String user_1 = "user_1";

http://git-wip-us.apache.org/repos/asf/hadoop/blob/3bf2e16f/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
index d2c5174..600d99a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java
@@ -160,10 +160,17 @@ public class TestUtils {
   public static ResourceRequest createResourceRequest(
       String resourceName, int memory, int numContainers, boolean relaxLocality,
       Priority priority, RecordFactory recordFactory, String labelExpression) {
-    ResourceRequest request = 
+    return createResourceRequest(resourceName, memory, 1, numContainers,
+        relaxLocality, priority, recordFactory, labelExpression);
+  }
+
+  public static ResourceRequest createResourceRequest(String resourceName,
+      int memory, int vcores, int numContainers, boolean relaxLocality,
+      Priority priority, RecordFactory recordFactory, String labelExpression) {
+    ResourceRequest request =
         recordFactory.newRecordInstance(ResourceRequest.class);
-    Resource capability = Resources.createResource(memory, 1);
-    
+    Resource capability = Resources.createResource(memory, vcores);
+
     request.setNumContainers(numContainers);
     request.setResourceName(resourceName);
     request.setCapability(capability);
@@ -191,13 +198,18 @@ public class TestUtils {
     return ApplicationAttemptId.newInstance(applicationId, attemptId);
   }
   
-  public static FiCaSchedulerNode getMockNode(
-      String host, String rack, int port, int capability) {
+  public static FiCaSchedulerNode getMockNode(String host, String rack,
+      int port, int memory) {
+    return getMockNode(host, rack, port, memory, 1);
+  }
+
+  public static FiCaSchedulerNode getMockNode(String host, String rack,
+      int port, int memory, int vcores) {
     NodeId nodeId = NodeId.newInstance(host, port);
     RMNode rmNode = mock(RMNode.class);
     when(rmNode.getNodeID()).thenReturn(nodeId);
     when(rmNode.getTotalCapability()).thenReturn(
-        Resources.createResource(capability, 1));
+        Resources.createResource(memory, vcores));
     when(rmNode.getNodeAddress()).thenReturn(host+":"+port);
     when(rmNode.getHostName()).thenReturn(host);
     when(rmNode.getRackName()).thenReturn(rack);


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message