From: matei@apache.org
To: core-commits@hadoop.apache.org
Subject: svn commit: r736917 - in /hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src: java/org/apache/hadoop/mapred/FairScheduler.java test/org/apache/hadoop/mapred/TestFairScheduler.java
Date: Fri, 23 Jan 2009 04:59:02 -0000
Message-Id: <20090123045902.580EE23888E6@eris.apache.org>

Author: matei
Date: Thu Jan 22 20:59:01 2009
New Revision: 736917

URL: http://svn.apache.org/viewvc?rev=736917&view=rev
Log:
HADOOP-5075. Potential infinite loop in updateMinSlots.
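For context beyond the one-line log: the loop can only hang because Java's double division never throws. When every job in a pool has zero maps, that pool's map weight sum is 0.0; dividing a pool weight by it yields Infinity, and a later Infinity / Infinity yields NaN, whose cast to int is 0, so each pass of updateMinSlots hands out zero slots and slotsLeft never shrinks. A minimal sketch of those mechanics (illustrative class and values only, not scheduler code):

    public class NaNShareDemo {
      public static void main(String[] args) {
        double poolWeight = 1.0;
        double mapWeightSum = 0.0;  // every job in the pool has zero maps
        // Dividing by 0.0 silently produces Infinity instead of throwing.
        double weight = poolWeight / mapWeightSum;            // Infinity
        double totalWeight = weight;                          // sums stay Infinity
        // Infinity / Infinity is NaN; Math.ceil(NaN) is NaN; (int) NaN is 0.
        int share = (int) Math.ceil(100 * weight / totalWeight);
        System.out.println(weight + " -> share " + share);    // "Infinity -> share 0"
        // A share of 0 means giveMinSlots never decrements slotsLeft,
        // which is the infinite loop this commit guards against.
      }
    }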
Modified:
    hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java
    hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/test/org/apache/hadoop/mapred/TestFairScheduler.java

Modified: hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java?rev=736917&r1=736916&r2=736917&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java (original)
+++ hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/java/org/apache/hadoop/mapred/FairScheduler.java Thu Jan 22 20:59:01 2009
@@ -480,7 +480,7 @@
         }
       }
       mapWeightSums.put(pool.getName(), mapWeightSum);
-      reduceWeightSums.put(pool.getName(), mapWeightSum);
+      reduceWeightSums.put(pool.getName(), reduceWeightSum);
     }
     // And normalize the weights based on pool sums and pool weights
     // to share fairly across pools (proportional to their weights)
@@ -489,8 +489,16 @@
       JobInfo info = entry.getValue();
       String pool = poolMgr.getPoolName(job);
       double poolWeight = poolMgr.getPoolWeight(pool);
-      info.mapWeight *= (poolWeight / mapWeightSums.get(pool));
-      info.reduceWeight *= (poolWeight / reduceWeightSums.get(pool));
+      double mapWeightSum = mapWeightSums.get(pool);
+      double reduceWeightSum = reduceWeightSums.get(pool);
+      if (mapWeightSum == 0)
+        info.mapWeight = 0;
+      else
+        info.mapWeight *= (poolWeight / mapWeightSum);
+      if (reduceWeightSum == 0)
+        info.reduceWeight = 0;
+      else
+        info.reduceWeight *= (poolWeight / reduceWeightSum);
     }
   }
 
@@ -555,6 +563,12 @@
         int share = (int) Math.ceil(oldSlots * weight / totalWeight);
         slotsLeft = giveMinSlots(job, type, slotsLeft, share);
       }
+      if (slotsLeft > 0) {
+        LOG.warn("Had slotsLeft = " + slotsLeft + " after the final " +
+            "loop in updateMinSlots. This probably means some fair " +
+            "scheduler weights are being set to NaN or Infinity.");
+      }
+      break;
     }
   }
 }

Modified: hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/test/org/apache/hadoop/mapred/TestFairScheduler.java
URL: http://svn.apache.org/viewvc/hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/test/org/apache/hadoop/mapred/TestFairScheduler.java?rev=736917&r1=736916&r2=736917&view=diff
==============================================================================
--- hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/test/org/apache/hadoop/mapred/TestFairScheduler.java (original)
+++ hadoop/core/branches/branch-0.20/src/contrib/fairscheduler/src/test/org/apache/hadoop/mapred/TestFairScheduler.java Thu Jan 22 20:59:01 2009
@@ -1152,6 +1152,51 @@
   }
 
   /**
+   * This test submits jobs in two pools, poolA and poolB. None of the
+   * jobs in poolA have maps, but this should not affect their reduce
+   * share.
+   */
+  public void testPoolWeightsWhenNoMaps() throws Exception {
+    // Set up pools file
+    PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
+    out.println("<?xml version=\"1.0\"?>");
+    out.println("<allocations>");
+    out.println("<pool name=\"poolA\">");
+    out.println("<weight>2.0</weight>");
+    out.println("</pool>");
+    out.println("<pool name=\"poolB\">");
+    out.println("<weight>1.0</weight>");
+    out.println("</pool>");
+    out.println("</allocations>");
+    out.close();
+    scheduler.getPoolManager().reloadAllocs();
+
+    // Submit jobs, advancing time in-between to make sure that they are
+    // all submitted at distinct times.
+    JobInProgress job1 = submitJob(JobStatus.RUNNING, 0, 10, "poolA");
+    JobInfo info1 = scheduler.infos.get(job1);
+    JobInProgress job2 = submitJob(JobStatus.RUNNING, 0, 10, "poolA");
+    JobInfo info2 = scheduler.infos.get(job2);
+    JobInProgress job3 = submitJob(JobStatus.RUNNING, 10, 10, "poolB");
+    JobInfo info3 = scheduler.infos.get(job3);
+    advanceTime(10);
+
+    assertEquals(0, info1.mapWeight, 0.01);
+    assertEquals(1.0, info1.reduceWeight, 0.01);
+    assertEquals(0, info2.mapWeight, 0.01);
+    assertEquals(1.0, info2.reduceWeight, 0.01);
+    assertEquals(1.0, info3.mapWeight, 0.01);
+    assertEquals(1.0, info3.reduceWeight, 0.01);
+
+    assertEquals(0, info1.mapFairShare, 0.01);
+    assertEquals(1.33, info1.reduceFairShare, 0.01);
+    assertEquals(0, info2.mapFairShare, 0.01);
+    assertEquals(1.33, info2.reduceFairShare, 0.01);
+    assertEquals(4, info3.mapFairShare, 0.01);
+    assertEquals(1.33, info3.reduceFairShare, 0.01);
+  }
+
+  /**
    * Tests that max-running-tasks per node are set by assigning load
    * equally across the cluster in CapBasedLoadManager.
    */
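A back-of-envelope check of the expected shares above (illustrative only; it assumes the test harness exposes 4 map and 4 reduce slots, which is what the assertions imply): poolA's weight of 2.0 is split across two reduce-only jobs and poolB's 1.0 across one, while poolB's single job inherits the entire map share because the poolA jobs have mapWeight 0.

    public class FairShareCheck {
      public static void main(String[] args) {
        double slots = 4.0;                 // assumed map and reduce slot counts
        double poolA = 2.0, poolB = 1.0;    // pool weights from ALLOC_FILE
        double poolAReduce = slots * poolA / (poolA + poolB);  // 2.67 for poolA
        System.out.println(poolAReduce / 2);                   // 1.33 per poolA job
        System.out.println(slots * poolB / (poolA + poolB));   // 1.33 for job3
        // job1 and job2 have no maps, so job3's map fair share is all 4 slots.
        System.out.println(slots);                             // 4.0
      }
    }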