hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sur...@apache.org
Subject svn commit: r1491544 - in /hadoop/common/branches/branch-1.2: CHANGES.txt src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
Date Mon, 10 Jun 2013 17:59:29 GMT
Author: suresh
Date: Mon Jun 10 17:59:29 2013
New Revision: 1491544

URL: http://svn.apache.org/r1491544
Log:
HDFS-4261. Merge r1488865 from branch-1

Modified:
    hadoop/common/branches/branch-1.2/CHANGES.txt
    hadoop/common/branches/branch-1.2/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java
    hadoop/common/branches/branch-1.2/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java

Modified: hadoop/common/branches/branch-1.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.2/CHANGES.txt?rev=1491544&r1=1491543&r2=1491544&view=diff
==============================================================================
--- hadoop/common/branches/branch-1.2/CHANGES.txt (original)
+++ hadoop/common/branches/branch-1.2/CHANGES.txt Mon Jun 10 17:59:29 2013
@@ -29,6 +29,9 @@ Release 1.2.1 - Unreleased 
     HDFS-4699. Additional conditions for avoiding unnecessary 
     DataNode.checkDiskError calls. (Chris Nauroth via kihwal)
 
+    HDFS-4261. Fix bugs in Balancer causing infinite loop and
+    TestBalancerWithNodeGroup timing out.  (Junping Du via szetszwo)
+
 Release 1.2.0 - 2013.05.05
 
   INCOMPATIBLE CHANGES

Modified: hadoop/common/branches/branch-1.2/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.2/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java?rev=1491544&r1=1491543&r2=1491544&view=diff
==============================================================================
--- hadoop/common/branches/branch-1.2/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java (original)
+++ hadoop/common/branches/branch-1.2/src/hdfs/org/apache/hadoop/hdfs/server/balancer/Balancer.java Mon Jun 10 17:59:29 2013
@@ -193,6 +193,8 @@ public class Balancer implements Tool {
    */
   public static final int MAX_NUM_CONCURRENT_MOVES = 5;
   
+  public static final int MAX_NO_PENDING_BLOCK_ITERATIONS = 5;
+  
   private Configuration conf;
 
   private double threshold = 10D;
@@ -746,6 +748,7 @@ public class Balancer implements Tool {
       long startTime = Util.now();
       this.blocksToReceive = 2*scheduledSize;
       boolean isTimeUp = false;
+      int noPendingBlockIteration = 0;
       while(!isTimeUp && scheduledSize > 0 &&
           (!srcBlockList.isEmpty() || blocksToReceive > 0)) {
         PendingBlockMove pendingBlock = chooseNextBlockToMove();
@@ -769,7 +772,15 @@ public class Balancer implements Tool {
             LOG.warn(StringUtils.stringifyException(e));
             return;
           }
-        } 
+        } else {
+          // source node cannot find a pendingBlockToMove, iteration +1
+          noPendingBlockIteration++;
+          // in case no blocks can be moved for source node's task,
+          // jump out of while-loop after 5 iterations.
+          if (noPendingBlockIteration >= MAX_NO_PENDING_BLOCK_ITERATIONS) {
+            scheduledSize = 0;
+          }
+        }
         
         // check if time is up or not
         if (Util.now()-startTime > MAX_ITERATION_TIME) {
@@ -1496,7 +1507,11 @@ public class Balancer implements Tool {
       Formatter formatter = new Formatter(System.out);
      System.out.println("Time Stamp               Iteration#  Bytes Already Moved  Bytes Left To Move  Bytes Being Moved");
       int iterations = 0;
+      
       while (true) {
+        // clean all lists at the beginning of balancer iteration.
+        resetData();
+
         /* get all live datanodes of a cluster and their disk usage
          * decide the number of bytes need to be moved
          */
@@ -1547,9 +1562,6 @@ public class Balancer implements Tool {
             return NO_MOVE_PROGRESS;
           }
         }
-
-        // clean all lists
-        resetData();
         
         try {
           Thread.sleep(2*conf.getLong("dfs.heartbeat.interval", 3));

Modified: hadoop/common/branches/branch-1.2/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-1.2/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java?rev=1491544&r1=1491543&r2=1491544&view=diff
==============================================================================
--- hadoop/common/branches/branch-1.2/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java (original)
+++ hadoop/common/branches/branch-1.2/src/test/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithNodeGroup.java Mon Jun 10 17:59:29 2013
@@ -216,7 +216,7 @@ public class TestBalancerWithNodeGroup {
    * to n0 or n1 as balancer policy with node group. Thus, we expect the balancer
    * to end in 5 iterations without move block process.
    */
-  @Test
+  @Test(timeout=60000)
   public void testBalancerEndInNoMoveProgress() throws Exception {
     Configuration conf = createConf();
     long[] capacities = new long[]{CAPACITY, CAPACITY, CAPACITY, CAPACITY};
@@ -255,7 +255,7 @@ public class TestBalancerWithNodeGroup {
    * Create a cluster with even distribution, and a new empty node is added to
    * the cluster, then test rack locality for balancer policy. 
    */
-  @Test
+  @Test(timeout=60000)
   public void testBalancerWithRackLocality() throws Exception {
     Configuration conf = createConf();
     long[] capacities = new long[]{CAPACITY, CAPACITY};
@@ -294,7 +294,7 @@ public class TestBalancerWithNodeGroup {
       totalCapacity += newCapacity;
 
       // run balancer and validate results
-      runBalancer(conf, totalUsedSpace, totalCapacity);
+      runBalancerCanFinish(conf, totalUsedSpace, totalCapacity);
       
       DatanodeInfo[] datanodeReport = 
               client.getDatanodeReport(DatanodeReportType.ALL);
@@ -321,7 +321,7 @@ public class TestBalancerWithNodeGroup {
   /** Create a cluster with even distribution, and a new empty node is added to
    *  the cluster, then test rack locality for balancer policy. 
    **/
-  @Test
+  @Test(timeout=60000)
   public void testBalancerWithNodeGroup() throws Exception {
     Configuration conf = createConf();
     long[] capacities = new long[]{CAPACITY, CAPACITY};



Mime
View raw message