Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B165475C5 for ; Thu, 6 Oct 2011 20:08:39 +0000 (UTC) Received: (qmail 90408 invoked by uid 500); 6 Oct 2011 20:08:39 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 90364 invoked by uid 500); 6 Oct 2011 20:08:39 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 90356 invoked by uid 99); 6 Oct 2011 20:08:39 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 06 Oct 2011 20:08:39 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 06 Oct 2011 20:08:37 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 540A823889C5 for ; Thu, 6 Oct 2011 20:08:17 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1179806 - in /hbase/branches/0.92: ./ src/main/java/org/apache/hadoop/hbase/master/ src/test/java/org/apache/hadoop/hbase/master/ Date: Thu, 06 Oct 2011 20:08:17 -0000 To: commits@hbase.apache.org From: stack@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20111006200817.540A823889C5@eris.apache.org> Author: stack Date: Thu Oct 6 20:08:16 2011 New Revision: 1179806 URL: http://svn.apache.org/viewvc?rev=1179806&view=rev Log: HBASE-4402 Retaining locality after restart broken Modified: hbase/branches/0.92/CHANGES.txt hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java Modified: hbase/branches/0.92/CHANGES.txt URL: http://svn.apache.org/viewvc/hbase/branches/0.92/CHANGES.txt?rev=1179806&r1=1179805&r2=1179806&view=diff ============================================================================== --- hbase/branches/0.92/CHANGES.txt (original) +++ hbase/branches/0.92/CHANGES.txt Thu Oct 6 20:08:16 2011 @@ -324,6 +324,7 @@ Release 0.92.0 - Unreleased (Kay Kay) HBASE-4481 TestMergeTool failed in 0.92 build 20 HBASE-4386 Fix a potential NPE in TaskMonitor (todd) + HBASE-4402 Retaining locality after restart broken TESTS HBASE-4492 TestRollingRestart fails intermittently Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1179806&r1=1179805&r2=1179806&view=diff ============================================================================== --- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original) +++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Thu Oct 6 20:08:16 2011 @@ -620,7 +620,7 @@ public class AssignmentManager extends Z (System.currentTimeMillis() - 15000); LOG.debug("Handling transition=" + data.getEventType() + ", server=" + data.getOrigin() + ", region=" + - prettyPrintedRegionName + + (prettyPrintedRegionName == null? "null": prettyPrintedRegionName) + (lateEvent? ", which is more than 15 seconds late" : "")); RegionState regionState = regionsInTransition.get(encodedName); switch (data.getEventType()) { Modified: hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java?rev=1179806&r1=1179805&r2=1179806&view=diff ============================================================================== --- hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java (original) +++ hbase/branches/0.92/src/main/java/org/apache/hadoop/hbase/master/DefaultLoadBalancer.java Thu Oct 6 20:08:16 2011 @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.Random; +import java.util.Set; import java.util.TreeMap; import org.apache.commons.logging.Log; @@ -46,7 +47,10 @@ import org.apache.hadoop.hbase.TableExis import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Bytes; +import com.google.common.base.Joiner; +import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.MinMaxPriorityQueue; +import com.google.common.collect.Sets; /** * Makes decisions about the placement and movement of Regions across @@ -576,20 +580,71 @@ public class DefaultLoadBalancer impleme */ public Map> retainAssignment( Map regions, List servers) { + // Group all of the old assignments by their hostname. + // We can't group directly by ServerName since the servers all have + // new start-codes. + + // Group the servers by their hostname. It's possible we have multiple + // servers on the same host on different ports. + ArrayListMultimap serversByHostname = + ArrayListMultimap.create(); + for (ServerName server : servers) { + serversByHostname.put(server.getHostname(), server); + } + + // Now come up with new assignments Map> assignments = new TreeMap>(); + for (ServerName server : servers) { assignments.put(server, new ArrayList()); } - for (Map.Entry region : regions.entrySet()) { - ServerName sn = region.getValue(); - if (sn != null && servers.contains(sn)) { - assignments.get(sn).add(region.getKey()); + + // Collection of the hostnames that used to have regions + // assigned, but for which we no longer have any RS running + // after the cluster restart. + Set oldHostsNoLongerPresent = Sets.newTreeSet(); + + int numRandomAssignments = 0; + int numRetainedAssigments = 0; + for (Map.Entry entry : regions.entrySet()) { + HRegionInfo region = entry.getKey(); + ServerName oldServerName = entry.getValue(); + List localServers = new ArrayList(); + if (oldServerName != null) { + localServers = serversByHostname.get(oldServerName.getHostname()); + } + if (localServers.isEmpty()) { + // No servers on the new cluster match up with this hostname, + // assign randomly. + ServerName randomServer = servers.get(RANDOM.nextInt(servers.size())); + assignments.get(randomServer).add(region); + numRandomAssignments++; + if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname()); + } else if (localServers.size() == 1) { + // the usual case - one new server on same host + assignments.get(localServers.get(0)).add(region); + numRetainedAssigments++; } else { - int size = assignments.size(); - assignments.get(servers.get(RANDOM.nextInt(size))).add(region.getKey()); + // multiple new servers in the cluster on this same host + int size = localServers.size(); + ServerName target = localServers.get(RANDOM.nextInt(size)); + assignments.get(target).add(region); + numRetainedAssigments++; } } + + String randomAssignMsg = ""; + if (numRandomAssignments > 0) { + randomAssignMsg = numRandomAssignments + " regions were assigned " + + "to random hosts, since the old hosts for these regions are no " + + "longer present in the cluster. These hosts were:\n " + + Joiner.on("\n ").join(oldHostsNoLongerPresent); + } + + LOG.info("Reassigned " + regions.size() + " regions. " + + numRetainedAssigments + " retained the pre-restart assignment. " + + randomAssignMsg); return assignments; } Modified: hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java URL: http://svn.apache.org/viewvc/hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java?rev=1179806&r1=1179805&r2=1179806&view=diff ============================================================================== --- hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java (original) +++ hbase/branches/0.92/src/test/java/org/apache/hadoop/hbase/master/TestDefaultLoadBalancer.java Thu Oct 6 20:08:16 2011 @@ -277,7 +277,12 @@ public class TestDefaultLoadBalancer { Map existing = new TreeMap(); for (int i = 0; i < regions.size(); i++) { - existing.put(regions.get(i), servers.get(i % servers.size()).getServerName()); + ServerName sn = servers.get(i % servers.size()).getServerName(); + // The old server would have had same host and port, but different + // start code! + ServerName snWithOldStartCode = + new ServerName(sn.getHostname(), sn.getPort(), sn.getStartcode() - 10); + existing.put(regions.get(i), snWithOldStartCode); } List listOfServerNames = getListOfServerNames(servers); Map> assignment = @@ -296,9 +301,9 @@ public class TestDefaultLoadBalancer { // Remove two of the servers that were previously there List servers3 = new ArrayList(servers); - servers3.remove(servers3.size()-1); - servers3.remove(servers3.size()-2); - listOfServerNames = getListOfServerNames(servers2); + servers3.remove(0); + servers3.remove(0); + listOfServerNames = getListOfServerNames(servers3); assignment = loadBalancer.retainAssignment(existing, listOfServerNames); assertRetainedAssignment(existing, listOfServerNames, assignment); } @@ -338,13 +343,20 @@ public class TestDefaultLoadBalancer { assertEquals(existing.size(), assignedRegions.size()); // Verify condition 2, if server had existing assignment, must have same - Set onlineAddresses = new TreeSet(); - for (ServerName s : servers) onlineAddresses.add(s); + Set onlineHostNames = new TreeSet(); + for (ServerName s : servers) { + onlineHostNames.add(s.getHostname()); + } + for (Map.Entry> a : assignment.entrySet()) { + ServerName assignedTo = a.getKey(); for (HRegionInfo r : a.getValue()) { ServerName address = existing.get(r); - if (address != null && onlineAddresses.contains(address)) { - assertTrue(a.getKey().equals(address)); + if (address != null && onlineHostNames.contains(address.getHostname())) { + // this region was prevously assigned somewhere, and that + // host is still around, then it should be re-assigned on the + // same host + assertEquals(address.getHostname(), assignedTo.getHostname()); } } } @@ -470,7 +482,7 @@ public class TestDefaultLoadBalancer { ServerName sn = this.serverQueue.poll(); return new ServerAndLoad(sn, numRegionsPerServer); } - String host = "127.0.0.1"; + String host = "server" + rand.nextInt(100000); int port = rand.nextInt(60000); long startCode = rand.nextLong(); ServerName sn = new ServerName(host, port, startCode);