Return-Path: Delivered-To: apmail-lucene-hadoop-commits-archive@locus.apache.org Received: (qmail 95819 invoked from network); 22 Aug 2007 04:05:34 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.2) by minotaur.apache.org with SMTP; 22 Aug 2007 04:05:34 -0000 Received: (qmail 35361 invoked by uid 500); 22 Aug 2007 04:05:31 -0000 Delivered-To: apmail-lucene-hadoop-commits-archive@lucene.apache.org Received: (qmail 35260 invoked by uid 500); 22 Aug 2007 04:05:31 -0000 Mailing-List: contact hadoop-commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hadoop-dev@lucene.apache.org Delivered-To: mailing list hadoop-commits@lucene.apache.org Received: (qmail 35251 invoked by uid 99); 22 Aug 2007 04:05:31 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 21 Aug 2007 21:05:31 -0700 X-ASF-Spam-Status: No, hits=-100.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO eris.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 22 Aug 2007 04:06:07 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 1CC281A981A; Tue, 21 Aug 2007 21:05:09 -0700 (PDT) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r568404 - in /lucene/hadoop/trunk/src/contrib/hbase: CHANGES.txt src/java/org/apache/hadoop/hbase/HMaster.java Date: Wed, 22 Aug 2007 04:05:08 -0000 To: hadoop-commits@lucene.apache.org From: stack@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20070822040509.1CC281A981A@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: stack Date: Tue Aug 21 21:05:08 2007 New Revision: 568404 URL: http://svn.apache.org/viewvc?rev=568404&view=rev Log: HADOOP-1747 On a cluster, on restart, regions multiply assigned M src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Removed some empty lines so I can squeeze more code into a screenful. (assignedRegions): Factored out some code into own methods so this method is made a bit shorter. Added early returns near top -- if nothing to assign, etc. -- so less nesting. Added fix: Instead of iterating over unassignedRegions after all the loadings have been calculated, instead iterate over the locally calculated map, regionsToAssign (Otherwise, we were running over the same territory each time through the loop and were thus giving out same region multiple times). (regionsPerServer, assignRegionsToOneServer, getRegionsToAssign): Added. Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=568404&r1=568403&r2=568404&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Tue Aug 21 21:05:08 2007 @@ -14,6 +14,7 @@ shutdown message HADOOP-1729 Recent renaming or META tables breaks hbase shell HADOOP-1730 unexpected null value causes META scanner to exit (silently) + HADOOP-1747 On a cluster, on restart, regions multiply assigned IMPROVEMENTS HADOOP-1737 Make HColumnDescriptor data publically members settable Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java?rev=568404&r1=568403&r2=568404&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java Tue Aug 21 21:05:08 2007 @@ -65,7 +65,6 @@ public class HMaster implements HConstants, HMasterInterface, HMasterRegionInterface, Runnable { - /** {@inheritDoc} */ public long getProtocolVersion(String protocol, @SuppressWarnings("unused") long clientVersion) throws IOException { @@ -189,7 +188,6 @@ // Array to hold list of split parents found. Scan adds to list. After // scan we go check if parents can be removed. - Map> splitParents = new HashMap>(); try { @@ -211,17 +209,14 @@ results.put(key.getColumn(), ((ImmutableBytesWritable) e.getValue()).get()); } - HRegionInfo info = (HRegionInfo) Writables.getWritable( results.get(COL_REGIONINFO), new HRegionInfo()); - String serverName = Writables.bytesToString(results.get(COL_SERVER)); long startCode = Writables.bytesToLong(results.get(COL_STARTCODE)); - if (LOG.isDebugEnabled()) { LOG.debug(Thread.currentThread().getName() + " scanner: " + - Long.valueOf(scannerId) + " regioninfo: {" + info.toString() + - "}, server: " + serverName + ", startCode: " + startCode); + Long.valueOf(scannerId) + " regioninfo: {" + info.toString() + + "}, server: " + serverName + ", startCode: " + startCode); } // Note Region has been assigned. @@ -467,7 +462,6 @@ try { // Don't interrupt us while we're working - synchronized(rootScannerLock) { scanRegion(new MetaRegion(rootRegionLocation.get(), HGlobals.rootRegionInfo.regionName, null)); @@ -478,7 +472,6 @@ try { e = RemoteExceptionHandler.decodeRemoteException( (RemoteException) e); - } catch (IOException ex) { e = ex; } @@ -717,6 +710,9 @@ * * We fill 'unassignedRecords' by scanning ROOT and META tables, learning the * set of all known valid regions. + * + *

Items are removed from this list when a region server reports in that + * the region has been deployed. */ SortedMap unassignedRegions; @@ -790,7 +786,6 @@ this.rand = new Random(); // Make sure the root directory exists! - if(! fs.exists(dir)) { fs.mkdirs(dir); } @@ -1157,8 +1152,8 @@ onlineMetaRegions.remove(info.getStartKey()); } - unassignedRegions.put(info.regionName, info); - assignAttempts.put(info.regionName, Long.valueOf(0L)); + this.unassignedRegions.put(info.regionName, info); + this.assignAttempts.put(info.regionName, Long.valueOf(0L)); } } } @@ -1323,18 +1318,17 @@ region.regionName); // Remove from unassigned list so we don't assign it to someone else - - unassignedRegions.remove(region.regionName); - assignAttempts.remove(region.regionName); + this.unassignedRegions.remove(region.regionName); + this.assignAttempts.remove(region.regionName); if (region.regionName.compareTo( HGlobals.rootRegionInfo.regionName) == 0) { // Store the Root Region location (in memory) - synchronized (rootRegionLocation) { - rootRegionLocation.set(new HServerAddress(info.getServerAddress())); - rootRegionLocation.notifyAll(); + this.rootRegionLocation. + set(new HServerAddress(info.getServerAddress())); + this.rootRegionLocation.notifyAll(); } break; } @@ -1436,12 +1430,11 @@ } // Figure out what the RegionServer ought to do, and write back. - assignRegions(info, serverName, returnMsgs); return returnMsgs.toArray(new HMsg[returnMsgs.size()]); } - - /** + + /* * Assigns regions to region servers attempting to balance the load across * all region servers * @@ -1451,149 +1444,155 @@ */ private void assignRegions(HServerInfo info, String serverName, ArrayList returnMsgs) { - - long now = System.currentTimeMillis(); - TreeSet regionsToAssign = new TreeSet(); - for (Map.Entry e: assignAttempts.entrySet()) { - if (now - e.getValue() > maxRegionOpenTime) { - regionsToAssign.add(e.getKey()); - } - } + + TreeSet regionsToAssign = getRegionsToAssign(); int nRegionsToAssign = regionsToAssign.size(); - - if (nRegionsToAssign > 0) { - if (serversToServerInfo.size() == 1) { - // Only one server. An unlikely case but still possible. - // Assign all unassigned regions to it. - - for (Text regionName: regionsToAssign) { - HRegionInfo regionInfo = unassignedRegions.get(regionName); - LOG.info("assigning region " + regionName + " to server " + - serverName); - - assignAttempts.put(regionName, Long.valueOf(now)); - returnMsgs.add(new HMsg(HMsg.MSG_REGION_OPEN, regionInfo)); - } - - } else { - // Multiple servers in play. - // We need to allocate regions only to most lightly loaded servers. - - HServerLoad thisServersLoad = info.getLoad(); - - synchronized (serversToServerInfo) { - SortedMap> lightServers = - loadToServers.headMap(thisServersLoad); - - // How many regions we can assign to more lightly loaded servers? - - int nregions = 0; - for (Map.Entry> e: lightServers.entrySet()) { - HServerLoad lightLoad = - new HServerLoad(e.getKey().getNumberOfRequests(), - e.getKey().getNumberOfRegions()); - - do { - lightLoad.setNumberOfRegions(lightLoad.getNumberOfRegions() + 1); - nregions += 1; - - } while (lightLoad.compareTo(thisServersLoad) <= 0 && - nregions < nRegionsToAssign); - - nregions *= e.getValue().size(); - - if (nregions >= nRegionsToAssign) { - break; - } + if (nRegionsToAssign <= 0) { + // No regions to assign. Return. + return; + } + + if (this.serversToServerInfo.size() == 1) { + assignRegionsToOneServer(regionsToAssign, serverName, returnMsgs); + // Finished. Return. + return; + } + + // Multiple servers in play. + // We need to allocate regions only to most lightly loaded servers. + HServerLoad thisServersLoad = info.getLoad(); + synchronized (this.serversToServerInfo) { + int nregions = regionsPerServer(nRegionsToAssign, thisServersLoad); + nRegionsToAssign -= nregions; + if (nRegionsToAssign > 0) { + // We still have more regions to assign. See how many we can assign + // before this server becomes more heavily loaded than the next + // most heavily loaded server. + SortedMap> heavyServers = + this.loadToServers.tailMap(thisServersLoad); + int nservers = 0; + HServerLoad heavierLoad = null; + for (Map.Entry> e : heavyServers.entrySet()) { + Set servers = e.getValue(); + nservers += servers.size(); + if (e.getKey().compareTo(thisServersLoad) == 0) { + // This is the load factor of the server we are considering + nservers -= 1; + continue; } - nRegionsToAssign -= nregions; - if (nRegionsToAssign > 0) { - // We still have more regions to assign. See how many we can assign - // before this server becomes more heavily loaded than the next - // most heavily loaded server. - - SortedMap> heavyServers = - loadToServers.tailMap(thisServersLoad); - int nservers = 0; - HServerLoad heavierLoad = null; - for (Map.Entry> e: - heavyServers.entrySet()) { - - Set servers = e.getValue(); - nservers += servers.size(); - - if (e.getKey().compareTo(thisServersLoad) == 0) { - // This is the load factor of the server we are considering - - nservers -= 1; - continue; - } - - // If we get here, we are at the first load entry that is a - // heavier load than the server we are considering - - heavierLoad = e.getKey(); - break; - } + // If we get here, we are at the first load entry that is a + // heavier load than the server we are considering + heavierLoad = e.getKey(); + break; + } - nregions = 0; - if (heavierLoad != null) { - // There is a more heavily loaded server - - for (HServerLoad load = - new HServerLoad(thisServersLoad.getNumberOfRequests(), - thisServersLoad.getNumberOfRegions()); - load.compareTo(heavierLoad) <= 0 && nregions < nRegionsToAssign; - load.setNumberOfRegions(load.getNumberOfRegions() + 1), + nregions = 0; + if (heavierLoad != null) { + // There is a more heavily loaded server + for (HServerLoad load = + new HServerLoad(thisServersLoad.getNumberOfRequests(), + thisServersLoad.getNumberOfRegions()); + load.compareTo(heavierLoad) <= 0 && + nregions < nRegionsToAssign; + load.setNumberOfRegions(load.getNumberOfRegions() + 1), nregions++) { - } - } - - if (nregions < nRegionsToAssign) { - // There are some more heavily loaded servers - // but we can't assign all the regions to this server. - - if (nservers > 0) { - // There are other servers that can share the load. - // Split regions that need assignment across the servers. - - nregions = - (int) Math.ceil((1.0 * nRegionsToAssign) / (1.0 * nservers)); - - } else { - // No other servers with same load. - // Split regions over all available servers - - nregions = (int) Math.ceil((1.0 * nRegionsToAssign) - / (1.0 * serversToServerInfo.size())); - - } - - } else { - // Assign all regions to this server - - nregions = nRegionsToAssign; - } - - for (Map.Entry e: unassignedRegions.entrySet()) { - Text regionName = e.getKey(); - HRegionInfo regionInfo = e.getValue(); - LOG.info("assigning region " + regionName + " to server " + - serverName); + // continue; + } + } - assignAttempts.put(regionName, Long.valueOf(now)); - returnMsgs.add(new HMsg(HMsg.MSG_REGION_OPEN, regionInfo)); + if (nregions < nRegionsToAssign) { + // There are some more heavily loaded servers + // but we can't assign all the regions to this server. + if (nservers > 0) { + // There are other servers that can share the load. + // Split regions that need assignment across the servers. + nregions = (int) Math.ceil((1.0 * nRegionsToAssign) + / (1.0 * nservers)); + } else { + // No other servers with same load. + // Split regions over all available servers + nregions = (int) Math.ceil((1.0 * nRegionsToAssign) + / (1.0 * serversToServerInfo.size())); + } + } else { + // Assign all regions to this server + nregions = nRegionsToAssign; + } - if (--nregions <= 0) { - break; - } - } + long now = System.currentTimeMillis(); + for (Text regionName: regionsToAssign) { + HRegionInfo regionInfo = this.unassignedRegions.get(regionName); + LOG.info("assigning region " + regionName + " to server " + + serverName); + this.assignAttempts.put(regionName, Long.valueOf(now)); + returnMsgs.add(new HMsg(HMsg.MSG_REGION_OPEN, regionInfo)); + if (--nregions <= 0) { + break; } } } } } + + /* + * @param nRegionsToAssign + * @param thisServersLoad + * @return How many regions we can assign to more lightly loaded servers + */ + private int regionsPerServer(final int nRegionsToAssign, + final HServerLoad thisServersLoad) { + SortedMap> lightServers = + this.loadToServers.headMap(thisServersLoad); + + int nRegions = 0; + for (Map.Entry> e : lightServers.entrySet()) { + HServerLoad lightLoad = new HServerLoad(e.getKey() + .getNumberOfRequests(), e.getKey().getNumberOfRegions()); + do { + lightLoad.setNumberOfRegions(lightLoad.getNumberOfRegions() + 1); + nRegions += 1; + } while (lightLoad.compareTo(thisServersLoad) <= 0 + && nRegions < nRegionsToAssign); + + nRegions *= e.getValue().size(); + if (nRegions >= nRegionsToAssign) { + break; + } + } + return nRegions; + } + + /* + * Assign all to the only server. An unlikely case but still possible. @param + * regionsToAssign @param serverName @param returnMsgs + */ + private void assignRegionsToOneServer(final TreeSet regionsToAssign, + final String serverName, final ArrayList returnMsgs) { + long now = System.currentTimeMillis(); + for (Text regionName: regionsToAssign) { + HRegionInfo regionInfo = this.unassignedRegions.get(regionName); + LOG.info("assigning region " + regionName + " to the only server " + + serverName); + this.assignAttempts.put(regionName, Long.valueOf(now)); + returnMsgs.add(new HMsg(HMsg.MSG_REGION_OPEN, regionInfo)); + } + } + + /* + * @return List of regions to assign. + */ + private TreeSet getRegionsToAssign() { + long now = System.currentTimeMillis(); + TreeSet regionsToAssign = new TreeSet(); + for (Map.Entry e: this.assignAttempts.entrySet()) { + long diff = now - e.getValue().longValue(); + if (diff > this.maxRegionOpenTime) { + regionsToAssign.add(e.getKey()); + } + } + return regionsToAssign; + } /* * Some internal classes to manage msg-passing and client operations @@ -2114,7 +2113,8 @@ private HServerAddress serverAddress; private byte [] startCode; - PendingOpenReport(HServerInfo info, HRegionInfo region) throws IOException { + PendingOpenReport(HServerInfo info, HRegionInfo region) + throws IOException { if (region.tableDesc.getName().equals(META_TABLE_NAME)) { // The region which just came on-line is a META region. // We need to look in the ROOT region for its information. @@ -2147,7 +2147,6 @@ this.serverAddress.toString()); // Register the newly-available Region's location. - Text metaRegionName; HRegionInterface server; if (rootRegion) { @@ -2163,7 +2162,6 @@ } metaRegionName = HGlobals.rootRegionInfo.regionName; server = connection.getHRegionConnection(rootRegionLocation.get()); - } else { if (!rootScanned || numberOfMetaRegions.get() != onlineMetaRegions.size()) { @@ -2185,7 +2183,6 @@ MetaRegion r = null; if (onlineMetaRegions.containsKey(region.getRegionName())) { r = onlineMetaRegions.get(region.getRegionName()); - } else { r = onlineMetaRegions.get(onlineMetaRegions.headMap( region.getRegionName()).lastKey()); @@ -2194,15 +2191,15 @@ server = connection.getHRegionConnection(r.server); } LOG.info("updating row " + region.getRegionName() + " in table " + - metaRegionName); + metaRegionName + " with startcode " + + Writables.bytesToLong(this.startCode) + " and server "+ + serverAddress.toString()); try { BatchUpdate b = new BatchUpdate(); long lockid = b.startUpdate(region.getRegionName()); - b.put(lockid, COL_SERVER, - Writables.stringToBytes(serverAddress.toString())); - + Writables.stringToBytes(serverAddress.toString())); b.put(lockid, COL_STARTCODE, startCode); server.batchUpdate(metaRegionName, System.currentTimeMillis(), b); @@ -2376,8 +2373,8 @@ // 5. Get it assigned to a server - unassignedRegions.put(regionName, info); - assignAttempts.put(regionName, Long.valueOf(0L)); + this.unassignedRegions.put(regionName, info); + this.assignAttempts.put(regionName, Long.valueOf(0L)); } finally { synchronized (tableInCreation) {