hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From d...@apache.org
Subject svn commit: r1481477 [2/2] - in /hbase/branches/0.95: hbase-client/src/main/java/org/apache/hadoop/hbase/ hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ hbase-protoc...
Date Sun, 12 May 2013 06:55:39 GMT
Modified: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1481477&r1=1481476&r2=1481477&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Sun May 12 06:55:38 2013
@@ -62,6 +62,8 @@ import org.apache.hadoop.hbase.exception
 import org.apache.hadoop.hbase.executor.EventHandler;
 import org.apache.hadoop.hbase.executor.EventType;
 import org.apache.hadoop.hbase.executor.ExecutorService;
+import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
+import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
 import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
 import org.apache.hadoop.hbase.master.handler.DisableTableHandler;
 import org.apache.hadoop.hbase.master.handler.EnableTableHandler;
@@ -73,6 +75,7 @@ import org.apache.hadoop.hbase.util.Envi
 import org.apache.hadoop.hbase.util.KeyLocker;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.util.Triple;
 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
@@ -106,6 +109,8 @@ public class AssignmentManager extends Z
 
   private ServerManager serverManager;
 
+  private boolean shouldAssignRegionsWithFavoredNodes;
+
   private CatalogTracker catalogTracker;
 
   protected final TimeoutMonitor timeoutMonitor;
@@ -218,6 +223,10 @@ public class AssignmentManager extends Z
     this.regionsToReopen = Collections.synchronizedMap
                            (new HashMap<String, HRegionInfo> ());
     Configuration conf = server.getConfiguration();
+    // Only read favored nodes if using the favored nodes load balancer.
+    this.shouldAssignRegionsWithFavoredNodes = conf.getClass(
+           HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class).equals(
+           FavoredNodeLoadBalancer.class);
     this.tomActivated = conf.getBoolean("hbase.assignment.timeout.management", false);
     if (tomActivated){
       this.serversInUpdatingTimer =  new ConcurrentSkipListSet<ServerName>();
@@ -971,6 +980,24 @@ public class AssignmentManager extends Z
     return false;
   }
 
+  // TODO: processFavoredNodes might throw an exception, for e.g., if the
+  // meta could not be contacted/updated. We need to decide how seriously to
+  // treat this problem. Should we fail the current assignment? We should be
+  // able to recover from this problem eventually (if the meta couldn't be
+  // updated things should work normally and eventually get fixed up).
+  void processFavoredNodes(List<HRegionInfo> regions) throws IOException {
+    if (!shouldAssignRegionsWithFavoredNodes) return;
+    // The AM gets the favored nodes info for each region and updates the meta
+    // table with that info
+    Map<HRegionInfo, List<ServerName>> regionToFavoredNodes =
+        new HashMap<HRegionInfo, List<ServerName>>();
+    for (HRegionInfo region : regions) {
+      regionToFavoredNodes.put(region,
+          ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region));
+    }
+    FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes, catalogTracker);
+  }
+
   /**
    * If the passed regionState is in PENDING_CLOSE, clean up PENDING_CLOSE
    * state and convert it to SPLITTING instead.
@@ -1495,8 +1522,8 @@ public class AssignmentManager extends Z
       // that unnecessary timeout on RIT is reduced.
       this.addPlans(plans);
 
-      List<Pair<HRegionInfo, Integer>> regionOpenInfos =
-        new ArrayList<Pair<HRegionInfo, Integer>>(states.size());
+      List<Triple<HRegionInfo, Integer, List<ServerName>>> regionOpenInfos =
+        new ArrayList<Triple<HRegionInfo, Integer, List<ServerName>>>(states.size());
       for (RegionState state: states) {
         HRegionInfo region = state.getRegion();
         String encodedRegionName = region.getEncodedName();
@@ -1509,8 +1536,12 @@ public class AssignmentManager extends Z
         } else {
           regionStates.updateRegionState(region,
             RegionState.State.PENDING_OPEN, destination);
-          regionOpenInfos.add(new Pair<HRegionInfo, Integer>(
-            region, nodeVersion));
+          List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
+          if (this.shouldAssignRegionsWithFavoredNodes) {
+            favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
+          }
+          regionOpenInfos.add(new Triple<HRegionInfo, Integer,  List<ServerName>>(
+            region, nodeVersion, favoredNodes));
         }
       }
 
@@ -1787,8 +1818,12 @@ public class AssignmentManager extends Z
       final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
           " to " + plan.getDestination();
       try {
+        List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
+        if (this.shouldAssignRegionsWithFavoredNodes) {
+          favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
+        }
         regionOpenState = serverManager.sendRegionOpen(
-            plan.getDestination(), region, versionOfOfflineNode);
+            plan.getDestination(), region, versionOfOfflineNode, favoredNodes);
 
         if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
           // Failed opening this region, looping again on a new server.
@@ -2027,6 +2062,15 @@ public class AssignmentManager extends Z
         newPlan = true;
         randomPlan = new RegionPlan(region, null,
             balancer.randomAssignment(region, destServers));
+        if (!region.isMetaTable() && shouldAssignRegionsWithFavoredNodes) {
+          List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
+          regions.add(region);
+          try {
+            processFavoredNodes(regions);
+          } catch (IOException ie) {
+            LOG.warn("Ignoring exception in processFavoredNodes " + ie);
+          }
+        }
         this.regionPlans.put(encodedName, randomPlan);
       }
     }
@@ -2345,6 +2389,7 @@ public class AssignmentManager extends Z
     // Generate a round-robin bulk assignment plan
     Map<ServerName, List<HRegionInfo>> bulkPlan
       = balancer.roundRobinAssignment(regions, servers);
+    processFavoredNodes(regions);
 
     assign(regions.size(), servers.size(),
       "round-robin=true", bulkPlan);
@@ -2402,8 +2447,14 @@ public class AssignmentManager extends Z
     Set<String> disabledOrDisablingOrEnabling = ZKTable.getDisabledOrDisablingTables(watcher);
     disabledOrDisablingOrEnabling.addAll(ZKTable.getEnablingTables(watcher));
     // Scan META for all user regions, skipping any disabled tables
-    Map<HRegionInfo, ServerName> allRegions = MetaReader.fullScan(
-      catalogTracker, disabledOrDisablingOrEnabling, true);
+    Map<HRegionInfo, ServerName> allRegions = null;
+    if (this.shouldAssignRegionsWithFavoredNodes) {
+      allRegions = FavoredNodeAssignmentHelper.fullScan(
+        catalogTracker, disabledOrDisablingOrEnabling, true, (FavoredNodeLoadBalancer)balancer);
+    } else {
+      allRegions = MetaReader.fullScan(
+        catalogTracker, disabledOrDisablingOrEnabling, true);
+    }
     if (allRegions == null || allRegions.isEmpty()) return;
 
     // Determine what type of assignment to do on startup

Added: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java?rev=1481477&view=auto
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java (added)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RackManager.java Sun May 12 06:55:38 2013
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
+import org.apache.hadoop.net.DNSToSwitchMapping;
+import org.apache.hadoop.net.ScriptBasedMapping;
+/**
+ * Wrapper over the rack resolution utility in Hadoop. The rack resolution
+ * utility in Hadoop does resolution from hosts to the racks they belong to.
+ *
+ */
+@InterfaceAudience.Private
+public class RackManager {
+  static final Log LOG = LogFactory.getLog(RackManager.class);
+  public static final String UNKNOWN_RACK = "Unknown Rack";
+
+  private DNSToSwitchMapping switchMapping;
+
+  public RackManager(Configuration conf) {
+    switchMapping = ReflectionUtils.instantiateWithCustomCtor(
+        conf.getClass("hbase.util.ip.to.rack.determiner", ScriptBasedMapping.class,
+             DNSToSwitchMapping.class).getName(), new Class<?>[]{Configuration.class},
+               new Object[]{conf});
+  }
+
+  /**
+   * Get the name of the rack containing a server, according to the DNS to
+   * switch mapping.
+   * @param server the server for which to get the rack name
+   * @return the rack name of the server
+   */
+  public String getRack(ServerName server) {
+    if (server == null) {
+      return UNKNOWN_RACK;
+    }
+    // just a note - switchMapping caches results (at least the implementation should unless the
+    // resolution is really a lightweight process)
+    List<String> racks = switchMapping.resolve(Arrays.asList(server.getHostname()));
+    if (racks != null && !racks.isEmpty()) {
+      return racks.get(0);
+    }
+
+    return UNKNOWN_RACK;
+  }
+}

Modified: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java?rev=1481477&r1=1481476&r2=1481477&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java (original)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java Sun May 12 06:55:38 2013
@@ -61,6 +61,7 @@ import org.apache.hadoop.hbase.protobuf.
 import org.apache.hadoop.hbase.regionserver.RegionOpeningState;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Triple;
 
 import com.google.protobuf.ServiceException;
 
@@ -587,9 +588,10 @@ public class ServerManager {
    * @param region region to open
    * @param versionOfOfflineNode that needs to be present in the offline node
    * when RS tries to change the state from OFFLINE to other states.
+   * @param favoredNodes
    */
   public RegionOpeningState sendRegionOpen(final ServerName server,
-      HRegionInfo region, int versionOfOfflineNode)
+      HRegionInfo region, int versionOfOfflineNode, List<ServerName> favoredNodes)
   throws IOException {
     AdminService.BlockingInterface admin = getRsAdmin(server);
     if (admin == null) {
@@ -598,7 +600,7 @@ public class ServerManager {
       return RegionOpeningState.FAILED_OPENING;
     }
     OpenRegionRequest request =
-      RequestConverter.buildOpenRegionRequest(region, versionOfOfflineNode);
+      RequestConverter.buildOpenRegionRequest(region, versionOfOfflineNode, favoredNodes);
     try {
       OpenRegionResponse response = admin.openRegion(null, request);
       return ResponseConverter.getRegionOpeningState(response);
@@ -617,7 +619,7 @@ public class ServerManager {
    * @return a list of region opening states
    */
   public List<RegionOpeningState> sendRegionOpen(ServerName server,
-      List<Pair<HRegionInfo, Integer>> regionOpenInfos)
+      List<Triple<HRegionInfo, Integer, List<ServerName>>> regionOpenInfos)
   throws IOException {
     AdminService.BlockingInterface admin = getRsAdmin(server);
     if (admin == null) {

Added: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java?rev=1481477&view=auto
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java (added)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeAssignmentHelper.java Sun May 12 06:55:38 2013
@@ -0,0 +1,444 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master.balancer;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeMap;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.catalog.CatalogTracker;
+import org.apache.hadoop.hbase.catalog.MetaEditor;
+import org.apache.hadoop.hbase.catalog.MetaReader;
+import org.apache.hadoop.hbase.catalog.MetaReader.Visitor;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.master.RackManager;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
+import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.FavoredNodes;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.Pair;
+
+import com.google.protobuf.InvalidProtocolBufferException;
+
+/**
+ * Helper class for {@link FavoredNodeLoadBalancer} that has all the intelligence
+ * for racks, meta scans, etc. Instantiated by the {@link FavoredNodeLoadBalancer}
+ * when needed (from within calls like
+ * {@link FavoredNodeLoadBalancer#randomAssignment(HRegionInfo, List)}).
+ *
+ */
+@InterfaceAudience.Private
+public class FavoredNodeAssignmentHelper {
+  private static final Log LOG = LogFactory.getLog(FavoredNodeAssignmentHelper.class);
+  private RackManager rackManager;
+  private Map<String, List<ServerName>> rackToRegionServerMap;
+  private List<String> uniqueRackList;
+  private Map<ServerName, String> regionServerToRackMap;
+  private Random random;
+  private List<ServerName> servers;
+  public static final byte [] FAVOREDNODES_QUALIFIER = Bytes.toBytes("fn");
+  public final static short FAVORED_NODES_NUM = 3;
+
+  public FavoredNodeAssignmentHelper(final List<ServerName> servers, Configuration conf) {
+    this.servers = servers;
+    this.rackManager = new RackManager(conf);
+    this.rackToRegionServerMap = new HashMap<String, List<ServerName>>();
+    this.regionServerToRackMap = new HashMap<ServerName, String>();
+    this.uniqueRackList = new ArrayList<String>();
+    this.random = new Random();
+  }
+
+  // For unit tests
+  void setRackManager(RackManager rackManager) {
+    this.rackManager = rackManager;
+  }
+
+  /**
+   * Perform full scan of the meta table similar to
+   * {@link MetaReader#fullScan(CatalogTracker, Set, boolean)} except that this is
+   * aware of the favored nodes
+   * @param catalogTracker
+   * @param disabledTables
+   * @param excludeOfflinedSplitParents
+   * @param balancer required because we need to let the balancer know about the
+   * current favored nodes from meta scan
+ * @return Returns a map of every region to its currently assigned server,
+ * according to META.  If the region does not have an assignment it will have
+ * a null value in the map.
+   * @throws IOException
+   */
+  public static Map<HRegionInfo, ServerName> fullScan(
+      CatalogTracker catalogTracker, final Set<String> disabledTables,
+      final boolean excludeOfflinedSplitParents,
+      FavoredNodeLoadBalancer balancer) throws IOException {
+    final Map<HRegionInfo, ServerName> regions =
+        new TreeMap<HRegionInfo, ServerName>();
+    final Map<HRegionInfo, ServerName[]> favoredNodesMap =
+        new HashMap<HRegionInfo, ServerName[]>();
+    Visitor v = new Visitor() {
+      @Override
+      public boolean visit(Result r) throws IOException {
+        if (r ==  null || r.isEmpty()) return true;
+        Pair<HRegionInfo, ServerName> region = HRegionInfo.getHRegionInfoAndServerName(r);
+        HRegionInfo hri = region.getFirst();
+        if (hri  == null) return true;
+        if (hri.getTableNameAsString() == null) return true;
+        if (disabledTables.contains(
+            hri.getTableNameAsString())) return true;
+        // Are we to include split parents in the list?
+        if (excludeOfflinedSplitParents && hri.isSplitParent()) return true;
+        regions.put(hri, region.getSecond());
+        byte[] favoredNodes = r.getValue(HConstants.CATALOG_FAMILY,
+               FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER);
+        if (favoredNodes != null) {
+          ServerName[] favoredServerList =
+            FavoredNodeAssignmentHelper.getFavoredNodesList(favoredNodes);
+          favoredNodesMap.put(hri, favoredServerList);
+        }
+        return true;
+      }
+    };
+    MetaReader.fullScan(catalogTracker, v);
+    balancer.noteFavoredNodes(favoredNodesMap);
+    return regions;
+  }
+
+  public static void updateMetaWithFavoredNodesInfo(
+      Map<HRegionInfo, List<ServerName>> regionToFavoredNodes,
+      CatalogTracker catalogTracker) throws IOException {
+    List<Put> puts = new ArrayList<Put>();
+    for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) {
+      Put put = makePutFromRegionInfo(entry.getKey(), entry.getValue());
+      if (put != null) {
+        puts.add(put);
+      }
+    }
+    MetaEditor.putsToMetaTable(catalogTracker, puts);
+    LOG.info("Added " + puts.size() + " regions in META");
+  }
+
+  /**
+   * Generates and returns a Put containing the region info for the catalog table
+   * and the servers
+   * @param regionInfo
+   * @param favoredNodeList
+   * @return Put object
+   */
+  static Put makePutFromRegionInfo(HRegionInfo regionInfo, List<ServerName>favoredNodeList)
+  throws IOException {
+    Put put = null;
+    if (favoredNodeList != null) {
+      put = MetaEditor.makePutFromRegionInfo(regionInfo);
+      byte[] favoredNodes = getFavoredNodes(favoredNodeList);
+      put.add(HConstants.CATALOG_FAMILY, FAVOREDNODES_QUALIFIER,
+          EnvironmentEdgeManager.currentTimeMillis(), favoredNodes);
+      LOG.info("Create the region " + regionInfo.getRegionNameAsString() +
+          " with favored nodes " + favoredNodes);
+    }
+    return put;
+  }
+
+  /**
+   * @param favoredNodes The PB'ed bytes of favored nodes
+   * @return the array of {@link ServerName} for the byte array of favored nodes.
+   * @throws InvalidProtocolBufferException
+   */
+  public static ServerName[] getFavoredNodesList(byte[] favoredNodes)
+      throws InvalidProtocolBufferException {
+    FavoredNodes f = FavoredNodes.parseFrom(favoredNodes);
+    List<HBaseProtos.ServerName> protoNodes = f.getFavoredNodeList();
+    ServerName[] servers = new ServerName[protoNodes.size()];
+    int i = 0;
+    for (HBaseProtos.ServerName node : protoNodes) {
+      servers[i++] = ProtobufUtil.toServerName(node);
+    }
+    return servers;
+  }
+
+  /**
+   * @param serverList
+   * @return PB'ed bytes of {@link FavoredNodes} generated by the server list.
+   */
+  static byte[] getFavoredNodes(List<ServerName> serverAddrList) {
+    FavoredNodes.Builder f = FavoredNodes.newBuilder();
+    for (ServerName s : serverAddrList) {
+      HBaseProtos.ServerName.Builder b = HBaseProtos.ServerName.newBuilder();
+      b.setHostName(s.getHostname());
+      b.setPort(s.getPort());
+      b.setStartCode(s.getStartcode());
+      f.addFavoredNode(b.build());
+    }
+    return f.build().toByteArray();
+  }
+
+  // Place the regions round-robin across the racks picking one server from each
+  // rack at a time. For example, if 2 racks (r1 and r2) with 8 servers (s1..s8) each, it will
+  // choose s1 from r1, s1 from r2, s2 from r1, s2 from r2, ...
+  void placePrimaryRSAsRoundRobin(Map<ServerName, List<HRegionInfo>> assignmentMap,
+      Map<HRegionInfo, ServerName> primaryRSMap, List<HRegionInfo> regions) {
+    List<String> rackList = new ArrayList<String>(rackToRegionServerMap.size());
+    rackList.addAll(rackToRegionServerMap.keySet());
+    Map<String, Integer> currentProcessIndexMap = new HashMap<String, Integer>();
+    int rackIndex = 0;
+    for (HRegionInfo regionInfo : regions) {
+      String rackName = rackList.get(rackIndex);
+      // Initialize the current processing host index.
+      int serverIndex = 0;
+      // Restore the current process index from the currentProcessIndexMap
+      Integer currentProcessIndex = currentProcessIndexMap.get(rackName);
+      if (currentProcessIndex != null) {
+        serverIndex = currentProcessIndex.intValue();
+      }
+      // Get the server list for the current rack
+      List<ServerName> currentServerList = rackToRegionServerMap.get(rackName);
+
+      // Get the current process region server
+      ServerName currentServer = currentServerList.get(serverIndex);
+
+      // Place the current region with the current primary region server
+      primaryRSMap.put(regionInfo, currentServer);
+      List<HRegionInfo> regionsForServer = assignmentMap.get(currentServer);
+      if (regionsForServer == null) {
+        regionsForServer = new ArrayList<HRegionInfo>();
+        assignmentMap.put(currentServer, regionsForServer);
+      }
+      regionsForServer.add(regionInfo);
+
+      // Set the next processing index
+      if ((++serverIndex) >= currentServerList.size()) {
+        // Reset the server index for the current rack
+        serverIndex = 0;
+      }
+      // Keep track of the next processing index
+      currentProcessIndexMap.put(rackName, serverIndex);
+      if ((++rackIndex) >= rackList.size()) {
+        rackIndex = 0; // reset the rack index to 0
+      }
+    }
+  }
+
+  Map<HRegionInfo, ServerName[]> placeSecondaryAndTertiaryRS(
+      Map<HRegionInfo, ServerName> primaryRSMap) {
+    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
+        new HashMap<HRegionInfo, ServerName[]>();
+    for (Map.Entry<HRegionInfo, ServerName> entry : primaryRSMap.entrySet()) {
+      // Get the target region and its primary region server rack
+      HRegionInfo regionInfo = entry.getKey();
+      ServerName primaryRS = entry.getValue();
+      try {
+        // Create the secondary and tertiary region server pair object.
+        ServerName[] favoredNodes;
+        // Get the rack for the primary region server
+        String primaryRack = rackManager.getRack(primaryRS);
+
+        if (getTotalNumberOfRacks() == 1) {
+          favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack);
+        } else {
+          favoredNodes = multiRackCase(regionInfo, primaryRS, primaryRack);
+        }
+        if (favoredNodes != null) {
+          secondaryAndTertiaryMap.put(regionInfo, favoredNodes);
+          LOG.debug("Place the secondary and tertiary region server for region "
+              + regionInfo.getRegionNameAsString());
+        }
+      } catch (Exception e) {
+        LOG.warn("Cannot place the favored nodes for region " +
+            regionInfo.getRegionNameAsString() + " because " + e);
+        continue;
+      }
+    }
+    return secondaryAndTertiaryMap;
+  }
+
+  private ServerName[] singleRackCase(HRegionInfo regionInfo,
+      ServerName primaryRS,
+      String primaryRack) throws IOException {
+    // Single rack case: have to pick the secondary and tertiary
+    // from the same rack
+    List<ServerName> serverList = getServersFromRack(primaryRack);
+    if (serverList.size() <= 2) {
+      // Single region server case: cannot place the favored nodes
+      // on any server; !domain.canPlaceFavoredNodes()
+      return null;
+    } else {
+      // Randomly select two region servers from the server list and make sure
+      // they are not overlap with the primary region server;
+     Set<ServerName> serverSkipSet = new HashSet<ServerName>();
+     serverSkipSet.add(primaryRS);
+
+     // Place the secondary RS
+     ServerName secondaryRS = getOneRandomServer(primaryRack, serverSkipSet);
+     // Skip the secondary for the tertiary placement
+     serverSkipSet.add(secondaryRS);
+
+     // Place the tertiary RS
+     ServerName tertiaryRS =
+       getOneRandomServer(primaryRack, serverSkipSet);
+
+     if (secondaryRS == null || tertiaryRS == null) {
+       LOG.error("Cannot place the secondary and terinary" +
+           "region server for region " +
+           regionInfo.getRegionNameAsString());
+     }
+     // Create the secondary and tertiary pair
+     ServerName[] favoredNodes = new ServerName[2];
+     favoredNodes[0] = secondaryRS;
+     favoredNodes[1] = tertiaryRS;
+     return favoredNodes;
+    }
+  }
+
+  private ServerName[] multiRackCase(HRegionInfo regionInfo,
+      ServerName primaryRS,
+      String primaryRack) throws IOException {
+
+    // Random to choose the secondary and tertiary region server
+    // from another rack to place the secondary and tertiary
+
+    // Random to choose one rack except for the current rack
+    Set<String> rackSkipSet = new HashSet<String>();
+    rackSkipSet.add(primaryRack);
+    ServerName[] favoredNodes = new ServerName[2];
+    String secondaryRack = getOneRandomRack(rackSkipSet);
+    List<ServerName> serverList = getServersFromRack(secondaryRack);
+    if (serverList.size() >= 2) {
+      // Randomly pick up two servers from this secondary rack
+
+      // Place the secondary RS
+      ServerName secondaryRS = getOneRandomServer(secondaryRack);
+
+      // Skip the secondary for the tertiary placement
+      Set<ServerName> skipServerSet = new HashSet<ServerName>();
+      skipServerSet.add(secondaryRS);
+      // Place the tertiary RS
+      ServerName tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet);
+
+      if (secondaryRS == null || tertiaryRS == null) {
+        LOG.error("Cannot place the secondary and terinary" +
+            "region server for region " +
+            regionInfo.getRegionNameAsString());
+      }
+      // Create the secondary and tertiary pair
+      favoredNodes[0] = secondaryRS;
+      favoredNodes[1] = tertiaryRS;
+    } else {
+      // Pick the secondary rs from this secondary rack
+      // and pick the tertiary from another random rack
+      favoredNodes[0] = getOneRandomServer(secondaryRack);
+
+      // Pick the tertiary
+      if (getTotalNumberOfRacks() == 2) {
+        // Pick the tertiary from the same rack of the primary RS
+        Set<ServerName> serverSkipSet = new HashSet<ServerName>();
+        serverSkipSet.add(primaryRS);
+        favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet);
+      } else {
+        // Pick the tertiary from another rack
+        rackSkipSet.add(secondaryRack);
+        String tertiaryRandomRack = getOneRandomRack(rackSkipSet);
+        favoredNodes[1] = getOneRandomServer(tertiaryRandomRack);
+      }
+    }
+    return favoredNodes;
+  }
+
+  boolean canPlaceFavoredNodes() {
+    int serverSize = this.regionServerToRackMap.size();
+    return (serverSize >= FAVORED_NODES_NUM);
+  }
+
+  void initialize() {
+    for (ServerName sn : this.servers) {
+      String rackName = this.rackManager.getRack(sn);
+      List<ServerName> serverList = this.rackToRegionServerMap.get(rackName);
+      if (serverList == null) {
+        serverList = new ArrayList<ServerName>();
+        // Add the current rack to the unique rack list
+        this.uniqueRackList.add(rackName);
+      }
+      if (!serverList.contains(sn)) {
+        serverList.add(sn);
+        this.rackToRegionServerMap.put(rackName, serverList);
+        this.regionServerToRackMap.put(sn, rackName);
+      }
+    }
+  }
+
+  private int getTotalNumberOfRacks() {
+    return this.uniqueRackList.size();
+  }
+
+  private List<ServerName> getServersFromRack(String rack) {
+    return this.rackToRegionServerMap.get(rack);
+  }
+
+  private ServerName getOneRandomServer(String rack,
+      Set<ServerName> skipServerSet) throws IOException {
+    if(rack == null) return null;
+    List<ServerName> serverList = this.rackToRegionServerMap.get(rack);
+    if (serverList == null) return null;
+
+    // Get a random server except for any servers from the skip set
+    if (skipServerSet != null && serverList.size() <= skipServerSet.size()) {
+      throw new IOException("Cannot randomly pick another random server");
+    }
+
+    ServerName randomServer;
+    do {
+      int randomIndex = random.nextInt(serverList.size());
+      randomServer = serverList.get(randomIndex);
+    } while (skipServerSet != null && skipServerSet.contains(randomServer));
+
+    return randomServer;
+  }
+
+  private ServerName getOneRandomServer(String rack) throws IOException {
+    return this.getOneRandomServer(rack, null);
+  }
+
+  private String getOneRandomRack(Set<String> skipRackSet) throws IOException {
+    if (skipRackSet == null || uniqueRackList.size() <= skipRackSet.size()) {
+      throw new IOException("Cannot randomly pick another random server");
+    }
+
+    String randomRack;
+    do {
+      int randomIndex = random.nextInt(this.uniqueRackList.size());
+      randomRack = this.uniqueRackList.get(randomIndex);
+    } while (skipRackSet.contains(randomRack));
+
+    return randomRack;
+  }
+}

Added: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java?rev=1481477&view=auto
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java (added)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodeLoadBalancer.java Sun May 12 06:55:38 2013
@@ -0,0 +1,155 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master.balancer;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.master.LoadBalancer;
+import org.apache.hadoop.hbase.master.RegionPlan;
+
+/**
+ * An implementation of the {@link LoadBalancer} that assigns favored nodes for
+ * each region. There is a Primary RegionServer that hosts the region, and then
+ * there are Secondary and Tertiary RegionServers. Currently, the favored nodes
+ * information is used when creating HDFS files - the Primary RegionServer passes
+ * the primary, secondary, tertiary node addresses as hints to the DistributedFileSystem
+ * API for creating files on the filesystem. These nodes are treated as hints by
+ * HDFS when placing the blocks of the file. This alleviates the problem of
+ * reading from remote nodes (since we can make the Secondary RegionServer the new
+ * Primary RegionServer) after a region is recovered. This should help provide consistent
+ * read latencies for the regions even when their primary region servers die.
+ *
+ */
+@InterfaceAudience.Private
+public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
+  private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class);
+
+  /** Record of region -> favored-nodes produced by this balancer; serves getFavoredNodes() */
+  private FavoredNodes globalFavoredNodesAssignmentPlan;
+  private Configuration configuration;
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.configuration = conf;
+    globalFavoredNodesAssignmentPlan = new FavoredNodes();
+  }
+
+  @Override
+  public List<RegionPlan> balanceCluster(Map<ServerName, List<HRegionInfo>> clusterState) {
+    //TODO. At a high level, this should look at the block locality per region, and
+    //then reassign regions based on which nodes have the most blocks of the region
+    //file(s). There could be different ways like minimize region movement, or, maximum
+    //locality, etc. The other dimension to look at is whether Stochastic loadbalancer
+    //can be integrated with this
+    throw new UnsupportedOperationException("Not implemented yet");
+  }
+
+  /**
+   * Round-robin assignment that also computes secondary/tertiary favored nodes
+   * for every region. Falls back to the superclass assignment when favored
+   * nodes cannot be placed or when any error occurs.
+   */
+  @Override
+  public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(List<HRegionInfo> regions,
+      List<ServerName> servers) {
+    Map<ServerName, List<HRegionInfo>> assignmentMap;
+    try {
+      FavoredNodeAssignmentHelper assignmentHelper =
+          new FavoredNodeAssignmentHelper(servers, configuration);
+      assignmentHelper.initialize();
+      if (!assignmentHelper.canPlaceFavoredNodes()) {
+        return super.roundRobinAssignment(regions, servers);
+      }
+      assignmentMap = new HashMap<ServerName, List<HRegionInfo>>();
+      roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regions, servers);
+    } catch (Exception ex) {
+      // Pass the throwable to the logger so the stack trace is preserved
+      // (previously the exception was concatenated into the message).
+      LOG.warn("Encountered exception while doing favored-nodes assignment."
+          + " Falling back to regular assignment", ex);
+      assignmentMap = super.roundRobinAssignment(regions, servers);
+    }
+    return assignmentMap;
+  }
+
+  /**
+   * Random assignment that records secondary/tertiary favored nodes for the
+   * region in addition to picking the primary. Falls back to the superclass
+   * assignment when favored nodes cannot be placed or when any error occurs.
+   */
+  @Override
+  public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
+    try {
+      FavoredNodeAssignmentHelper assignmentHelper =
+          new FavoredNodeAssignmentHelper(servers, configuration);
+      assignmentHelper.initialize();
+      ServerName primary = super.randomAssignment(regionInfo, servers);
+      if (!assignmentHelper.canPlaceFavoredNodes()) {
+        return primary;
+      }
+      List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
+      regions.add(regionInfo);
+      Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>(1);
+      primaryRSMap.put(regionInfo, primary);
+      assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
+      return primary;
+    } catch (Exception ex) {
+      // Pass the throwable to the logger so the stack trace is preserved.
+      LOG.warn("Encountered exception while doing favored-nodes (random)assignment."
+          + " Falling back to regular assignment", ex);
+      return super.randomAssignment(regionInfo, servers);
+    }
+  }
+
+  /**
+   * @param regionInfo the region to look up
+   * @return the recorded favored nodes for the region, or null if none recorded
+   */
+  public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
+    return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
+  }
+
+  private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
+      Map<ServerName, List<HRegionInfo>> assignmentMap,
+      List<HRegionInfo> regions, List<ServerName> servers) throws IOException {
+    Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
+    // figure the primary RSs
+    assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
+    assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
+  }
+
+  private void assignSecondaryAndTertiaryNodesForRegion(
+      FavoredNodeAssignmentHelper assignmentHelper,
+      List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) {
+    // figure the secondary and tertiary RSs
+    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
+        assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
+    // now record all the assignments so that we can serve queries later
+    for (HRegionInfo region : regions) {
+      List<ServerName> favoredNodesForRegion = new ArrayList<ServerName>(3);
+      favoredNodesForRegion.add(primaryRSMap.get(region));
+      ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
+      // Secondary/tertiary may be absent if the helper could not place them
+      if (secondaryAndTertiaryNodes != null) {
+        favoredNodesForRegion.add(secondaryAndTertiaryNodes[0]);
+        favoredNodesForRegion.add(secondaryAndTertiaryNodes[1]);
+      }
+      globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion);
+    }
+  }
+
+  /** Record externally-computed favored nodes (e.g. read back from META) into the plan. */
+  void noteFavoredNodes(final Map<HRegionInfo, ServerName[]> favoredNodesMap) {
+    for (Map.Entry<HRegionInfo, ServerName[]> entry : favoredNodesMap.entrySet()) {
+      globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(entry.getKey(),
+          Arrays.asList(entry.getValue()));
+    }
+  }
+}
\ No newline at end of file

Added: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java?rev=1481477&view=auto
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java (added)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/FavoredNodes.java Sun May 12 06:55:38 2013
@@ -0,0 +1,76 @@
+/**
+ * Copyright 2012 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.balancer;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.ServerName;
+import org.jboss.netty.util.internal.ConcurrentHashMap;
+
+/**
+ * This class contains the mapping information between each region and
+ * its favored region server list. Used by {@link FavoredNodeLoadBalancer} set
+ * of classes and from unit tests (hence the class is public)
+ *
+ * All the access to this class is thread-safe.
+ */
+@InterfaceAudience.Private
+public class FavoredNodes {
+  protected static final Log LOG = LogFactory.getLog(
+      FavoredNodes.class.getName());
+
+  /** the map between each region and its favored region server list */
+  private Map<HRegionInfo, List<ServerName>> favoredNodesMap;
+
+  /** Position of a server within a region's favored-nodes list. */
+  public static enum Position {
+    PRIMARY,
+    SECONDARY,
+    TERTIARY;
+  };
+
+  public FavoredNodes() {
+    // Fully qualify the JDK map: this file imports
+    // org.jboss.netty.util.internal.ConcurrentHashMap, a Netty-internal class
+    // that was never meant for general use and may be removed. Instantiating
+    // java.util.concurrent.ConcurrentHashMap explicitly keeps the intended
+    // (standard, supported) concurrent map semantics.
+    favoredNodesMap =
+        new java.util.concurrent.ConcurrentHashMap<HRegionInfo, List<ServerName>>();
+  }
+
+  /**
+   * Add an assignment to the plan.
+   * @param region the region being assigned; ignored if null
+   * @param servers the favored servers for the region; ignored if null or empty
+   */
+  public synchronized void updateFavoredNodesMap(HRegionInfo region,
+      List<ServerName> servers) {
+    if (region == null || servers == null || servers.size() == 0) {
+      return;
+    }
+    this.favoredNodesMap.put(region, servers);
+  }
+
+  /**
+   * @param region the region to look up
+   * @return the list of favored region servers for this region based on the
+   *         plan, or null if no assignment was recorded
+   */
+  public synchronized List<ServerName> getFavoredNodes(HRegionInfo region) {
+    return favoredNodesMap.get(region);
+  }
+}

Modified: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadBalancerFactory.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadBalancerFactory.java?rev=1481477&r1=1481476&r2=1481477&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadBalancerFactory.java (original)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/LoadBalancerFactory.java Sun May 12 06:55:38 2013
@@ -30,7 +30,7 @@ import org.apache.hadoop.util.Reflection
 public class LoadBalancerFactory {
 
   /**
-   * Create a loadblanacer from the given conf.
+   * Create a loadbalancer from the given conf.
    * @param conf
    * @return A {@link LoadBalancer}
    */

Modified: hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1481477&r1=1481476&r2=1481477&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.95/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Sun May 12 06:55:38 2013
@@ -280,6 +280,18 @@ public class HRegionServer implements Cl
   protected final Map<String, HRegion> onlineRegions =
     new ConcurrentHashMap<String, HRegion>();
 
+  /**
+   * Map of encoded region names to the DataNode locations they should be hosted on
+   * We store the value as InetSocketAddress since this is used only in HDFS
+   * API (create() that takes favored nodes as hints for placing file blocks).
+   * We could have used ServerName here as the value class, but we'd need to
+   * convert it to InetSocketAddress at some point before the HDFS API call, and
+   * it seems a bit weird to store ServerName since ServerName refers to RegionServers
+   * and here we really mean DataNode locations.
+   */
+  protected final Map<String, InetSocketAddress[]> regionFavoredNodesMap =
+      new ConcurrentHashMap<String, InetSocketAddress[]>();
+
   // Leases
   protected Leases leases;
 
@@ -2425,6 +2437,10 @@ public class HRegionServer implements Cl
     return this.onlineRegions.get(encodedRegionName);
   }
 
+  /**
+   * Return the favored DataNode locations recorded for a region.
+   * @param encodedRegionName the encoded name of the region
+   * @return array of favored locations, or null if none were recorded
+   */
+  public InetSocketAddress[] getRegionBlockLocations(final String encodedRegionName) {
+    // Same data as getFavoredNodesForRegion (both read regionFavoredNodesMap);
+    // delegate so there is a single lookup site and the two cannot drift.
+    return getFavoredNodesForRegion(encodedRegionName);
+  }
+
   @Override
   public HRegion getFromOnlineRegions(final String encodedRegionName) {
     return this.onlineRegions.get(encodedRegionName);
@@ -2447,6 +2463,7 @@ public class HRegionServer implements Cl
       }
       addToMovedRegions(r.getRegionInfo().getEncodedName(), destination, closeSeqNum);
     }
+    this.regionFavoredNodesMap.remove(r.getRegionInfo().getEncodedName());
     return toReturn != null;
   }
 
@@ -3438,6 +3455,8 @@ public class HRegionServer implements Cl
             this.service.submit(new OpenMetaHandler(this, this, region, htd,
                 versionOfOfflineNode));
           } else {
+            updateRegionFavoredNodesMapping(region.getEncodedName(),
+                regionOpenInfo.getFavoredNodesList());
             this.service.submit(new OpenRegionHandler(this, this, region, htd,
                 versionOfOfflineNode));
           }
@@ -3458,6 +3477,28 @@ public class HRegionServer implements Cl
     return builder.build();
   }
 
+  /**
+   * Record the favored-node hints received in an open-region request so they
+   * can later be passed as block-placement hints when creating this region's
+   * HDFS files.
+   * @param encodedRegionName region whose favored nodes are being recorded
+   * @param favoredNodes protobuf server names of the favored locations
+   */
+  private void updateRegionFavoredNodesMapping(String encodedRegionName,
+      List<org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName> favoredNodes) {
+    // Refer to the comment on the declaration of regionFavoredNodesMap on why
+    // it is a map of region name to InetSocketAddress[]
+    InetSocketAddress[] addresses = new InetSocketAddress[favoredNodes.size()];
+    int idx = 0;
+    for (org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName node : favoredNodes) {
+      // createUnresolved performs no DNS lookup here; resolution is deferred
+      addresses[idx++] = InetSocketAddress.createUnresolved(node.getHostName(),
+          node.getPort());
+    }
+    regionFavoredNodesMap.put(encodedRegionName, addresses);
+  }
+
+  /**
+   * Return the favored nodes for a region given its encoded name. Look at the
+   * comment around {@link #regionFavoredNodesMap} on why it stores
+   * InetSocketAddress[] rather than ServerName.
+   * @param encodedRegionName the encoded name of the region
+   * @return array of favored locations, or null if none were recorded
+   */
+  public InetSocketAddress[] getFavoredNodesForRegion(String encodedRegionName) {
+    return this.regionFavoredNodesMap.get(encodedRegionName);
+  }
+
   /**
    * Close a region on the region server.
    *

Modified: hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1481477&r1=1481476&r2=1481477&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original)
+++ hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Sun May 12 06:55:38 2013
@@ -159,9 +159,9 @@ public class TestAssignmentManager {
     Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)).
       thenReturn(true);
     // Ditto on open.
-    Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1)).
+    Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)).
       thenReturn(RegionOpeningState.OPENED);
-    Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1)).
+    Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)).
       thenReturn(RegionOpeningState.OPENED);
     this.master = Mockito.mock(HMaster.class);
 

Modified: hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java?rev=1481477&r1=1481476&r2=1481477&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java (original)
+++ hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java Sun May 12 06:55:38 2013
@@ -188,7 +188,7 @@ public class TestMasterNoCluster {
         // Fake a successful open.
         Mockito.doReturn(RegionOpeningState.OPENED).when(spy).
           sendRegionOpen((ServerName)Mockito.any(), (HRegionInfo)Mockito.any(),
-            Mockito.anyInt());
+            Mockito.anyInt(), Mockito.anyListOf(ServerName.class));
         return spy;
       }
 
@@ -274,7 +274,7 @@ public class TestMasterNoCluster {
         // Fake a successful open.
         Mockito.doReturn(RegionOpeningState.OPENED).when(spy).
           sendRegionOpen((ServerName)Mockito.any(), (HRegionInfo)Mockito.any(),
-            Mockito.anyInt());
+            Mockito.anyInt(), Mockito.anyListOf(ServerName.class));
         return spy;
       }
 

Added: hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java?rev=1481477&view=auto
==============================================================================
--- hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java (added)
+++ hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java Sun May 12 06:55:38 2013
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.MiniHBaseCluster;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.MetaScanner;
+import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
+import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
+import org.apache.hadoop.hbase.master.balancer.FavoredNodes.Position;
+import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Tests that the FavoredNodeLoadBalancer assigns favored nodes to regions,
+ * persists them in the META table, and propagates them to the region servers.
+ */
+@Category(MediumTests.class)
+public class TestRegionPlacement {
+  final static Log LOG = LogFactory.getLog(TestRegionPlacement.class);
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  // Number of region servers in the mini cluster
+  private final static int SLAVES = 4;
+  private static HBaseAdmin admin;
+  private static Position[] positions = Position.values();
+  // Number of regions created by testRegionPlacement
+  private int REGION_NUM = 10;
+  // Favored-nodes plan as read back from META; populated by getNumRegionisOnPrimaryRS
+  private Map<HRegionInfo, ServerName[]> favoredNodesAssignmentPlan =
+      new HashMap<HRegionInfo, ServerName[]>();
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    // Enable the favored nodes based load balancer
+    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
+        FavoredNodeLoadBalancer.class, LoadBalancer.class);
+    TEST_UTIL.startMiniCluster(SLAVES);
+    admin = new HBaseAdmin(conf);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  /**
+   * Verify that both randomAssignment and roundRobinAssignment leave every
+   * region with exactly three favored nodes recorded in the balancer.
+   */
+  @Test
+  public void testGetFavoredNodes() {
+    LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration());
+    HRegionInfo regionInfo = new HRegionInfo("oneregion".getBytes());
+    List<ServerName> servers = new ArrayList<ServerName>();
+    for (int i = 0; i < 10; i++) {
+      ServerName server = new ServerName("foo"+i+":1234",-1);
+      servers.add(server);
+    }
+    // test that we have enough favored nodes after we call randomAssignment
+    balancer.randomAssignment(regionInfo, servers);
+    assertTrue(((FavoredNodeLoadBalancer)balancer).getFavoredNodes(regionInfo).size() == 3);
+    List<HRegionInfo> regions = new ArrayList<HRegionInfo>(100);
+    for (int i = 0; i < 100; i++) {
+      HRegionInfo region = new HRegionInfo(("foobar"+i).getBytes());
+      regions.add(region);
+    }
+    // test that we have enough favored nodes after we call roundRobinAssignment
+    balancer.roundRobinAssignment(regions, servers);
+    for (int i = 0; i < 100; i++) {
+      assertTrue(((FavoredNodeLoadBalancer)balancer).getFavoredNodes(regions.get(i)).size() == 3);
+    }
+  }
+
+  @Test(timeout = 180000)
+  public void testRegionPlacement() throws Exception {
+    // Create a table with REGION_NUM regions.
+    createTable("testRegionAssignment", REGION_NUM);
+
+    TEST_UTIL.waitTableAvailable(Bytes.toBytes("testRegionAssignment"));
+
+    // Verify all the user regions are assigned to the primary region server
+    // based on the plan
+    countRegionOnPrimaryRS(REGION_NUM);
+
+    // Verify all the region server are update with the latest favored nodes
+    verifyRegionServerUpdated();
+  }
+
+  /**
+   * Verify that the number of user regions assigned to their primary
+   * region server matches the expected count.
+   * @param expectedNum the expected number of regions hosted on their primary
+   * @throws IOException
+   */
+  private void countRegionOnPrimaryRS(int expectedNum)
+      throws IOException {
+    int lastRegionOnPrimaryRSCount = getNumRegionisOnPrimaryRS();
+    assertEquals("Only " +  expectedNum + " of user regions running " +
+        "on the primary region server", expectedNum ,
+        lastRegionOnPrimaryRSCount);
+  }
+
+  /**
+   * Verify all the online region servers have been updated with the
+   * latest assignment plan (favored nodes recorded per region).
+   * @throws IOException
+   */
+  private void verifyRegionServerUpdated() throws IOException {
+    // Verify all region servers contain the correct favored nodes information
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    for (int i = 0; i < SLAVES; i++) {
+      HRegionServer rs = cluster.getRegionServer(i);
+      for (HRegion region: rs.getOnlineRegions(Bytes.toBytes("testRegionAssignment"))) {
+        InetSocketAddress[] favoredSocketAddress = rs.getFavoredNodesForRegion(
+            region.getRegionInfo().getEncodedName());
+        ServerName[] favoredServerList = favoredNodesAssignmentPlan.get(region.getRegionInfo());
+
+        // All regions are supposed to have favored nodes,
+        // except for META and ROOT
+        if (favoredServerList == null) {
+          HTableDescriptor desc = region.getTableDesc();
+          // Verify they are ROOT and META regions since no favored nodes
+          assertNull(favoredSocketAddress);
+          assertTrue("User region " +
+              region.getTableDesc().getNameAsString() +
+              " should have favored nodes",
+              (desc.isRootRegion() || desc.isMetaRegion()));
+        } else {
+          // For user region, the favored nodes in the region server should be
+          // identical to favored nodes in the assignmentPlan
+          assertTrue(favoredSocketAddress.length == favoredServerList.length);
+          assertTrue(favoredServerList.length > 0);
+          for (int j = 0; j < favoredServerList.length; j++) {
+            InetSocketAddress addrFromRS = favoredSocketAddress[j];
+            // createUnresolved mirrors how HRegionServer stores the addresses
+            InetSocketAddress addrFromPlan = InetSocketAddress.createUnresolved(
+                favoredServerList[j].getHostname(), favoredServerList[j].getPort());
+
+            assertNotNull(addrFromRS);
+            assertNotNull(addrFromPlan);
+            assertTrue("Region server " + rs.getServerName().getHostAndPort()
+                + " has the " + positions[j] +
+                " for region " + region.getRegionNameAsString() + " is " +
+                addrFromRS + " which is inconsistent with the plan "
+                + addrFromPlan, addrFromRS.equals(addrFromPlan));
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Check whether regions are assigned to servers consistent with the explicit
+   * hints that are persisted in the META table.
+   * Also keep track of the number of the regions are assigned to the
+   * primary region server.
+   * NOTE(review): the method name contains a typo ("Regionis"); kept as-is
+   * since it is referenced above and renaming would be purely cosmetic.
+   * @return the number of regions are assigned to the primary region server
+   * @throws IOException
+   */
+  private int getNumRegionisOnPrimaryRS() throws IOException {
+    final AtomicInteger regionOnPrimaryNum = new AtomicInteger(0);
+    final AtomicInteger totalRegionNum = new AtomicInteger(0);
+    LOG.info("The start of region placement verification");
+    MetaScannerVisitor visitor = new MetaScannerVisitor() {
+      public boolean processRow(Result result) throws IOException {
+        try {
+          HRegionInfo info = MetaScanner.getHRegionInfo(result);
+          byte[] server = result.getValue(HConstants.CATALOG_FAMILY,
+              HConstants.SERVER_QUALIFIER);
+          byte[] startCode = result.getValue(HConstants.CATALOG_FAMILY,
+              HConstants.STARTCODE_QUALIFIER);
+          byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY,
+              FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER);
+          // Add the favored nodes into assignment plan
+          ServerName[] favoredServerList =
+              FavoredNodeAssignmentHelper.getFavoredNodesList(favoredNodes);
+          favoredNodesAssignmentPlan.put(info, favoredServerList);
+
+          Position[] positions = Position.values();
+          if (info != null) {
+            totalRegionNum.incrementAndGet();
+            if (server != null) {
+              ServerName serverName =
+                  new ServerName(Bytes.toString(server),Bytes.toLong(startCode));
+              if (favoredNodes != null) {
+                String placement = "[NOT FAVORED NODE]";
+                for (int i = 0; i < favoredServerList.length; i++) {
+                  if (favoredServerList[i].equals(serverName)) {
+                    placement = positions[i].toString();
+                    if (i == Position.PRIMARY.ordinal()) {
+                      regionOnPrimaryNum.incrementAndGet();
+                    }
+                    break;
+                  }
+                }
+                LOG.info(info.getRegionNameAsString() + " on " +
+                    serverName + " " + placement);
+              } else {
+                LOG.info(info.getRegionNameAsString() + " running on " +
+                    serverName + " but there is no favored region server");
+              }
+            } else {
+              LOG.info(info.getRegionNameAsString() +
+                  " not assigned to any server");
+            }
+          }
+          return true;
+        } catch (RuntimeException e) {
+          LOG.error("Result=" + result);
+          throw e;
+        }
+      }
+
+      @Override
+      public void close() throws IOException {}
+    };
+    MetaScanner.metaScan(TEST_UTIL.getConfiguration(), visitor);
+    LOG.info("There are " + regionOnPrimaryNum.intValue() + " out of " +
+        totalRegionNum.intValue() + " regions running on the primary" +
+        " region servers" );
+    return regionOnPrimaryNum.intValue() ;
+  }
+
+  /**
+   * Create a table with the specified table name and region count, then
+   * verify that the expected number of regions exist.
+   * @param table the name of the table to create
+   * @param regionNum the number of regions the table should have
+   * @throws IOException
+   */
+  private static void createTable(String table, int regionNum)
+      throws IOException {
+    byte[] tableName = Bytes.toBytes(table);
+    int expectedRegions = regionNum;
+    byte[][] splitKeys = new byte[expectedRegions - 1][];
+    for (int i = 1; i < expectedRegions; i++) {
+      byte splitKey = (byte) i;
+      splitKeys[i - 1] = new byte[] { splitKey, splitKey, splitKey };
+    }
+
+    HTableDescriptor desc = new HTableDescriptor(tableName);
+    desc.addFamily(new HColumnDescriptor(HConstants.CATALOG_FAMILY));
+    admin.createTable(desc, splitKeys);
+
+    HTable ht = new HTable(TEST_UTIL.getConfiguration(), tableName);
+    Map<HRegionInfo, ServerName> regions = ht.getRegionLocations();
+    assertEquals("Tried to create " + expectedRegions + " regions "
+        + "but only found " + regions.size(), expectedRegions, regions.size());
+  }
+}

Added: hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java?rev=1481477&view=auto
==============================================================================
--- hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java (added)
+++ hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestFavoredNodeAssignmentHelper.java Sun May 12 06:55:38 2013
@@ -0,0 +1,311 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.master.balancer;
+
+import static org.junit.Assert.assertTrue;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.master.RackManager;
+import org.apache.hadoop.hbase.util.Triple;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.mockito.Mockito;
+
+/**
+ * Tests for {@link FavoredNodeAssignmentHelper}: round-robin primary region
+ * server placement across racks, and secondary/tertiary placement under
+ * various rack/server configurations. The rack topology is simulated with a
+ * Mockito-mocked {@link RackManager}.
+ */
+@Category(MediumTests.class)
+public class TestFavoredNodeAssignmentHelper {
+
+  private static List<ServerName> servers = new ArrayList<ServerName>();
+  private static Map<String, List<ServerName>> rackToServers = new HashMap<String,
+      List<ServerName>>();
+  private static RackManager rackManager = Mockito.mock(RackManager.class);
+
+  @BeforeClass
+  public static void setupBeforeClass() throws Exception {
+    // Set up some server -> rack mappings.
+    // Have three racks in the cluster with 10 hosts each; the remaining ten
+    // servers (i = 30..39) are created but not mapped to any rack.
+    for (int i = 0; i < 40; i++) {
+      ServerName server = new ServerName("foo" + i + ":1234", -1);
+      if (i < 10) {
+        Mockito.when(rackManager.getRack(server)).thenReturn("rack1");
+        addServerToRack("rack1", server);
+      } else if (i < 20) {
+        Mockito.when(rackManager.getRack(server)).thenReturn("rack2");
+        addServerToRack("rack2", server);
+      } else if (i < 30) {
+        Mockito.when(rackManager.getRack(server)).thenReturn("rack3");
+        addServerToRack("rack3", server);
+      }
+      servers.add(server);
+    }
+  }
+
+  // Registers 'server' under 'rack', creating the rack's server list on
+  // first use.
+  private static void addServerToRack(String rack, ServerName server) {
+    List<ServerName> rackServers = rackToServers.get(rack);
+    if (rackServers == null) {
+      rackServers = new ArrayList<ServerName>();
+      rackToServers.put(rack, rackServers);
+    }
+    rackServers.add(server);
+  }
+
+  // The tests decide which racks to work with, and how many machines to
+  // work with from any given rack.
+  // Returns the first 'count' servers from each requested rack; the
+  // selection is deterministic, not random.
+  private static List<ServerName> getServersFromRack(Map<String, Integer> rackToServerCount) {
+    List<ServerName> chosenServers = new ArrayList<ServerName>();
+    for (Map.Entry<String, Integer> entry : rackToServerCount.entrySet()) {
+      List<ServerName> servers = rackToServers.get(entry.getKey());
+      for (int i = 0; i < entry.getValue(); i++) {
+        chosenServers.add(servers.get(i));
+      }
+    }
+    return chosenServers;
+  }
+
+  @Test
+  public void testSmallCluster() {
+    // Test the case where we cannot assign favored nodes (because the number
+    // of nodes in the cluster is too small)
+    Map<String,Integer> rackToServerCount = new HashMap<String,Integer>();
+    rackToServerCount.put("rack1", 2);
+    List<ServerName> servers = getServersFromRack(rackToServerCount);
+    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers,
+        new Configuration());
+    assertTrue(!helper.canPlaceFavoredNodes());
+  }
+
+  @Test
+  public void testPlacePrimaryRSAsRoundRobin() {
+    // Test the regular case where there are many servers in different racks
+    // Test once for few regions and once for many regions
+    primaryRSPlacement(6, null);
+    // now create lots of regions and try to place them on the limited number of machines
+    primaryRSPlacement(600, null);
+  }
+
+  // NOTE(review): this test is disabled (@Test commented out) — confirm
+  // whether that is intentional before re-enabling.
+  //@Test
+  public void testSecondaryAndTertiaryPlacementWithSingleRack() {
+    // Test the case where there is a single rack and we need to choose
+    // Primary/Secondary/Tertiary from a single rack.
+    Map<String,Integer> rackToServerCount = new HashMap<String,Integer>();
+    rackToServerCount.put("rack1", 10);
+    // have lots of regions to test with
+    Triple<Map<HRegionInfo, ServerName>, FavoredNodeAssignmentHelper, List<HRegionInfo>>
+      primaryRSMapAndHelper = secondaryAndTertiaryRSPlacementHelper(60000, rackToServerCount);
+    FavoredNodeAssignmentHelper helper = primaryRSMapAndHelper.getSecond();
+    Map<HRegionInfo, ServerName> primaryRSMap = primaryRSMapAndHelper.getFirst();
+    List<HRegionInfo> regions = primaryRSMapAndHelper.getThird();
+    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
+        helper.placeSecondaryAndTertiaryRS(primaryRSMap);
+    // although we created lots of regions we should have no overlap on the
+    // primary/secondary/tertiary for any given region
+    for (HRegionInfo region : regions) {
+      ServerName[] secondaryAndTertiaryServers = secondaryAndTertiaryMap.get(region);
+      assertTrue(!secondaryAndTertiaryServers[0].equals(primaryRSMap.get(region)));
+      assertTrue(!secondaryAndTertiaryServers[1].equals(primaryRSMap.get(region)));
+      assertTrue(!secondaryAndTertiaryServers[0].equals(secondaryAndTertiaryServers[1]));
+    }
+  }
+
+  @Test
+  public void testSecondaryAndTertiaryPlacementWithSingleServer() {
+    // Test the case where we have a single node in the cluster. In this case
+    // the primary can be assigned but the secondary/tertiary would be null
+    Map<String,Integer> rackToServerCount = new HashMap<String,Integer>();
+    rackToServerCount.put("rack1", 1);
+    Triple<Map<HRegionInfo, ServerName>, FavoredNodeAssignmentHelper, List<HRegionInfo>>
+      primaryRSMapAndHelper = secondaryAndTertiaryRSPlacementHelper(1, rackToServerCount);
+    FavoredNodeAssignmentHelper helper = primaryRSMapAndHelper.getSecond();
+    Map<HRegionInfo, ServerName> primaryRSMap = primaryRSMapAndHelper.getFirst();
+    List<HRegionInfo> regions = primaryRSMapAndHelper.getThird();
+
+    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
+        helper.placeSecondaryAndTertiaryRS(primaryRSMap);
+    // no secondary/tertiary placement in case of a single RegionServer
+    assertTrue(secondaryAndTertiaryMap.get(regions.get(0)) == null);
+  }
+
+  @Test
+  public void testSecondaryAndTertiaryPlacementWithMultipleRacks() {
+    // Test the case where we have multiple racks and the region servers
+    // belong to multiple racks
+    Map<String,Integer> rackToServerCount = new HashMap<String,Integer>();
+    rackToServerCount.put("rack1", 10);
+    rackToServerCount.put("rack2", 10);
+
+    Triple<Map<HRegionInfo, ServerName>, FavoredNodeAssignmentHelper, List<HRegionInfo>>
+      primaryRSMapAndHelper = secondaryAndTertiaryRSPlacementHelper(60000, rackToServerCount);
+    FavoredNodeAssignmentHelper helper = primaryRSMapAndHelper.getSecond();
+    Map<HRegionInfo, ServerName> primaryRSMap = primaryRSMapAndHelper.getFirst();
+
+    assertTrue(primaryRSMap.size() == 60000);
+    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
+        helper.placeSecondaryAndTertiaryRS(primaryRSMap);
+    assertTrue(secondaryAndTertiaryMap.size() == 60000);
+    // for every region, the primary should be on one rack and the secondary/tertiary
+    // on another (we create a lot of regions just to increase probability of failure)
+    for (Map.Entry<HRegionInfo, ServerName[]> entry : secondaryAndTertiaryMap.entrySet()) {
+      ServerName[] allServersForRegion = entry.getValue();
+      String primaryRSRack = rackManager.getRack(primaryRSMap.get(entry.getKey()));
+      String secondaryRSRack = rackManager.getRack(allServersForRegion[0]);
+      String tertiaryRSRack = rackManager.getRack(allServersForRegion[1]);
+      assertTrue(!primaryRSRack.equals(secondaryRSRack));
+      assertTrue(secondaryRSRack.equals(tertiaryRSRack));
+    }
+  }
+
+  @Test
+  public void testSecondaryAndTertiaryPlacementWithLessThanTwoServersInRacks() {
+    // Test the case where we have two racks but with less than two servers in each
+    // We will not have enough machines to select secondary/tertiary
+    Map<String,Integer> rackToServerCount = new HashMap<String,Integer>();
+    rackToServerCount.put("rack1", 1);
+    rackToServerCount.put("rack2", 1);
+    Triple<Map<HRegionInfo, ServerName>, FavoredNodeAssignmentHelper, List<HRegionInfo>>
+      primaryRSMapAndHelper = secondaryAndTertiaryRSPlacementHelper(6, rackToServerCount);
+    FavoredNodeAssignmentHelper helper = primaryRSMapAndHelper.getSecond();
+    Map<HRegionInfo, ServerName> primaryRSMap = primaryRSMapAndHelper.getFirst();
+    List<HRegionInfo> regions = primaryRSMapAndHelper.getThird();
+    assertTrue(primaryRSMap.size() == 6);
+    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
+          helper.placeSecondaryAndTertiaryRS(primaryRSMap);
+    for (HRegionInfo region : regions) {
+      // not enough secondary/tertiary room to place the regions
+      assertTrue(secondaryAndTertiaryMap.get(region) == null);
+    }
+  }
+
+  @Test
+  public void testSecondaryAndTertiaryPlacementWithMoreThanOneServerInPrimaryRack() {
+    // Test the case where there is only one server in one rack and another rack
+    // has more servers. We try to choose secondary/tertiary on different
+    // racks than what the primary is on. But if the other rack doesn't have
+    // enough nodes to have both secondary/tertiary RSs, the tertiary is placed
+    // on the same rack as the primary server is on
+    Map<String,Integer> rackToServerCount = new HashMap<String,Integer>();
+    rackToServerCount.put("rack1", 2);
+    rackToServerCount.put("rack2", 1);
+    Triple<Map<HRegionInfo, ServerName>, FavoredNodeAssignmentHelper, List<HRegionInfo>>
+      primaryRSMapAndHelper = secondaryAndTertiaryRSPlacementHelper(6, rackToServerCount);
+    FavoredNodeAssignmentHelper helper = primaryRSMapAndHelper.getSecond();
+    Map<HRegionInfo, ServerName> primaryRSMap = primaryRSMapAndHelper.getFirst();
+    List<HRegionInfo> regions = primaryRSMapAndHelper.getThird();
+    assertTrue(primaryRSMap.size() == 6);
+    Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
+          helper.placeSecondaryAndTertiaryRS(primaryRSMap);
+    for (HRegionInfo region : regions) {
+      ServerName s = primaryRSMap.get(region);
+      ServerName secondaryRS = secondaryAndTertiaryMap.get(region)[0];
+      ServerName tertiaryRS = secondaryAndTertiaryMap.get(region)[1];
+      if (rackManager.getRack(s).equals("rack1")) {
+        assertTrue(rackManager.getRack(secondaryRS).equals("rack2") &&
+            rackManager.getRack(tertiaryRS).equals("rack1"));
+      }
+      if (rackManager.getRack(s).equals("rack2")) {
+        assertTrue(rackManager.getRack(secondaryRS).equals("rack1") &&
+            rackManager.getRack(tertiaryRS).equals("rack1"));
+      }
+    }
+  }
+
+  // Creates 'regionCount' regions, places their primaries round-robin on the
+  // servers drawn from 'rackToServerCount', and returns the primary map, the
+  // initialized helper and the region list for the caller to run
+  // secondary/tertiary placement on.
+  private Triple<Map<HRegionInfo, ServerName>, FavoredNodeAssignmentHelper, List<HRegionInfo>>
+  secondaryAndTertiaryRSPlacementHelper(
+      int regionCount, Map<String, Integer> rackToServerCount) {
+    Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
+    List<ServerName> servers = getServersFromRack(rackToServerCount);
+    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers,
+        new Configuration());
+    Map<ServerName, List<HRegionInfo>> assignmentMap =
+        new HashMap<ServerName, List<HRegionInfo>>();
+    helper.setRackManager(rackManager);
+    helper.initialize();
+    // create regions
+    List<HRegionInfo> regions = new ArrayList<HRegionInfo>(regionCount);
+    for (int i = 0; i < regionCount; i++) {
+      HRegionInfo region = new HRegionInfo(("foobar"+i).getBytes());
+      regions.add(region);
+    }
+    // place the regions
+    helper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
+    return new Triple<Map<HRegionInfo, ServerName>, FavoredNodeAssignmentHelper, List<HRegionInfo>>
+                   (primaryRSMap, helper, regions);
+  }
+
+  // Places 'regionCount' primaries on a 3-rack, 30-server cluster and checks
+  // that the regions are spread evenly across racks and servers.
+  private void primaryRSPlacement(int regionCount, Map<HRegionInfo, ServerName> primaryRSMap) {
+    Map<String,Integer> rackToServerCount = new HashMap<String,Integer>();
+    rackToServerCount.put("rack1", 10);
+    rackToServerCount.put("rack2", 10);
+    rackToServerCount.put("rack3", 10);
+    List<ServerName> servers = getServersFromRack(rackToServerCount);
+    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers,
+        new Configuration());
+    helper.setRackManager(rackManager);
+    helper.initialize();
+
+    assertTrue(helper.canPlaceFavoredNodes());
+
+    Map<ServerName, List<HRegionInfo>> assignmentMap =
+        new HashMap<ServerName, List<HRegionInfo>>();
+    if (primaryRSMap == null) primaryRSMap = new HashMap<HRegionInfo, ServerName>();
+    // create some regions
+    List<HRegionInfo> regions = new ArrayList<HRegionInfo>(regionCount);
+    for (int i = 0; i < regionCount; i++) {
+      HRegionInfo region = new HRegionInfo(("foobar" + i).getBytes());
+      regions.add(region);
+    }
+    // place those regions in primary RSs
+    helper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
+
+    // we should have all the regions nicely spread across the racks
+    int regionsOnRack1 = 0;
+    int regionsOnRack2 = 0;
+    int regionsOnRack3 = 0;
+    for (Map.Entry<HRegionInfo, ServerName> entry : primaryRSMap.entrySet()) {
+      if (rackManager.getRack(entry.getValue()).equals("rack1")) {
+        regionsOnRack1++;
+      } else if (rackManager.getRack(entry.getValue()).equals("rack2")) {
+        regionsOnRack2++;
+      } else if (rackManager.getRack(entry.getValue()).equals("rack3")) {
+        regionsOnRack3++;
+      }
+    }
+    int numRegionsPerRack = (int)Math.ceil((double)regionCount/3); //since there are 3 racks
+    assertTrue(regionsOnRack1 == numRegionsPerRack && regionsOnRack2 == numRegionsPerRack
+        && regionsOnRack3 == numRegionsPerRack);
+    int numRegionsPerServer = (int)Math.ceil((double)regionCount/30); //since there are 30 servers
+    for (Map.Entry<ServerName, List<HRegionInfo>> entry : assignmentMap.entrySet()) {
+      assertTrue(entry.getValue().size() == numRegionsPerServer);
+    }
+  }
+}

Modified: hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java?rev=1481477&r1=1481476&r2=1481477&view=diff
==============================================================================
--- hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java (original)
+++ hbase/branches/0.95/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionServerNoMaster.java Sun May 12 06:55:38 2013
@@ -103,7 +103,7 @@ public class TestRegionServerNoMaster {
     // We reopen. We need a ZK node here, as a open is always triggered by a master.
     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
     // first version is '0'
-    AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0);
+    AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
     AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
     Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
     Assert.assertTrue(responseOpen.getOpeningState(0).
@@ -220,7 +220,7 @@ public class TestRegionServerNoMaster {
 
     // We're sending multiple requests in a row. The region server must handle this nicely.
     for (int i = 0; i < 10; i++) {
-      AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0);
+      AdminProtos.OpenRegionRequest orr = RequestConverter.buildOpenRegionRequest(hri, 0, null);
       AdminProtos.OpenRegionResponse responseOpen = getRS().openRegion(null, orr);
       Assert.assertTrue(responseOpen.getOpeningStateCount() == 1);
 



Mime
View raw message