hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jxi...@apache.org
Subject git commit: HBASE-11689 Track meta in transition (Andrey Stepachev and Jimmy Xiang)
Date Fri, 29 Aug 2014 22:51:52 GMT
Repository: hbase
Updated Branches:
  refs/heads/master 2cd45eb9a -> b7f751476


HBASE-11689 Track meta in transition (Andrey Stepachev and Jimmy Xiang)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b7f75147
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b7f75147
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b7f75147

Branch: refs/heads/master
Commit: b7f7514762433a7a02635e141e60d0e2ba333049
Parents: 2cd45eb
Author: Jimmy Xiang <jxiang@cloudera.com>
Authored: Thu Aug 28 10:23:31 2014 -0700
Committer: Jimmy Xiang <jxiang@cloudera.com>
Committed: Fri Aug 29 13:23:35 2014 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/ServerName.java     |  22 +-
 .../apache/hadoop/hbase/master/RegionState.java | 224 +++++++++--------
 .../hbase/zookeeper/MetaTableLocator.java       | 150 ++++++-----
 .../apache/hadoop/hbase/zookeeper/ZKUtil.java   |  30 ---
 .../protobuf/generated/ZooKeeperProtos.java     | 246 +++++++++++++++----
 .../src/main/protobuf/ZooKeeper.proto           |   7 +-
 .../hadoop/hbase/master/AssignmentManager.java  |  34 +--
 .../org/apache/hadoop/hbase/master/HMaster.java |  57 ++---
 .../hadoop/hbase/master/RegionStateStore.java   |  40 ++-
 .../handler/MetaServerShutdownHandler.java      |   6 -
 .../hbase/regionserver/HRegionServer.java       |  14 +-
 .../hadoop/hbase/TestMetaTableLocator.java      |  73 ++++--
 .../master/TestAssignmentManagerOnCluster.java  |  23 +-
 .../hadoop/hbase/master/TestMasterFailover.java | 122 ++++++++-
 .../hbase/master/TestMasterNoCluster.java       |   5 +-
 15 files changed, 691 insertions(+), 362 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
index 48b4a79..dde9202 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerName.java
@@ -18,21 +18,21 @@
  */
 package org.apache.hadoop.hbase;
 
-import com.google.common.net.InetAddresses;
-import com.google.protobuf.InvalidProtocolBufferException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
+import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.util.Addressing;
 import org.apache.hadoop.hbase.util.Bytes;
 
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Pattern;
+import com.google.common.net.InetAddresses;
+import com.google.protobuf.InvalidProtocolBufferException;
 
 /**
  * Instance of an HBase ServerName.
@@ -54,6 +54,8 @@ import java.util.regex.Pattern;
 @InterfaceAudience.Public
 @InterfaceStability.Evolving
 public class ServerName implements Comparable<ServerName>, Serializable {
+  private static final long serialVersionUID = 1367463982557264981L;
+
   /**
    * Version for this class.
    * Its a short rather than a byte so I can for sure distinguish between this
@@ -370,9 +372,9 @@ public class ServerName implements Comparable<ServerName>, Serializable {
     if (ProtobufUtil.isPBMagicPrefix(data)) {
       int prefixLen = ProtobufUtil.lengthOfPBMagic();
       try {
-        MetaRegionServer rss =
-          MetaRegionServer.PARSER.parseFrom(data, prefixLen, data.length - prefixLen);
-        org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName sn = rss.getServer();
+        ZooKeeperProtos.Master rss =
+          ZooKeeperProtos.Master.PARSER.parseFrom(data, prefixLen, data.length - prefixLen);
+        org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName sn = rss.getMaster();
         return valueOf(sn.getHostName(), sn.getPort(), sn.getStartCode());
       } catch (InvalidProtocolBufferException e) {
         // A failed parse of the znode is pretty catastrophic. Rather than loop

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
index d660db7..0a9c123 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/master/RegionState.java
@@ -51,9 +51,125 @@ public class RegionState {
     SPLITTING_NEW,  // new region to be created when RS splits a parent
                     // region but hasn't be created yet, or master doesn't
                     // know it's already created
-    MERGING_NEW     // new region to be created when RS merges two
+    MERGING_NEW;    // new region to be created when RS merges two
                     // daughter regions but hasn't be created yet, or
                     // master doesn't know it's already created
+
+    /**
+     * Convert to protobuf ClusterStatusProtos.RegionState.State
+     */
+    public ClusterStatusProtos.RegionState.State convert() {
+      ClusterStatusProtos.RegionState.State rs;
+      switch (this) {
+      case OFFLINE:
+        rs = ClusterStatusProtos.RegionState.State.OFFLINE;
+        break;
+      case PENDING_OPEN:
+        rs = ClusterStatusProtos.RegionState.State.PENDING_OPEN;
+        break;
+      case OPENING:
+        rs = ClusterStatusProtos.RegionState.State.OPENING;
+        break;
+      case OPEN:
+        rs = ClusterStatusProtos.RegionState.State.OPEN;
+        break;
+      case PENDING_CLOSE:
+        rs = ClusterStatusProtos.RegionState.State.PENDING_CLOSE;
+        break;
+      case CLOSING:
+        rs = ClusterStatusProtos.RegionState.State.CLOSING;
+        break;
+      case CLOSED:
+        rs = ClusterStatusProtos.RegionState.State.CLOSED;
+        break;
+      case SPLITTING:
+        rs = ClusterStatusProtos.RegionState.State.SPLITTING;
+        break;
+      case SPLIT:
+        rs = ClusterStatusProtos.RegionState.State.SPLIT;
+        break;
+      case FAILED_OPEN:
+        rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN;
+        break;
+      case FAILED_CLOSE:
+        rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE;
+        break;
+      case MERGING:
+        rs = ClusterStatusProtos.RegionState.State.MERGING;
+        break;
+      case MERGED:
+        rs = ClusterStatusProtos.RegionState.State.MERGED;
+        break;
+      case SPLITTING_NEW:
+        rs = ClusterStatusProtos.RegionState.State.SPLITTING_NEW;
+        break;
+      case MERGING_NEW:
+        rs = ClusterStatusProtos.RegionState.State.MERGING_NEW;
+        break;
+      default:
+        throw new IllegalStateException("");
+      }
+      return rs;
+    }
+
+    /**
+     * Convert a protobuf ClusterStatusProtos.RegionState.State to a RegionState.State
+     *
+     * @return the RegionState.State
+     */
+    public static State convert(ClusterStatusProtos.RegionState.State protoState) {
+      State state;
+      switch (protoState) {
+      case OFFLINE:
+        state = OFFLINE;
+        break;
+      case PENDING_OPEN:
+        state = PENDING_OPEN;
+        break;
+      case OPENING:
+        state = OPENING;
+        break;
+      case OPEN:
+        state = OPEN;
+        break;
+      case PENDING_CLOSE:
+        state = PENDING_CLOSE;
+        break;
+      case CLOSING:
+        state = CLOSING;
+        break;
+      case CLOSED:
+        state = CLOSED;
+        break;
+      case SPLITTING:
+        state = SPLITTING;
+        break;
+      case SPLIT:
+        state = SPLIT;
+        break;
+      case FAILED_OPEN:
+        state = FAILED_OPEN;
+        break;
+      case FAILED_CLOSE:
+        state = FAILED_CLOSE;
+        break;
+      case MERGING:
+        state = MERGING;
+        break;
+      case MERGED:
+        state = MERGED;
+        break;
+      case SPLITTING_NEW:
+        state = SPLITTING_NEW;
+        break;
+      case MERGING_NEW:
+        state = MERGING_NEW;
+        break;
+      default:
+        throw new IllegalStateException("");
+      }
+      return state;
+    }
   }
 
   private final long stamp;
@@ -250,58 +366,8 @@ public class RegionState {
    */
   public ClusterStatusProtos.RegionState convert() {
     ClusterStatusProtos.RegionState.Builder regionState = ClusterStatusProtos.RegionState.newBuilder();
-    ClusterStatusProtos.RegionState.State rs;
-    switch (this.state) {
-    case OFFLINE:
-      rs = ClusterStatusProtos.RegionState.State.OFFLINE;
-      break;
-    case PENDING_OPEN:
-      rs = ClusterStatusProtos.RegionState.State.PENDING_OPEN;
-      break;
-    case OPENING:
-      rs = ClusterStatusProtos.RegionState.State.OPENING;
-      break;
-    case OPEN:
-      rs = ClusterStatusProtos.RegionState.State.OPEN;
-      break;
-    case PENDING_CLOSE:
-      rs = ClusterStatusProtos.RegionState.State.PENDING_CLOSE;
-      break;
-    case CLOSING:
-      rs = ClusterStatusProtos.RegionState.State.CLOSING;
-      break;
-    case CLOSED:
-      rs = ClusterStatusProtos.RegionState.State.CLOSED;
-      break;
-    case SPLITTING:
-      rs = ClusterStatusProtos.RegionState.State.SPLITTING;
-      break;
-    case SPLIT:
-      rs = ClusterStatusProtos.RegionState.State.SPLIT;
-      break;
-    case FAILED_OPEN:
-      rs = ClusterStatusProtos.RegionState.State.FAILED_OPEN;
-      break;
-    case FAILED_CLOSE:
-      rs = ClusterStatusProtos.RegionState.State.FAILED_CLOSE;
-      break;
-    case MERGING:
-      rs = ClusterStatusProtos.RegionState.State.MERGING;
-      break;
-    case MERGED:
-      rs = ClusterStatusProtos.RegionState.State.MERGED;
-      break;
-    case SPLITTING_NEW:
-      rs = ClusterStatusProtos.RegionState.State.SPLITTING_NEW;
-      break;
-    case MERGING_NEW:
-      rs = ClusterStatusProtos.RegionState.State.MERGING_NEW;
-      break;
-    default:
-      throw new IllegalStateException("");
-    }
     regionState.setRegionInfo(HRegionInfo.convert(hri));
-    regionState.setState(rs);
+    regionState.setState(state.convert());
     regionState.setStamp(getStamp());
     return regionState.build();
   }
@@ -312,58 +378,8 @@ public class RegionState {
    * @return the RegionState
    */
   public static RegionState convert(ClusterStatusProtos.RegionState proto) {
-    RegionState.State state;
-    switch (proto.getState()) {
-    case OFFLINE:
-      state = State.OFFLINE;
-      break;
-    case PENDING_OPEN:
-      state = State.PENDING_OPEN;
-      break;
-    case OPENING:
-      state = State.OPENING;
-      break;
-    case OPEN:
-      state = State.OPEN;
-      break;
-    case PENDING_CLOSE:
-      state = State.PENDING_CLOSE;
-      break;
-    case CLOSING:
-      state = State.CLOSING;
-      break;
-    case CLOSED:
-      state = State.CLOSED;
-      break;
-    case SPLITTING:
-      state = State.SPLITTING;
-      break;
-    case SPLIT:
-      state = State.SPLIT;
-      break;
-    case FAILED_OPEN:
-      state = State.FAILED_OPEN;
-      break;
-    case FAILED_CLOSE:
-      state = State.FAILED_CLOSE;
-      break;
-    case MERGING:
-      state = State.MERGING;
-      break;
-    case MERGED:
-      state = State.MERGED;
-      break;
-    case SPLITTING_NEW:
-      state = State.SPLITTING_NEW;
-      break;
-    case MERGING_NEW:
-      state = State.MERGING_NEW;
-      break;
-    default:
-      throw new IllegalStateException("");
-    }
-
-    return new RegionState(HRegionInfo.convert(proto.getRegionInfo()),state,proto.getStamp(),null);
+    return new RegionState(HRegionInfo.convert(proto.getRegionInfo()),
+      State.convert(proto.getState()), proto.getStamp(), null);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
index f1d17c2..f0c1d87 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java
@@ -17,36 +17,42 @@
  */
 package org.apache.hadoop.hbase.zookeeper;
 
-import com.google.common.base.Stopwatch;
+import java.io.EOFException;
+import java.io.IOException;
+import java.net.ConnectException;
+import java.net.NoRouteToHostException;
+import java.net.SocketException;
+import java.net.SocketTimeoutException;
+import java.rmi.UnknownHostException;
+
+import javax.annotation.Nullable;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.RetriesExhaustedException;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
-import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.ipc.RpcClient;
 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
+import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.MetaRegionServer;
 import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.zookeeper.KeeperException;
 
-import java.io.EOFException;
-import java.io.IOException;
-import java.net.ConnectException;
-import java.net.NoRouteToHostException;
-import java.net.SocketException;
-import java.net.SocketTimeoutException;
-import java.rmi.UnknownHostException;
+import com.google.common.base.Stopwatch;
+import com.google.protobuf.InvalidProtocolBufferException;
 
 /**
  * Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper
@@ -78,15 +84,7 @@ public class MetaTableLocator {
    * @return true if meta region location is available, false if not
    */
   public boolean isLocationAvailable(ZooKeeperWatcher zkw) {
-    try {
-      return ZKUtil.getData(zkw, zkw.metaServerZNode) != null;
-    } catch(KeeperException e) {
-      LOG.error("ZK error trying to get hbase:meta from ZooKeeper");
-      return false;
-    } catch (InterruptedException e) {
-      LOG.error("ZK error trying to get hbase:meta from ZooKeeper");
-      return false;
-    }
+    return getMetaRegionLocation(zkw) != null;
   }
 
   /**
@@ -94,18 +92,13 @@ public class MetaTableLocator {
    * @param zkw zookeeper connection to use
    * @return server name or null if we failed to get the data.
    */
+  @Nullable
   public ServerName getMetaRegionLocation(final ZooKeeperWatcher zkw) {
     try {
-      try {
-        return ServerName.parseFrom(ZKUtil.getData(zkw, zkw.metaServerZNode));
-      } catch (DeserializationException e) {
-        throw ZKUtil.convert(e);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-        return null;
-      }
+      RegionState state = getMetaRegionState(zkw);
+      return state.isOpened() ? state.getServerName() : null;
     } catch (KeeperException ke) {
-        return null;
+      return null;
     }
   }
 
@@ -189,9 +182,8 @@ public class MetaTableLocator {
     } catch (RegionServerStoppedException e) {
       // Pass -- server name sends us to a server that is dying or already dead.
     }
-    return (service == null)? false:
-      verifyRegionLocation(service,
-          getMetaRegionLocation(zkw), META_REGION_NAME);
+    return (service != null) && verifyRegionLocation(service,
+            getMetaRegionLocation(zkw), META_REGION_NAME);
   }
 
   /**
@@ -315,44 +307,65 @@ public class MetaTableLocator {
    * Sets the location of <code>hbase:meta</code> in ZooKeeper to the
    * specified server address.
    * @param zookeeper zookeeper reference
-   * @param location The server hosting <code>hbase:meta</code>
+   * @param serverName The server hosting <code>hbase:meta</code>
+   * @param state The region transition state
    * @throws KeeperException unexpected zookeeper exception
    */
   public static void setMetaLocation(ZooKeeperWatcher zookeeper,
-                                     final ServerName location)
-  throws KeeperException {
-    LOG.info("Setting hbase:meta region location in ZooKeeper as " + location);
+      ServerName serverName, RegionState.State state) throws KeeperException {
+    LOG.info("Setting hbase:meta region location in ZooKeeper as " + serverName);
     // Make the MetaRegionServer pb and then get its bytes and save this as
     // the znode content.
-    byte [] data = toByteArray(location);
+    MetaRegionServer pbrsr = MetaRegionServer.newBuilder()
+      .setServer(ProtobufUtil.toServerName(serverName))
+      .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
+      .setState(state.convert()).build();
+    byte[] data = ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
     try {
-      ZKUtil.createAndWatch(zookeeper, zookeeper.metaServerZNode, data);
-    } catch(KeeperException.NodeExistsException nee) {
-      LOG.debug("META region location already existed, updated location");
       ZKUtil.setData(zookeeper, zookeeper.metaServerZNode, data);
+    } catch(KeeperException.NoNodeException nne) {
+      LOG.debug("META region location doesn't existed, create it");
+      ZKUtil.createAndWatch(zookeeper, zookeeper.metaServerZNode, data);
     }
   }
 
   /**
-   * Build up the znode content.
-   * @param sn What to put into the znode.
-   * @return The content of the meta-region-server znode
+   * Load the meta region state from the meta server ZNode.
    */
-  private static byte [] toByteArray(final ServerName sn) {
-    // ZNode content is a pb message preceded by some pb magic.
-    HBaseProtos.ServerName pbsn =
-      HBaseProtos.ServerName.newBuilder()
-                            .setHostName(sn.getHostname())
-                            .setPort(sn.getPort())
-                            .setStartCode(sn.getStartcode())
-                            .build();
-
-    ZooKeeperProtos.MetaRegionServer pbrsr =
-      ZooKeeperProtos.MetaRegionServer.newBuilder()
-                                      .setServer(pbsn)
-                                      .setRpcVersion(HConstants.RPC_CURRENT_VERSION)
-                                      .build();
-    return ProtobufUtil.prependPBMagic(pbrsr.toByteArray());
+  public static RegionState getMetaRegionState(ZooKeeperWatcher zkw) throws KeeperException {
+    RegionState.State state = RegionState.State.OPEN;
+    ServerName serverName = null;
+    try {
+      byte[] data = ZKUtil.getData(zkw, zkw.metaServerZNode);
+      if (data != null && data.length > 0 && ProtobufUtil.isPBMagicPrefix(data)) {
+        try {
+          int prefixLen = ProtobufUtil.lengthOfPBMagic();
+          ZooKeeperProtos.MetaRegionServer rl =
+            ZooKeeperProtos.MetaRegionServer.PARSER.parseFrom
+              (data, prefixLen, data.length - prefixLen);
+          if (rl.hasState()) {
+            state = RegionState.State.convert(rl.getState());
+          }
+          HBaseProtos.ServerName sn = rl.getServer();
+          serverName = ServerName.valueOf(
+            sn.getHostName(), sn.getPort(), sn.getStartCode());
+        } catch (InvalidProtocolBufferException e) {
+          throw new DeserializationException("Unable to parse meta region location");
+        }
+      } else {
+        // old style of meta region location?
+        serverName = ServerName.parseFrom(data);
+      }
+    } catch (DeserializationException e) {
+      throw ZKUtil.convert(e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+    }
+    if (serverName == null) {
+      state = RegionState.State.OFFLINE;
+    }
+    return new RegionState(HRegionInfo.FIRST_META_REGIONINFO,
+      state, serverName);
   }
 
   /**
@@ -362,7 +375,7 @@ public class MetaTableLocator {
    */
   public void deleteMetaLocation(ZooKeeperWatcher zookeeper)
   throws KeeperException {
-    LOG.info("Unsetting hbase:meta region location in ZooKeeper");
+    LOG.info("Deleting hbase:meta region location in ZooKeeper");
     try {
       // Just delete the node.  Don't need any watches.
       ZKUtil.deleteNode(zookeeper, zookeeper.metaServerZNode);
@@ -372,7 +385,7 @@ public class MetaTableLocator {
   }
 
   /**
-   * Wait until the meta region is available.
+   * Wait until the meta region is available and is not in transition.
    * @param zkw zookeeper connection to use
    * @param timeout maximum time to wait, in millis
    * @return ServerName or null if we timed out.
@@ -381,14 +394,23 @@ public class MetaTableLocator {
   public ServerName blockUntilAvailable(final ZooKeeperWatcher zkw,
       final long timeout)
   throws InterruptedException {
-    byte [] data = ZKUtil.blockUntilAvailable(zkw, zkw.metaServerZNode, timeout);
-    if (data == null) return null;
+    if (timeout < 0) throw new IllegalArgumentException();
+    if (zkw == null) throw new IllegalArgumentException();
+    Stopwatch sw = new Stopwatch().start();
+    ServerName sn = null;
     try {
-      return ServerName.parseFrom(data);
-    } catch (DeserializationException e) {
-      LOG.warn("Failed parse", e);
-      return null;
+      while (true) {
+        sn = getMetaRegionLocation(zkw);
+        if (sn != null || sw.elapsedMillis()
+            > timeout - HConstants.SOCKET_RETRY_WAIT_MS) {
+          break;
+        }
+        Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
+      }
+    } finally {
+      sw.stop();
     }
+    return sn;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
index b13667f..79fa4ba 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java
@@ -1831,36 +1831,6 @@ public class ZKUtil {
     }
   }
 
-
-  public static byte[] blockUntilAvailable(
-    final ZooKeeperWatcher zkw, final String znode, final long timeout)
-    throws InterruptedException {
-    if (timeout < 0) throw new IllegalArgumentException();
-    if (zkw == null) throw new IllegalArgumentException();
-    if (znode == null) throw new IllegalArgumentException();
-
-    byte[] data = null;
-    boolean finished = false;
-    final long endTime = System.currentTimeMillis() + timeout;
-    while (!finished) {
-      try {
-        data = ZKUtil.getData(zkw, znode);
-      } catch(KeeperException e) {
-        LOG.warn("Unexpected exception handling blockUntilAvailable", e);
-      }
-
-      if (data == null && (System.currentTimeMillis() +
-        HConstants.SOCKET_RETRY_WAIT_MS < endTime)) {
-        Thread.sleep(HConstants.SOCKET_RETRY_WAIT_MS);
-      } else {
-        finished = true;
-      }
-    }
-
-    return data;
-  }
-
-
   /**
    * Convert a {@link DeserializationException} to a more palatable {@link KeeperException}.
    * Used when can't let a {@link DeserializationException} out w/o changing public API.

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java
index 10274b4..6da497e 100644
--- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java
+++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java
@@ -16,7 +16,8 @@ public final class ZooKeeperProtos {
      * <code>required .ServerName server = 1;</code>
      *
      * <pre>
-     * The ServerName hosting the meta region currently.
+     * The ServerName hosting the meta region currently, or destination server,
+     * if meta region is in transition.
      * </pre>
      */
     boolean hasServer();
@@ -24,7 +25,8 @@ public final class ZooKeeperProtos {
      * <code>required .ServerName server = 1;</code>
      *
      * <pre>
-     * The ServerName hosting the meta region currently.
+     * The ServerName hosting the meta region currently, or destination server,
+     * if meta region is in transition.
      * </pre>
      */
     org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServer();
@@ -32,7 +34,8 @@ public final class ZooKeeperProtos {
      * <code>required .ServerName server = 1;</code>
      *
      * <pre>
-     * The ServerName hosting the meta region currently.
+     * The ServerName hosting the meta region currently, or destination server,
+     * if meta region is in transition.
      * </pre>
      */
     org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerOrBuilder();
@@ -58,6 +61,24 @@ public final class ZooKeeperProtos {
      * </pre>
      */
     int getRpcVersion();
+
+    // optional .RegionState.State state = 3;
+    /**
+     * <code>optional .RegionState.State state = 3;</code>
+     *
+     * <pre>
+     * State of the region transition. OPEN means fully operational 'hbase:meta'
+     * </pre>
+     */
+    boolean hasState();
+    /**
+     * <code>optional .RegionState.State state = 3;</code>
+     *
+     * <pre>
+     * State of the region transition. OPEN means fully operational 'hbase:meta'
+     * </pre>
+     */
+    org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State getState();
   }
   /**
    * Protobuf type {@code MetaRegionServer}
@@ -133,6 +154,17 @@ public final class ZooKeeperProtos {
               rpcVersion_ = input.readUInt32();
               break;
             }
+            case 24: {
+              int rawValue = input.readEnum();
+              org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State value = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.valueOf(rawValue);
+              if (value == null) {
+                unknownFields.mergeVarintField(3, rawValue);
+              } else {
+                bitField0_ |= 0x00000004;
+                state_ = value;
+              }
+              break;
+            }
           }
         }
       } catch (com.google.protobuf.InvalidProtocolBufferException e) {
@@ -180,7 +212,8 @@ public final class ZooKeeperProtos {
      * <code>required .ServerName server = 1;</code>
      *
      * <pre>
-     * The ServerName hosting the meta region currently.
+     * The ServerName hosting the meta region currently, or destination server,
+     * if meta region is in transition.
      * </pre>
      */
     public boolean hasServer() {
@@ -190,7 +223,8 @@ public final class ZooKeeperProtos {
      * <code>required .ServerName server = 1;</code>
      *
      * <pre>
-     * The ServerName hosting the meta region currently.
+     * The ServerName hosting the meta region currently, or destination server,
+     * if meta region is in transition.
      * </pre>
      */
     public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServer() {
@@ -200,7 +234,8 @@ public final class ZooKeeperProtos {
      * <code>required .ServerName server = 1;</code>
      *
      * <pre>
-     * The ServerName hosting the meta region currently.
+     * The ServerName hosting the meta region currently, or destination server,
+     * if meta region is in transition.
      * </pre>
      */
     public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerOrBuilder() {
@@ -235,9 +270,34 @@ public final class ZooKeeperProtos {
       return rpcVersion_;
     }
 
+    // optional .RegionState.State state = 3;
+    public static final int STATE_FIELD_NUMBER = 3;
+    private org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State state_;
+    /**
+     * <code>optional .RegionState.State state = 3;</code>
+     *
+     * <pre>
+     * State of the region transition. OPEN means fully operational 'hbase:meta'
+     * </pre>
+     */
+    public boolean hasState() {
+      return ((bitField0_ & 0x00000004) == 0x00000004);
+    }
+    /**
+     * <code>optional .RegionState.State state = 3;</code>
+     *
+     * <pre>
+     * State of the region transition. OPEN means fully operational 'hbase:meta'
+     * </pre>
+     */
+    public org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State getState() {
+      return state_;
+    }
+
     private void initFields() {
       server_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance();
       rpcVersion_ = 0;
+      state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE;
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -265,6 +325,9 @@ public final class ZooKeeperProtos {
       if (((bitField0_ & 0x00000002) == 0x00000002)) {
         output.writeUInt32(2, rpcVersion_);
       }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        output.writeEnum(3, state_.getNumber());
+      }
       getUnknownFields().writeTo(output);
     }
 
@@ -282,6 +345,10 @@ public final class ZooKeeperProtos {
         size += com.google.protobuf.CodedOutputStream
           .computeUInt32Size(2, rpcVersion_);
       }
+      if (((bitField0_ & 0x00000004) == 0x00000004)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeEnumSize(3, state_.getNumber());
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -315,6 +382,11 @@ public final class ZooKeeperProtos {
         result = result && (getRpcVersion()
             == other.getRpcVersion());
       }
+      result = result && (hasState() == other.hasState());
+      if (hasState()) {
+        result = result &&
+            (getState() == other.getState());
+      }
       result = result &&
           getUnknownFields().equals(other.getUnknownFields());
       return result;
@@ -336,6 +408,10 @@ public final class ZooKeeperProtos {
         hash = (37 * hash) + RPC_VERSION_FIELD_NUMBER;
         hash = (53 * hash) + getRpcVersion();
       }
+      if (hasState()) {
+        hash = (37 * hash) + STATE_FIELD_NUMBER;
+        hash = (53 * hash) + hashEnum(getState());
+      }
       hash = (29 * hash) + getUnknownFields().hashCode();
       memoizedHashCode = hash;
       return hash;
@@ -459,6 +535,8 @@ public final class ZooKeeperProtos {
         bitField0_ = (bitField0_ & ~0x00000001);
         rpcVersion_ = 0;
         bitField0_ = (bitField0_ & ~0x00000002);
+        state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE;
+        bitField0_ = (bitField0_ & ~0x00000004);
         return this;
       }
 
@@ -499,6 +577,10 @@ public final class ZooKeeperProtos {
           to_bitField0_ |= 0x00000002;
         }
         result.rpcVersion_ = rpcVersion_;
+        if (((from_bitField0_ & 0x00000004) == 0x00000004)) {
+          to_bitField0_ |= 0x00000004;
+        }
+        result.state_ = state_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -521,6 +603,9 @@ public final class ZooKeeperProtos {
         if (other.hasRpcVersion()) {
           setRpcVersion(other.getRpcVersion());
         }
+        if (other.hasState()) {
+          setState(other.getState());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -564,7 +649,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public boolean hasServer() {
@@ -574,7 +660,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServer() {
@@ -588,7 +675,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public Builder setServer(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName value) {
@@ -608,7 +696,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public Builder setServer(
@@ -626,7 +715,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public Builder mergeServer(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName value) {
@@ -649,7 +739,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public Builder clearServer() {
@@ -666,7 +757,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder getServerBuilder() {
@@ -678,7 +770,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerOrBuilder() {
@@ -692,7 +785,8 @@ public final class ZooKeeperProtos {
        * <code>required .ServerName server = 1;</code>
        *
        * <pre>
-       * The ServerName hosting the meta region currently.
+       * The ServerName hosting the meta region currently, or destination server,
+       * if meta region is in transition.
        * </pre>
        */
       private com.google.protobuf.SingleFieldBuilder<
@@ -766,6 +860,58 @@ public final class ZooKeeperProtos {
         return this;
       }
 
+      // optional .RegionState.State state = 3;
+      private org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE;
+      /**
+       * <code>optional .RegionState.State state = 3;</code>
+       *
+       * <pre>
+       * State of the region transition. OPEN means fully operational 'hbase:meta'
+       * </pre>
+       */
+      public boolean hasState() {
+        return ((bitField0_ & 0x00000004) == 0x00000004);
+      }
+      /**
+       * <code>optional .RegionState.State state = 3;</code>
+       *
+       * <pre>
+       * State of the region transition. OPEN means fully operational 'hbase:meta'
+       * </pre>
+       */
+      public org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State getState() {
+        return state_;
+      }
+      /**
+       * <code>optional .RegionState.State state = 3;</code>
+       *
+       * <pre>
+       * State of the region transition. OPEN means fully operational 'hbase:meta'
+       * </pre>
+       */
+      public Builder setState(org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State value) {
+        if (value == null) {
+          throw new NullPointerException();
+        }
+        bitField0_ |= 0x00000004;
+        state_ = value;
+        onChanged();
+        return this;
+      }
+      /**
+       * <code>optional .RegionState.State state = 3;</code>
+       *
+       * <pre>
+       * State of the region transition. OPEN means fully operational 'hbase:meta'
+       * </pre>
+       */
+      public Builder clearState() {
+        bitField0_ = (bitField0_ & ~0x00000004);
+        state_ = org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionState.State.OFFLINE;
+        onChanged();
+        return this;
+      }
+
       // @@protoc_insertion_point(builder_scope:MetaRegionServer)
     }
 
@@ -9414,39 +9560,40 @@ public final class ZooKeeperProtos {
       descriptor;
   static {
     java.lang.String[] descriptorData = {
-      "\n\017ZooKeeper.proto\032\013HBase.proto\"D\n\020MetaRe" +
-      "gionServer\022\033\n\006server\030\001 \002(\0132\013.ServerName\022" +
-      "\023\n\013rpc_version\030\002 \001(\r\":\n\006Master\022\033\n\006master" +
-      "\030\001 \002(\0132\013.ServerName\022\023\n\013rpc_version\030\002 \001(\r" +
-      "\"\037\n\tClusterUp\022\022\n\nstart_date\030\001 \002(\t\"\214\002\n\014Sp" +
-      "litLogTask\022\"\n\005state\030\001 \002(\0162\023.SplitLogTask" +
-      ".State\022 \n\013server_name\030\002 \002(\0132\013.ServerName" +
-      "\0221\n\004mode\030\003 \001(\0162\032.SplitLogTask.RecoveryMo" +
-      "de:\007UNKNOWN\"C\n\005State\022\016\n\nUNASSIGNED\020\000\022\t\n\005" +
-      "OWNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n\004DONE\020\003\022\007\n\003ERR\020\004",
-      "\">\n\014RecoveryMode\022\013\n\007UNKNOWN\020\000\022\021\n\rLOG_SPL" +
-      "ITTING\020\001\022\016\n\nLOG_REPLAY\020\002\"n\n\005Table\022$\n\005sta" +
-      "te\030\001 \002(\0162\014.Table.State:\007ENABLED\"?\n\005State" +
-      "\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\022\r\n\tDISABLING" +
-      "\020\002\022\014\n\010ENABLING\020\003\"\215\001\n\017ReplicationPeer\022\022\n\n" +
-      "clusterkey\030\001 \002(\t\022\037\n\027replicationEndpointI" +
-      "mpl\030\002 \001(\t\022\035\n\004data\030\003 \003(\0132\017.BytesBytesPair" +
-      "\022&\n\rconfiguration\030\004 \003(\0132\017.NameStringPair" +
-      "\"^\n\020ReplicationState\022&\n\005state\030\001 \002(\0162\027.Re" +
-      "plicationState.State\"\"\n\005State\022\013\n\007ENABLED",
-      "\020\000\022\014\n\010DISABLED\020\001\"+\n\027ReplicationHLogPosit" +
-      "ion\022\020\n\010position\030\001 \002(\003\"%\n\017ReplicationLock" +
-      "\022\022\n\nlock_owner\030\001 \002(\t\"\230\001\n\tTableLock\022\036\n\nta" +
-      "ble_name\030\001 \001(\0132\n.TableName\022\037\n\nlock_owner" +
-      "\030\002 \001(\0132\013.ServerName\022\021\n\tthread_id\030\003 \001(\003\022\021" +
-      "\n\tis_shared\030\004 \001(\010\022\017\n\007purpose\030\005 \001(\t\022\023\n\013cr" +
-      "eate_time\030\006 \001(\003\";\n\017StoreSequenceId\022\023\n\013fa" +
-      "mily_name\030\001 \002(\014\022\023\n\013sequence_id\030\002 \002(\004\"g\n\026" +
-      "RegionStoreSequenceIds\022 \n\030last_flushed_s" +
-      "equence_id\030\001 \002(\004\022+\n\021store_sequence_id\030\002 ",
-      "\003(\0132\020.StoreSequenceIdBE\n*org.apache.hado" +
-      "op.hbase.protobuf.generatedB\017ZooKeeperPr" +
-      "otosH\001\210\001\001\240\001\001"
+      "\n\017ZooKeeper.proto\032\013HBase.proto\032\023ClusterS" +
+      "tatus.proto\"g\n\020MetaRegionServer\022\033\n\006serve" +
+      "r\030\001 \002(\0132\013.ServerName\022\023\n\013rpc_version\030\002 \001(" +
+      "\r\022!\n\005state\030\003 \001(\0162\022.RegionState.State\":\n\006" +
+      "Master\022\033\n\006master\030\001 \002(\0132\013.ServerName\022\023\n\013r" +
+      "pc_version\030\002 \001(\r\"\037\n\tClusterUp\022\022\n\nstart_d" +
+      "ate\030\001 \002(\t\"\214\002\n\014SplitLogTask\022\"\n\005state\030\001 \002(" +
+      "\0162\023.SplitLogTask.State\022 \n\013server_name\030\002 " +
+      "\002(\0132\013.ServerName\0221\n\004mode\030\003 \001(\0162\032.SplitLo" +
+      "gTask.RecoveryMode:\007UNKNOWN\"C\n\005State\022\016\n\n",
+      "UNASSIGNED\020\000\022\t\n\005OWNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n" +
+      "\004DONE\020\003\022\007\n\003ERR\020\004\">\n\014RecoveryMode\022\013\n\007UNKN" +
+      "OWN\020\000\022\021\n\rLOG_SPLITTING\020\001\022\016\n\nLOG_REPLAY\020\002" +
+      "\"n\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007" +
+      "ENABLED\"?\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLE" +
+      "D\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\"\215\001\n\017Rep" +
+      "licationPeer\022\022\n\nclusterkey\030\001 \002(\t\022\037\n\027repl" +
+      "icationEndpointImpl\030\002 \001(\t\022\035\n\004data\030\003 \003(\0132" +
+      "\017.BytesBytesPair\022&\n\rconfiguration\030\004 \003(\0132" +
+      "\017.NameStringPair\"^\n\020ReplicationState\022&\n\005",
+      "state\030\001 \002(\0162\027.ReplicationState.State\"\"\n\005" +
+      "State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\"+\n\027Repl" +
+      "icationHLogPosition\022\020\n\010position\030\001 \002(\003\"%\n" +
+      "\017ReplicationLock\022\022\n\nlock_owner\030\001 \002(\t\"\230\001\n" +
+      "\tTableLock\022\036\n\ntable_name\030\001 \001(\0132\n.TableNa" +
+      "me\022\037\n\nlock_owner\030\002 \001(\0132\013.ServerName\022\021\n\tt" +
+      "hread_id\030\003 \001(\003\022\021\n\tis_shared\030\004 \001(\010\022\017\n\007pur" +
+      "pose\030\005 \001(\t\022\023\n\013create_time\030\006 \001(\003\";\n\017Store" +
+      "SequenceId\022\023\n\013family_name\030\001 \002(\014\022\023\n\013seque" +
+      "nce_id\030\002 \002(\004\"g\n\026RegionStoreSequenceIds\022 ",
+      "\n\030last_flushed_sequence_id\030\001 \002(\004\022+\n\021stor" +
+      "e_sequence_id\030\002 \003(\0132\020.StoreSequenceIdBE\n" +
+      "*org.apache.hadoop.hbase.protobuf.genera" +
+      "tedB\017ZooKeeperProtosH\001\210\001\001\240\001\001"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -9458,7 +9605,7 @@ public final class ZooKeeperProtos {
           internal_static_MetaRegionServer_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_MetaRegionServer_descriptor,
-              new java.lang.String[] { "Server", "RpcVersion", });
+              new java.lang.String[] { "Server", "RpcVersion", "State", });
           internal_static_Master_descriptor =
             getDescriptor().getMessageTypes().get(1);
           internal_static_Master_fieldAccessorTable = new
@@ -9532,6 +9679,7 @@ public final class ZooKeeperProtos {
       .internalBuildGeneratedFileFrom(descriptorData,
         new com.google.protobuf.Descriptors.FileDescriptor[] {
           org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.getDescriptor(),
+          org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.getDescriptor(),
         }, assigner);
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-protocol/src/main/protobuf/ZooKeeper.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/protobuf/ZooKeeper.proto b/hbase-protocol/src/main/protobuf/ZooKeeper.proto
index 4d727c6..8acd778 100644
--- a/hbase-protocol/src/main/protobuf/ZooKeeper.proto
+++ b/hbase-protocol/src/main/protobuf/ZooKeeper.proto
@@ -26,17 +26,22 @@ option java_generate_equals_and_hash = true;
 option optimize_for = SPEED;
 
 import "HBase.proto";
+import "ClusterStatus.proto";
 
 /**
  * Content of the meta-region-server znode.
  */
 message MetaRegionServer {
-  // The ServerName hosting the meta region currently.
+  // The ServerName hosting the meta region currently, or destination server,
+  // if meta region is in transition.
   required ServerName server = 1;
   // The major version of the rpc the server speaks.  This is used so that
   // clients connecting to the cluster can have prior knowledge of what version
   // to send to a RegionServer.  AsyncHBase will use this to detect versions.
   optional uint32 rpc_version = 2;
+
+  // State of the region transition. OPEN means fully operational 'hbase:meta'
+  optional RegionState.State state = 3;
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 53f159a..feedfef 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -524,7 +524,14 @@ public class AssignmentManager {
     regionStateStore.start();
 
     if (failover) {
-      processDeadServers(deadServers);
+      if (deadServers != null && !deadServers.isEmpty()) {
+        for (ServerName serverName: deadServers) {
+          if (!serverManager.isServerDead(serverName)) {
+            serverManager.expireServer(serverName); // Let SSH do region re-assign
+          }
+        }
+      }
+      processRegionsInTransition(regionStates.getRegionsInTransition().values());
     }
 
     // Now we can safely claim failover cleanup completed and enable
@@ -1399,13 +1406,9 @@ public class AssignmentManager {
    * <p>
    * Assumes that hbase:meta is currently closed and is not being actively served by
    * any RegionServer.
-   * <p>
-   * Forcibly unsets the current meta region location in ZooKeeper and assigns
-   * hbase:meta to a random RegionServer.
-   * @throws KeeperException
    */
   public void assignMeta() throws KeeperException {
-    this.server.getMetaTableLocator().deleteMetaLocation(this.server.getZooKeeper());
+    regionStates.updateRegionState(HRegionInfo.FIRST_META_REGIONINFO, State.OFFLINE);
     assign(HRegionInfo.FIRST_META_REGIONINFO);
   }
 
@@ -1709,28 +1712,15 @@ public class AssignmentManager {
   }
 
   /**
-   * Processes list of dead servers from result of hbase:meta scan and regions in RIT
-   *
-   * @param deadServers
-   *          The list of dead servers which failed while there was no active
-   *          master. Can be null.
+   * Processes the given regions in transition at startup.
    */
-  private void processDeadServers(Set<ServerName> deadServers) {
-    if (deadServers != null && !deadServers.isEmpty()) {
-      for (ServerName serverName: deadServers) {
-        if (!serverManager.isServerDead(serverName)) {
-          serverManager.expireServer(serverName); // Let SSH do region re-assign
-        }
-      }
-    }
-
+  void processRegionsInTransition(Collection<RegionState> regionStates) {
     // We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions
     // in case the RPC call is not sent out yet before the master was shut down
     // since we update the state before we send the RPC call. We can't update
     // the state after the RPC call. Otherwise, we don't know what's happened
     // to the region if the master dies right after the RPC call is out.
-    Map<String, RegionState> rits = regionStates.getRegionsInTransition();
-    for (RegionState regionState: rits.values()) {
+    for (RegionState regionState: regionStates) {
       if (!serverManager.isServerOnline(regionState.getServerName())) {
         continue; // SSH will handle it
       }

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index add3b1f..5024313 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -26,6 +26,7 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
@@ -46,6 +47,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.ClusterStatus;
 import org.apache.hadoop.hbase.CoordinatedStateException;
+import org.apache.hadoop.hbase.CoordinatedStateManager;
 import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -53,6 +55,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.MasterNotRunningException;
+import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.NamespaceNotFoundException;
 import org.apache.hadoop.hbase.PleaseHoldException;
@@ -64,12 +67,10 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotDisabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.UnknownRegionException;
-import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.client.MetaScanner;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
 import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
 import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.CoordinatedStateManager;
 import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
 import org.apache.hadoop.hbase.exceptions.DeserializationException;
 import org.apache.hadoop.hbase.executor.ExecutorType;
@@ -77,7 +78,6 @@ import org.apache.hadoop.hbase.ipc.RequestContext;
 import org.apache.hadoop.hbase.ipc.RpcServer;
 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
-import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.balancer.BalancerChore;
 import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
 import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
@@ -100,8 +100,8 @@ import org.apache.hadoop.hbase.monitoring.TaskMonitor;
 import org.apache.hadoop.hbase.procedure.MasterProcedureManagerHost;
 import org.apache.hadoop.hbase.procedure.flush.MasterFlushTableProcedureManager;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
-import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
+import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy;
@@ -647,37 +647,29 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
     long timeout = this.conf.getLong("hbase.catalog.verification.timeout", 1000);
     status.setStatus("Assigning hbase:meta region");
 
+    // Get current meta state from zk.
+    RegionState metaState = MetaTableLocator.getMetaRegionState(getZooKeeper());
+
     RegionStates regionStates = assignmentManager.getRegionStates();
-    regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO);
-    boolean metaRegionLocation = metaTableLocator.verifyMetaRegionLocation(
-      this.getShortCircuitConnection(), this.getZooKeeper(), timeout);
-    ServerName currentMetaServer = metaTableLocator.getMetaRegionLocation(this.getZooKeeper());
-    if (!metaRegionLocation) {
-      // Meta location is not verified. It should be in transition, or offline.
-      // We will wait for it to be assigned in enableSSHandWaitForMeta below.
-      if (currentMetaServer != null) {
-        // If the meta server is not known to be dead or online,
-        // just split the meta log, and don't expire it since this
-        // could be a full cluster restart. Otherwise, we will think
-        // this is a failover and lose previous region locations.
-        // If it is really a failover case, AM will find out in rebuilding
-        // user regions. Otherwise, we are good since all logs are split
-        // or known to be replayed before user regions are assigned.
-        if (serverManager.isServerOnline(currentMetaServer)) {
-          LOG.info("Forcing expire of " + currentMetaServer);
-          serverManager.expireServer(currentMetaServer);
+    regionStates.createRegionState(HRegionInfo.FIRST_META_REGIONINFO,
+      metaState.getState(), metaState.getServerName(), null);
+
+    if (!metaState.isOpened() || !metaTableLocator.verifyMetaRegionLocation(
+        this.getShortCircuitConnection(), this.getZooKeeper(), timeout)) {
+      ServerName currentMetaServer = metaState.getServerName();
+      if (serverManager.isServerOnline(currentMetaServer)) {
+        LOG.info("Meta was in transition on " + currentMetaServer);
+        assignmentManager.processRegionsInTransition(Arrays.asList(metaState));
+      } else {
+        if (currentMetaServer != null) {
+          splitMetaLogBeforeAssignment(currentMetaServer);
+          regionStates.logSplit(HRegionInfo.FIRST_META_REGIONINFO);
+          previouslyFailedMetaRSs.add(currentMetaServer);
         }
-        splitMetaLogBeforeAssignment(currentMetaServer);
-        previouslyFailedMetaRSs.add(currentMetaServer);
+        LOG.info("Re-assigning hbase:meta, it was on " + currentMetaServer);
+        assignmentManager.assignMeta();
       }
-      assignmentManager.assignMeta();
       assigned++;
-    } else {
-      // Region already assigned. We didn't assign it. Add to in-memory state.
-      regionStates.updateRegionState(
-        HRegionInfo.FIRST_META_REGIONINFO, State.OPEN, currentMetaServer);
-      this.assignmentManager.regionOnline(
-        HRegionInfo.FIRST_META_REGIONINFO, currentMetaServer);
     }
 
     enableMeta(TableName.META_TABLE_NAME);
@@ -737,9 +729,6 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
 
     if (waitForMeta) {
       metaTableLocator.waitMetaRegionLocation(this.getZooKeeper());
-      // Above check waits for general meta availability but this does not
-      // guarantee that the transition has completed
-      this.assignmentManager.waitForAssignment(HRegionInfo.FIRST_META_REGIONINFO);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java
index fb16dd3..e5370c5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStateStore.java
@@ -40,6 +40,9 @@ import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.MultiHConnection;
+import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
+import org.apache.zookeeper.KeeperException;
+
 import com.google.common.base.Preconditions;
 
 /**
@@ -158,20 +161,31 @@ public class RegionStateStore {
 
   void updateRegionState(long openSeqNum,
       RegionState newState, RegionState oldState) {
-    if (!initialized) {
-      return;
-    }
+    try {
+      HRegionInfo hri = newState.getRegion();
 
-    HRegionInfo hri = newState.getRegion();
-    if (!shouldPersistStateChange(hri, newState, oldState)) {
-      return;
-    }
+      // Handle the meta region before the initialization check: its state is
+      // stored in ZooKeeper, so the update does not depend on 'initialized'.
+      if (hri.isMetaRegion()) {
+        // Persist meta state via MetaTableLocator (currently backed by ZooKeeper).
+        try {
+          MetaTableLocator.setMetaLocation(server.getZooKeeper(),
+            newState.getServerName(), newState.getState());
+          return; // Done
+        } catch (KeeperException e) {
+          throw new IOException("Failed to update meta ZNode", e);
+        }
+      }
 
-    ServerName oldServer = oldState != null ? oldState.getServerName() : null;
-    ServerName serverName = newState.getServerName();
-    State state = newState.getState();
+      if (!initialized
+          || !shouldPersistStateChange(hri, newState, oldState)) {
+        return;
+      }
+
+      ServerName oldServer = oldState != null ? oldState.getServerName() : null;
+      ServerName serverName = newState.getServerName();
+      State state = newState.getState();
 
-    try {
       int replicaId = hri.getReplicaId();
       Put put = new Put(MetaTableAccessor.getMetaKeyForRegion(hri));
       StringBuilder info = new StringBuilder("Updating row ");
@@ -217,13 +231,13 @@ public class RegionStateStore {
       }
       // Called when meta is not on master
       multiHConnection.processBatchCallback(Arrays.asList(put), TableName.META_TABLE_NAME, null, null);
-        
+
     } catch (IOException ioe) {
       LOG.error("Failed to persist region state " + newState, ioe);
       server.abort("Failed to update region location", ioe);
     }
   }
-  
+
   void splitRegion(HRegionInfo p,
       HRegionInfo a, HRegionInfo b, ServerName sn) throws IOException {
     MetaTableAccessor.splitRegion(server.getShortCircuitConnection(), p, a, b, sn);

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
index a24e387..06cad0e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java
@@ -89,12 +89,6 @@ public class MetaServerShutdownHandler extends ServerShutdownHandler {
       // timeout
       if (am.isCarryingMeta(serverName)) {
         LOG.info("Server " + serverName + " was carrying META. Trying to assign.");
-        am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO);
-        verifyAndAssignMetaWithRetries();
-      } else if (!server.getMetaTableLocator().isLocationAvailable(this.server.getZooKeeper())) {
-        // the meta location as per master is null. This could happen in case when meta assignment
-        // in previous run failed, while meta znode has been updated to null. We should try to
-        // assign the meta again.
         verifyAndAssignMetaWithRetries();
       } else {
         LOG.info("META has been assigned to otherwhere, skip assigning.");

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 8e62620..deb5ed1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -88,6 +88,7 @@ import org.apache.hadoop.hbase.ipc.RpcClient;
 import org.apache.hadoop.hbase.ipc.RpcServerInterface;
 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
 import org.apache.hadoop.hbase.master.HMaster;
+import org.apache.hadoop.hbase.master.RegionState.State;
 import org.apache.hadoop.hbase.master.SplitLogManager;
 import org.apache.hadoop.hbase.master.TableLockManager;
 import org.apache.hadoop.hbase.procedure.RegionServerProcedureManagerHost;
@@ -1719,10 +1720,6 @@ public class HRegionServer extends HasThread implements
     // Update flushed sequence id of a recovering region in ZK
     updateRecoveringRegionLastFlushedSequenceId(r);
 
-    if (r.getRegionInfo().isMetaRegion()) {
-      MetaTableLocator.setMetaLocation(getZooKeeper(), serverName);
-    }
-
     // Notify master
     if (!reportRegionStateTransition(
         TransitionCode.OPENED, openSeqNum, r.getRegionInfo())) {
@@ -1746,7 +1743,14 @@ public class HRegionServer extends HasThread implements
       // to handle the region transition report at all.
       if (code == TransitionCode.OPENED) {
         Preconditions.checkArgument(hris != null && hris.length == 1);
-        if (!hris[0].isMetaRegion()) {
+        if (hris[0].isMetaRegion()) {
+          try {
+            MetaTableLocator.setMetaLocation(getZooKeeper(), serverName, State.OPEN);
+          } catch (KeeperException e) {
+            LOG.info("Failed to update meta location", e);
+            return false;
+          }
+        } else {
           try {
             MetaTableAccessor.updateRegionLocation(shortCircuitConnection,
               hris[0], serverName, openSeqNum);

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java
index dccd7ca..8a439a8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableLocator.java
@@ -18,13 +18,14 @@
  */
 package org.apache.hadoop.hbase;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
 
 import java.io.IOException;
 import java.net.ConnectException;
 
-import junit.framework.Assert;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -32,6 +33,7 @@ import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
 import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
+import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoRequest;
 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
@@ -106,6 +108,39 @@ public class TestMetaTableLocator {
   }
 
   /**
+   * Test normal operations
+   */
+  @Test public void testMetaLookup()
+          throws IOException, InterruptedException, ServiceException, KeeperException {
+    final ClientProtos.ClientService.BlockingInterface client =
+            Mockito.mock(ClientProtos.ClientService.BlockingInterface.class);
+
+    Mockito.when(client.get((RpcController)Mockito.any(), (GetRequest)Mockito.any())).
+            thenReturn(GetResponse.newBuilder().build());
+
+    final MetaTableLocator mtl = new MetaTableLocator();
+    assertNull(mtl.getMetaRegionLocation(this.watcher));
+    for (RegionState.State state : RegionState.State.values()) {
+      if (state.equals(RegionState.State.OPEN))
+        continue;
+      MetaTableLocator.setMetaLocation(this.watcher, SN, state);
+      assertNull(mtl.getMetaRegionLocation(this.watcher));
+      assertEquals(state, MetaTableLocator.getMetaRegionState(this.watcher).getState());
+    }
+    MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPEN);
+    assertEquals(mtl.getMetaRegionLocation(this.watcher), SN);
+    assertEquals(RegionState.State.OPEN,
+      MetaTableLocator.getMetaRegionState(this.watcher).getState());
+
+    mtl.deleteMetaLocation(this.watcher);
+    assertNull(MetaTableLocator.getMetaRegionState(this.watcher).getServerName());
+    assertEquals(MetaTableLocator.getMetaRegionState(this.watcher).getState(),
+      RegionState.State.OFFLINE);
+    assertNull(mtl.getMetaRegionLocation(this.watcher));
+  }
+
+
+  /**
    * Test interruptable while blocking wait on meta.
    * @throws IOException
    * @throws ServiceException
@@ -121,7 +156,7 @@ public class TestMetaTableLocator {
 
     final MetaTableLocator mtl = new MetaTableLocator();
     ServerName meta = new MetaTableLocator().getMetaRegionLocation(this.watcher);
-    Assert.assertNull(meta);
+    assertNull(meta);
     Thread t = new Thread() {
       @Override
       public void run() {
@@ -153,11 +188,15 @@ public class TestMetaTableLocator {
     Mockito.when(implementation.get((RpcController) Mockito.any(), (GetRequest) Mockito.any())).
       thenThrow(new ServiceException(ex));
 
-    MetaTableLocator.setMetaLocation(this.watcher, SN);
     long timeout = UTIL.getConfiguration().
-      getLong("hbase.catalog.verification.timeout", 1000);
-    Assert.assertFalse(new MetaTableLocator().verifyMetaRegionLocation(
+            getLong("hbase.catalog.verification.timeout", 1000);
+    MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPENING);
+    assertFalse(new MetaTableLocator().verifyMetaRegionLocation(
       connection, watcher, timeout));
+
+    MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPEN);
+    assertFalse(new MetaTableLocator().verifyMetaRegionLocation(
+            connection, watcher, timeout));
   }
 
   /**
@@ -213,9 +252,13 @@ public class TestMetaTableLocator {
     Mockito.when(connection.getAdmin(Mockito.any(ServerName.class), Mockito.anyBoolean())).
       thenReturn(implementation);
 
+    ServerName sn = ServerName.valueOf("example.com", 1234, System.currentTimeMillis());
     MetaTableLocator.setMetaLocation(this.watcher,
-      ServerName.valueOf("example.com", 1234, System.currentTimeMillis()));
-    Assert.assertFalse(new MetaTableLocator().verifyMetaRegionLocation(connection, watcher, 100));
+            sn,
+            RegionState.State.OPENING);
+    assertFalse(new MetaTableLocator().verifyMetaRegionLocation(connection, watcher, 100));
+    MetaTableLocator.setMetaLocation(this.watcher, sn, RegionState.State.OPEN);
+    assertFalse(new MetaTableLocator().verifyMetaRegionLocation(connection, watcher, 100));
   }
 
   @Test (expected = NotAllMetaRegionsOnlineException.class)
@@ -234,22 +277,18 @@ public class TestMetaTableLocator {
   throws IOException, InterruptedException, KeeperException {
     final MetaTableLocator mtl = new MetaTableLocator();
     ServerName hsa = mtl.getMetaRegionLocation(watcher);
-    Assert.assertNull(hsa);
+    assertNull(hsa);
 
     // Now test waiting on meta location getting set.
     Thread t = new WaitOnMetaThread();
     startWaitAliveThenWaitItLives(t, 1);
     // Set a meta location.
-    hsa = setMetaLocation();
+    MetaTableLocator.setMetaLocation(this.watcher, SN, RegionState.State.OPEN);
+    hsa = SN;
     // Join the thread... should exit shortly.
     t.join();
     // Now meta is available.
-    Assert.assertTrue(mtl.getMetaRegionLocation(watcher).equals(hsa));
-  }
-
-  private ServerName setMetaLocation() throws KeeperException {
-    MetaTableLocator.setMetaLocation(this.watcher, SN);
-    return SN;
+    assertTrue(mtl.getMetaRegionLocation(watcher).equals(hsa));
   }
 
   /**
@@ -302,7 +341,7 @@ public class TestMetaTableLocator {
     }
     // Wait one second.
     Threads.sleep(ms);
-    Assert.assertTrue("Assert " + t.getName() + " still waiting", t.isAlive());
+    assertTrue("Assert " + t.getName() + " still waiting", t.isAlive());
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
index 9ebf5ec..519f806 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java
@@ -67,7 +67,7 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
-import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -120,27 +120,42 @@ public class TestAssignmentManagerOnCluster {
         metaServerName = cluster.getLiveRegionServerThreads()
           .get(0).getRegionServer().getServerName();
         master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
-          Bytes.toBytes(metaServerName.getServerName()));
+                Bytes.toBytes(metaServerName.getServerName()));
         TEST_UTIL.waitUntilNoRegionsInTransition(60000);
       }
+      RegionState metaState =
+        MetaTableLocator.getMetaRegionState(master.getZooKeeper());
+      assertEquals("Meta should be not in transition", metaState.getState(), State.OPEN);
       assertNotEquals("Meta should be moved off master",
-        metaServerName, master.getServerName());
+        metaState.getServerName(), master.getServerName());
+      assertEquals("Meta should be on the meta server",
+        metaState.getServerName(), metaServerName);
       cluster.killRegionServer(metaServerName);
       stoppedARegionServer = true;
       cluster.waitForRegionServerToStop(metaServerName, 60000);
 
       // Wait for SSH to finish
+      final ServerName oldServerName = metaServerName;
       final ServerManager serverManager = master.getServerManager();
       TEST_UTIL.waitFor(120000, 200, new Waiter.Predicate<Exception>() {
         @Override
         public boolean evaluate() throws Exception {
-          return !serverManager.areDeadServersInProgress();
+          return serverManager.isServerDead(oldServerName)
+            && !serverManager.areDeadServersInProgress();
         }
       });
 
+      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
       // Now, make sure meta is assigned
       assertTrue("Meta should be assigned",
         regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
+      // Now, make sure meta is registered in zk
+      metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
+      assertEquals("Meta should be not in transition", metaState.getState(), State.OPEN);
+      assertEquals("Meta should be assigned", metaState.getServerName(),
+        regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
+      assertNotEquals("Meta should be assigned on a different server",
+        metaState.getServerName(), metaServerName);
     } finally {
       if (stoppedARegionServer) {
         cluster.startRegionServer();

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
index 5c35611..ef244b6 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java
@@ -44,11 +44,14 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.master.RegionState.State;
+import org.apache.hadoop.hbase.protobuf.RequestConverter;
 import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
@@ -235,7 +238,8 @@ public class TestMasterFailover {
     // at this point we only expect 3 regions to be assigned out
     // (catalogs and namespace, + 1 online region)
     assertEquals(3, cluster.countServedRegions());
-    HRegionInfo hriOnline = onlineTable.getRegionLocation("").getRegionInfo();
+    HRegionInfo hriOnline = onlineTable.getRegionLocation(
+      HConstants.EMPTY_START_ROW).getRegionInfo();
 
     RegionStates regionStates = master.getAssignmentManager().getRegionStates();
     RegionStateStore stateStore = master.getAssignmentManager().getRegionStateStore();
@@ -281,5 +285,121 @@ public class TestMasterFailover {
     // Done, shutdown the cluster
     TEST_UTIL.shutdownMiniCluster();
   }
+
+  /**
+   * Test meta in transition during master failover
+   */
+  @Test(timeout = 180000)
+  public void testMetaInTransitionWhenMasterFailover() throws Exception {
+    final int NUM_MASTERS = 1;
+    final int NUM_RS = 1;
+
+    // Start the cluster
+    HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+    TEST_UTIL.startMiniCluster(NUM_MASTERS, NUM_RS);
+    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
+    log("Cluster started");
+
+    log("Moving meta off the master");
+    HMaster activeMaster = cluster.getMaster();
+    HRegionServer rs = cluster.getRegionServer(0);
+    ServerName metaServerName = cluster.getLiveRegionServerThreads()
+      .get(0).getRegionServer().getServerName();
+    activeMaster.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
+      Bytes.toBytes(metaServerName.getServerName()));
+    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
+    assertEquals("Meta should be assigned on expected regionserver",
+      metaServerName, activeMaster.getMetaTableLocator()
+        .getMetaRegionLocation(activeMaster.getZooKeeper()));
+
+    // Now kill master, meta should remain on rs, where we placed it before.
+    log("Aborting master");
+    activeMaster.abort("test-kill");
+    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
+    log("Master has aborted");
+
+    // meta should remain where it was
+    RegionState metaState =
+      MetaTableLocator.getMetaRegionState(rs.getZooKeeper());
+    assertEquals("hbase:meta should be onlined on RS",
+      metaState.getServerName(), rs.getServerName());
+    assertEquals("hbase:meta should be onlined on RS",
+      metaState.getState(), State.OPEN);
+
+    // Start up a new master
+    log("Starting up a new master");
+    activeMaster = cluster.startMaster().getMaster();
+    log("Waiting for master to be ready");
+    cluster.waitForActiveAndReadyMaster();
+    log("Master is ready");
+
+    // ensure meta is still deployed on RS
+    metaState =
+      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
+    assertEquals("hbase:meta should be onlined on RS",
+      metaState.getServerName(), rs.getServerName());
+    assertEquals("hbase:meta should be onlined on RS",
+      metaState.getState(), State.OPEN);
+
+    // Update meta state as PENDING_OPEN, then kill master.
+    // That simulates the case where the RS successfully deployed, but
+    // the RPC was lost right before failure.
+    // The region server should expire (how can it be verified?)
+    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
+      rs.getServerName(), State.PENDING_OPEN);
+    HRegion meta = rs.getFromOnlineRegions(HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
+    rs.removeFromOnlineRegions(meta, null);
+    meta.close();
+
+    log("Aborting master");
+    activeMaster.abort("test-kill");
+    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
+    log("Master has aborted");
+
+    // Start up a new master
+    log("Starting up a new master");
+    activeMaster = cluster.startMaster().getMaster();
+    log("Waiting for master to be ready");
+    cluster.waitForActiveAndReadyMaster();
+    log("Master is ready");
+
+    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
+    log("Meta was assigned");
+
+    metaState =
+      MetaTableLocator.getMetaRegionState(activeMaster.getZooKeeper());
+    assertEquals("hbase:meta should be onlined on RS",
+      metaState.getServerName(), rs.getServerName());
+    assertEquals("hbase:meta should be onlined on RS",
+      metaState.getState(), State.OPEN);
+
+    // Update meta state as PENDING_CLOSE, then kill master.
+    // That simulates the case where the RS successfully deployed, but
+    // the RPC was lost right before failure.
+    // The region server should expire (how can it be verified?)
+    MetaTableLocator.setMetaLocation(activeMaster.getZooKeeper(),
+      rs.getServerName(), State.PENDING_CLOSE);
+
+    log("Aborting master");
+    activeMaster.abort("test-kill");
+    cluster.waitForMasterToStop(activeMaster.getServerName(), 30000);
+    log("Master has aborted");
+
+    rs.getRSRpcServices().closeRegion(null, RequestConverter.buildCloseRegionRequest(
+      rs.getServerName(), HRegionInfo.FIRST_META_REGIONINFO.getEncodedName()));
+
+    // Start up a new master
+    log("Starting up a new master");
+    activeMaster = cluster.startMaster().getMaster();
+    log("Waiting for master to be ready");
+    cluster.waitForActiveAndReadyMaster();
+    log("Master is ready");
+
+    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
+    log("Meta was assigned");
+
+    // Done, shutdown the cluster
+    TEST_UTIL.shutdownMiniCluster();
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/b7f75147/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
index 9f18f87..e643a5b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
@@ -39,12 +39,12 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.MetaMockingUtil;
 import org.apache.hadoop.hbase.Server;
 import org.apache.hadoop.hbase.ServerLoad;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.ZooKeeperConnectionException;
-import org.apache.hadoop.hbase.MetaMockingUtil;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
 import org.apache.hadoop.hbase.client.Result;
@@ -157,7 +157,8 @@ public class TestMasterNoCluster {
     final MockRegionServer rs2 = new MockRegionServer(conf, sn2);
     // Put some data into the servers.  Make it look like sn0 has the meta
     // Put data into sn2 so it looks like it has a few regions for a table named 't'.
-    MetaTableLocator.setMetaLocation(rs0.getZooKeeper(), rs0.getServerName());
+    MetaTableLocator.setMetaLocation(rs0.getZooKeeper(),
+      rs0.getServerName(), RegionState.State.OPEN);
     final TableName tableName = TableName.valueOf("t");
     Result [] results = new Result [] {
       MetaMockingUtil.getMetaTableRowResult(


Mime
View raw message