hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From apurt...@apache.org
Subject hbase git commit: HBASE-19326 Remove decommissioned servers from rsgroup
Date Fri, 01 Dec 2017 19:19:38 GMT
Repository: hbase
Updated Branches:
  refs/heads/branch-2 752be198e -> 01d366da4


HBASE-19326 Remove decommissioned servers from rsgroup

Signed-off-by: Michael Stack <stack@apache.org>

Conflicts:
	hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/01d366da
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/01d366da
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/01d366da

Branch: refs/heads/branch-2
Commit: 01d366da4ac8925b827b85619836fc3e1bdcde74
Parents: 752be19
Author: Guangxu Cheng <guangxucheng@gmail.com>
Authored: Fri Dec 1 03:48:29 2017 +0800
Committer: Andrew Purtell <apurtell@apache.org>
Committed: Fri Dec 1 11:12:34 2017 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/rsgroup/RSGroupAdmin.java      |  10 ++
 .../hbase/rsgroup/RSGroupAdminClient.java       |  20 ++++
 .../hbase/rsgroup/RSGroupAdminEndpoint.java     |  36 ++++++
 .../hbase/rsgroup/RSGroupAdminServer.java       |  52 +++++++++
 .../hbase/rsgroup/RSGroupInfoManager.java       |   6 +
 .../hbase/rsgroup/RSGroupInfoManagerImpl.java   |  26 +++++
 .../src/main/protobuf/RSGroupAdmin.proto        |  10 ++
 .../hadoop/hbase/rsgroup/TestRSGroups.java      |   6 +-
 .../hadoop/hbase/rsgroup/TestRSGroupsBase.java  | 112 ++++++++++++++++++-
 .../rsgroup/VerifyingRSGroupAdminClient.java    |   6 +
 .../hbase/coprocessor/MasterObserver.java       |  18 +++
 .../hbase/master/MasterCoprocessorHost.java     |  24 ++++
 .../hbase/security/access/AccessController.java |   6 +
 .../src/main/ruby/hbase/rsgroup_admin.rb        |  12 ++
 hbase-shell/src/main/ruby/shell.rb              |   1 +
 .../shell/commands/remove_servers_rsgroup.rb    |  35 ++++++
 16 files changed, 376 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java
index 5f38d39..453ef54 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdmin.java
@@ -88,4 +88,14 @@ public interface RSGroupAdmin {
    */
   void moveServersAndTables(Set<Address> servers, Set<TableName> tables,
                             String targetGroup) throws IOException;
+
+  /**
+   * Remove decommissioned servers from rsgroup.
+   * 1. Sometimes we may find the server aborted due to some hardware failure and we must offline
+   * the server for repairing. Or we need to move some servers to join other clusters.
+   * So we need to remove these servers from the rsgroup.
+   * 2. Dead/recovering/live servers will be disallowed.
+   * @param servers set of servers to remove
+   */
+  void removeServers(Set<Address> servers) throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java
index 9949704..be83a7b 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminClient.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveServers
 import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveTablesRequest;
 import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RSGroupAdminService;
 import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveRSGroupRequest;
+import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveServersRequest;
 import org.apache.hadoop.hbase.protobuf.generated.RSGroupProtos;
 
 import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets;
@@ -205,4 +206,23 @@ class RSGroupAdminClient implements RSGroupAdmin {
       throw ProtobufUtil.handleRemoteException(e);
     }
   }
+
+  @Override
+  public void removeServers(Set<Address> servers) throws IOException {
+    Set<HBaseProtos.ServerName> hostPorts = Sets.newHashSet();
+    for(Address el: servers) {
+      hostPorts.add(HBaseProtos.ServerName.newBuilder()
+          .setHostName(el.getHostname())
+          .setPort(el.getPort())
+          .build());
+    }
+    RemoveServersRequest request = RemoveServersRequest.newBuilder()
+        .addAllServers(hostPorts)
+        .build();
+    try {
+      stub.removeServers(null, request);
+    } catch (ServiceException e) {
+      throw ProtobufUtil.handleRemoteException(e);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java
index 3a7d03d..9e3dcac 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminEndpoint.java
@@ -21,8 +21,10 @@ package org.apache.hadoop.hbase.rsgroup;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Optional;
 import java.util.Set;
+import java.util.stream.Collectors;
 
 import com.google.protobuf.RpcCallback;
 import com.google.protobuf.RpcController;
@@ -32,6 +34,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.CoprocessorEnvironment;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.SnapshotDescription;
@@ -70,6 +73,8 @@ import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.MoveTablesR
 import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RSGroupAdminService;
 import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveRSGroupRequest;
 import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveRSGroupResponse;
+import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveServersRequest;
+import org.apache.hadoop.hbase.protobuf.generated.RSGroupAdminProtos.RemoveServersResponse;
 import org.apache.hadoop.hbase.protobuf.generated.TableProtos;
 import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -290,6 +295,26 @@ public class RSGroupAdminEndpoint implements MasterCoprocessor, MasterObserver {
       }
       done.run(builder.build());
     }
+
+    @Override
+    public void removeServers(RpcController controller,
+        RemoveServersRequest request,
+        RpcCallback<RemoveServersResponse> done) {
+      RemoveServersResponse.Builder builder =
+          RemoveServersResponse.newBuilder();
+      try {
+        Set<Address> servers = Sets.newHashSet();
+        for (HBaseProtos.ServerName el : request.getServersList()) {
+          servers.add(Address.fromParts(el.getHostName(), el.getPort()));
+        }
+        LOG.info(master.getClientIdAuditPrefix()
+            + " remove decommissioned servers from rsgroup: " + servers);
+        groupAdminServer.removeServers(servers);
+      } catch (IOException e) {
+        CoprocessorRpcUtils.setControllerException(controller, e);
+      }
+      done.run(builder.build());
+    }
   }
 
   void assignTableToGroup(TableDescriptor desc) throws IOException {
@@ -358,5 +383,16 @@ public class RSGroupAdminEndpoint implements MasterCoprocessor, MasterObserver {
     assignTableToGroup(desc);
   }
 
+  @Override
+  public void postClearDeadServers(ObserverContext<MasterCoprocessorEnvironment> ctx,
+      List<ServerName> servers, List<ServerName> notClearedServers)
+      throws IOException {
+    Set<Address> clearedServer = servers.stream().
+        filter(server -> !notClearedServers.contains(server)).
+        map(ServerName::getAddress).
+        collect(Collectors.toSet());
+    groupAdminServer.removeServers(clearedServer);
+  }
+
   /////////////////////////////////////////////////////////////////////////////
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
index 4a9a885..45421e3 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java
@@ -583,6 +583,29 @@ public class RSGroupAdminServer implements RSGroupAdmin {
             + servers + " , Tables : " + tables + " => " +  targetGroup);
   }
 
+  @Override
+  public void removeServers(Set<Address> servers) throws IOException {
+    {
+      if (servers == null || servers.isEmpty()) {
+        throw new ConstraintException("The set of servers to remove cannot be null or empty.");
+      }
+      // Hold a lock on the manager instance while moving servers to prevent
+      // another writer changing our state while we are working.
+      synchronized (rsGroupInfoManager) {
+        if (master.getMasterCoprocessorHost() != null) {
+          master.getMasterCoprocessorHost().preRemoveServers(servers);
+        }
+        //check the set of servers
+        checkForDeadOrOnlineServers(servers);
+        rsGroupInfoManager.removeServers(servers);
+        if (master.getMasterCoprocessorHost() != null) {
+          master.getMasterCoprocessorHost().postRemoveServers(servers);
+        }
+        LOG.info("Remove decommissioned servers " + servers + " from rsgroup done.");
+      }
+    }
+  }
+
   private Map<String, RegionState> rsGroupGetRegionsInTransition(String groupName)
       throws IOException {
     Map<String, RegionState> rit = Maps.newTreeMap();
@@ -634,4 +657,33 @@ public class RSGroupAdminServer implements RSGroupAdmin {
 
     return result;
   }
+
+  /**
+   * Check if the set of servers are belong to dead servers list or online servers list.
+   * @param servers servers to remove
+   */
+  private void checkForDeadOrOnlineServers(Set<Address> servers) throws ConstraintException {
+    // This uglyness is because we only have Address, not ServerName.
+    Set<Address> onlineServers = new HashSet<>();
+    for(ServerName server: master.getServerManager().getOnlineServers().keySet()) {
+      onlineServers.add(server.getAddress());
+    }
+
+    Set<Address> deadServers = new HashSet<>();
+    for(ServerName server: master.getServerManager().getDeadServers().copyServerNames()) {
+      deadServers.add(server.getAddress());
+    }
+
+    for (Address address: servers) {
+      if (onlineServers.contains(address)) {
+        throw new ConstraintException(
+            "Server " + address + " is an online server, not allowed to remove.");
+      }
+      if (deadServers.contains(address)) {
+        throw new ConstraintException(
+            "Server " + address + " is on the dead servers list,"
+                + " Maybe it will come back again, not allowed to remove.");
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java
index 3fb40da..5eee236 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManager.java
@@ -117,4 +117,10 @@ public interface RSGroupInfoManager {
    */
   void moveServersAndTables(Set<Address> servers, Set<TableName> tables,
       String srcGroup, String dstGroup) throws IOException;
+
+  /**
+   * Remove decommissioned servers from rsgroup
+   * @param servers set of servers to remove
+   */
+  void removeServers(Set<Address> servers) throws IOException;
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
index 5fca659..4d8ff92 100644
--- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
+++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java
@@ -309,6 +309,32 @@ class RSGroupInfoManagerImpl implements RSGroupInfoManager {
     flushConfig(newGroupMap);
   }
 
+  @Override
+  public synchronized void removeServers(Set<Address> servers) throws IOException {
+    Map<String, RSGroupInfo> rsGroupInfos = new HashMap<String, RSGroupInfo>();
+    for (Address el: servers) {
+      RSGroupInfo rsGroupInfo = getRSGroupOfServer(el);
+      if (rsGroupInfo != null) {
+        RSGroupInfo newRsGroupInfo = rsGroupInfos.get(rsGroupInfo.getName());
+        if (newRsGroupInfo == null) {
+          rsGroupInfo.removeServer(el);
+          rsGroupInfos.put(rsGroupInfo.getName(), rsGroupInfo);
+        } else {
+          newRsGroupInfo.removeServer(el);
+          rsGroupInfos.put(newRsGroupInfo.getName(), newRsGroupInfo);
+        }
+      }else {
+        LOG.warn("Server " + el + " does not belong to any rsgroup.");
+      }
+    }
+
+    if (rsGroupInfos.size() > 0) {
+      Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
+      newGroupMap.putAll(rsGroupInfos);
+      flushConfig(newGroupMap);
+    }
+  }
+
   List<RSGroupInfo> retrieveGroupListFromGroupTable() throws IOException {
     List<RSGroupInfo> rsGroupInfoList = Lists.newArrayList();
     for (Result result : rsGroupTable.getScanner(new Scan())) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto b/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto
index 65da657..fbd55ad 100644
--- a/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto
+++ b/hbase-rsgroup/src/main/protobuf/RSGroupAdmin.proto
@@ -116,6 +116,13 @@ message MoveServersAndTablesRequest {
 message MoveServersAndTablesResponse {
 }
 
+message RemoveServersRequest {
+  repeated ServerName servers = 1;
+}
+
+message RemoveServersResponse {
+}
+
 service RSGroupAdminService {
   rpc GetRSGroupInfo(GetRSGroupInfoRequest)
     returns (GetRSGroupInfoResponse);
@@ -146,4 +153,7 @@ service RSGroupAdminService {
 
   rpc MoveServersAndTables(MoveServersAndTablesRequest)
     returns (MoveServersAndTablesResponse);
+
+  rpc RemoveServers(RemoveServersRequest)
+    returns (RemoveServersResponse);
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
index fffdeb7..9a58097 100644
--- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
+++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroups.java
@@ -74,10 +74,10 @@ public class TestRSGroups extends TestRSGroupsBase {
         RSGroupBasedLoadBalancer.class.getName());
     TEST_UTIL.getConfiguration().set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
         RSGroupAdminEndpoint.class.getName());
-    TEST_UTIL.startMiniCluster(NUM_SLAVES_BASE);
-    TEST_UTIL.getConfiguration().set(
+    TEST_UTIL.startMiniCluster(NUM_SLAVES_BASE - 1);
+    TEST_UTIL.getConfiguration().setInt(
         ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART,
-        ""+NUM_SLAVES_BASE);
+        NUM_SLAVES_BASE - 1);
     TEST_UTIL.getConfiguration().setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
 
     admin = TEST_UTIL.getAdmin();

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
index 6aa3acd..bbcf120 100644
--- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
+++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsBase.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
+import com.google.common.collect.Lists;
 import java.io.IOException;
 import java.security.SecureRandom;
 import java.util.EnumSet;
@@ -50,6 +51,7 @@ import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.constraint.ConstraintException;
+import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.net.Address;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.Assert;
@@ -62,6 +64,7 @@ import org.apache.hadoop.hbase.shaded.com.google.common.collect.Maps;
 import org.apache.hadoop.hbase.shaded.com.google.common.collect.Sets;
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetServerInfoRequest;
 
 public abstract class TestRSGroupsBase {
   protected static final Log LOG = LogFactory.getLog(TestRSGroupsBase.class);
@@ -863,4 +866,111 @@ public abstract class TestRSGroupsBase {
     //verify that all region still assgin on targetServer
     Assert.assertEquals(5, getTableServerRegionMap().get(tableName).get(targetServer).size());
   }
-}
+
+  @Test
+  public void testClearDeadServers() throws Exception {
+    LOG.info("testClearDeadServers");
+    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3);
+
+    ServerName targetServer = ServerName.parseServerName(
+        newGroup.getServers().iterator().next().toString());
+    AdminProtos.AdminService.BlockingInterface targetRS =
+        ((ClusterConnection) admin.getConnection()).getAdmin(targetServer);
+    try {
+      targetServer = ProtobufUtil.toServerName(targetRS.getServerInfo(null,
+          GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName());
+      //stopping may cause an exception
+      //due to the connection loss
+      targetRS.stopServer(null,
+          AdminProtos.StopServerRequest.newBuilder().setReason("Die").build());
+    } catch(Exception e) {
+    }
+    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+    //wait for stopped regionserver to dead server list
+    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return !master.getServerManager().areDeadServersInProgress()
+            && cluster.getClusterStatus().getDeadServerNames().size() > 0;
+      }
+    });
+    assertFalse(cluster.getClusterStatus().getServers().contains(targetServer));
+    assertTrue(cluster.getClusterStatus().getDeadServerNames().contains(targetServer));
+    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
+
+    //clear dead servers list
+    List<ServerName> notClearedServers = admin.clearDeadServers(Lists.newArrayList(targetServer));
+    assertEquals(0, notClearedServers.size());
+
+    Set<Address> newGroupServers = rsGroupAdmin.getRSGroupInfo(newGroup.getName()).getServers();
+    assertFalse(newGroupServers.contains(targetServer.getAddress()));
+    assertEquals(2, newGroupServers.size());
+  }
+
+  @Test
+  public void testRemoveServers() throws Exception {
+    LOG.info("testRemoveServers");
+    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3);
+    ServerName targetServer = ServerName.parseServerName(
+        newGroup.getServers().iterator().next().toString());
+    try {
+      rsGroupAdmin.removeServers(Sets.newHashSet(targetServer.getAddress()));
+      fail("Online servers shouldn't have been successfully removed.");
+    } catch(IOException ex) {
+      String exp = "Server " + targetServer.getAddress()
+          + " is an online server, not allowed to remove.";
+      String msg = "Expected '" + exp + "' in exception message: ";
+      assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp));
+    }
+    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
+
+    AdminProtos.AdminService.BlockingInterface targetRS =
+        ((ClusterConnection) admin.getConnection()).getAdmin(targetServer);
+    try {
+      targetServer = ProtobufUtil.toServerName(targetRS.getServerInfo(null,
+          GetServerInfoRequest.newBuilder().build()).getServerInfo().getServerName());
+      //stopping may cause an exception
+      //due to the connection loss
+      targetRS.stopServer(null,
+          AdminProtos.StopServerRequest.newBuilder().setReason("Die").build());
+    } catch(Exception e) {
+    }
+
+    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
+    //wait for stopped regionserver to dead server list
+    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
+      @Override
+      public boolean evaluate() throws Exception {
+        return !master.getServerManager().areDeadServersInProgress()
+            && cluster.getClusterStatus().getDeadServerNames().size() > 0;
+      }
+    });
+
+    try {
+      rsGroupAdmin.removeServers(Sets.newHashSet(targetServer.getAddress()));
+      fail("Dead servers shouldn't have been successfully removed.");
+    } catch(IOException ex) {
+      String exp = "Server " + targetServer.getAddress() + " is on the dead servers list,"
+          + " Maybe it will come back again, not allowed to remove.";
+      String msg = "Expected '" + exp + "' in exception message: ";
+      assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp));
+    }
+    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
+
+    ServerName sn = TEST_UTIL.getHBaseClusterInterface().getClusterStatus().getMaster();
+    TEST_UTIL.getHBaseClusterInterface().stopMaster(sn);
+    TEST_UTIL.getHBaseClusterInterface().waitForMasterToStop(sn, 60000);
+    TEST_UTIL.getHBaseClusterInterface().startMaster(sn.getHostname(), 0);
+    TEST_UTIL.getHBaseClusterInterface().waitForActiveAndReadyMaster(60000);
+
+    assertEquals(3, cluster.getClusterStatus().getServersSize());
+    assertFalse(cluster.getClusterStatus().getServers().contains(targetServer));
+    assertFalse(cluster.getClusterStatus().getDeadServerNames().contains(targetServer));
+    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
+
+    rsGroupAdmin.removeServers(Sets.newHashSet(targetServer.getAddress()));
+    Set<Address> newGroupServers = rsGroupAdmin.getRSGroupInfo(newGroup.getName()).getServers();
+    assertFalse(newGroupServers.contains(targetServer.getAddress()));
+    assertEquals(2, newGroupServers.size());
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java
----------------------------------------------------------------------
diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java
index ba3534d..0a26a35 100644
--- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java
+++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/VerifyingRSGroupAdminClient.java
@@ -110,6 +110,12 @@ public class VerifyingRSGroupAdminClient implements RSGroupAdmin {
     verify();
   }
 
+  @Override
+  public void removeServers(Set<Address> servers) throws IOException {
+    wrapped.removeServers(servers);
+    verify();
+  }
+
   public void verify() throws IOException {
     Map<String, RSGroupInfo> groupMap = Maps.newHashMap();
     Set<RSGroupInfo> zList = Sets.newHashSet();

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java
index 398e56b..6ef5504 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coprocessor/MasterObserver.java
@@ -1099,6 +1099,24 @@ public interface MasterObserver {
                           String groupName, boolean balancerRan) throws IOException {}
 
   /**
+   * Called before servers are removed from rsgroup
+   * @param ctx the environment to interact with the framework and master
+   * @param servers set of decommissioned servers to remove
+   */
+  default void preRemoveServers(
+      final ObserverContext<MasterCoprocessorEnvironment> ctx,
+      Set<Address> servers) throws IOException {}
+
+  /**
+   * Called after servers are removed from rsgroup
+   * @param ctx the environment to interact with the framework and master
+   * @param servers set of servers to remove
+   */
+  default void postRemoveServers(
+      final ObserverContext<MasterCoprocessorEnvironment> ctx,
+      Set<Address> servers) throws IOException {}
+
+  /**
    * Called before add a replication peer
    * @param ctx the environment to interact with the framework and master
    * @param peerId a short name that identifies the peer

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
index 4337347..734555a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterCoprocessorHost.java
@@ -1401,6 +1401,30 @@ public class MasterCoprocessorHost
     });
   }
 
+  public void preRemoveServers(final Set<Address> servers)
+      throws IOException {
+    execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() {
+      @Override
+      public void call(MasterObserver observer) throws IOException {
+        if(((MasterEnvironment)getEnvironment()).supportGroupCPs) {
+          observer.preRemoveServers(this, servers);
+        }
+      }
+    });
+  }
+
+  public void postRemoveServers(final Set<Address> servers)
+      throws IOException {
+    execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() {
+      @Override
+      public void call(MasterObserver observer) throws IOException {
+        if(((MasterEnvironment)getEnvironment()).supportGroupCPs) {
+          observer.postRemoveServers(this, servers);
+        }
+      }
+    });
+  }
+
   public void preAddReplicationPeer(final String peerId, final ReplicationPeerConfig peerConfig)
       throws IOException {
     execOperation(coprocEnvironments.isEmpty() ? null : new MasterObserverOperation() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
index f2b7541..3ed2ee3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/security/access/AccessController.java
@@ -2690,6 +2690,12 @@ public class AccessController implements MasterCoprocessor, RegionCoprocessor,
   }
 
   @Override
+  public void preRemoveServers(ObserverContext<MasterCoprocessorEnvironment> ctx,
+      Set<Address> servers) throws IOException {
+    requirePermission(getActiveUser(ctx), "removeServers", Action.ADMIN);
+  }
+
+  @Override
   public void preAddReplicationPeer(final ObserverContext<MasterCoprocessorEnvironment> ctx,
       String peerId, ReplicationPeerConfig peerConfig) throws IOException {
     requirePermission(getActiveUser(ctx), "addReplicationPeer", Action.ADMIN);

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb b/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb
index befed01..1d3ca7c 100644
--- a/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb
+++ b/hbase-shell/src/main/ruby/hbase/rsgroup_admin.rb
@@ -118,5 +118,17 @@ module Hbase
       end
       @admin.moveServersAndTables(servers, tables, dest)
     end
+
+    #--------------------------------------------------------------------------
+    # remove decommissioned server from rsgroup
+    def remove_servers(*args)
+      # Flatten params array
+      args = args.flatten.compact
+      servers = java.util.HashSet.new
+      args.each do |s|
+        servers.add(org.apache.hadoop.hbase.net.Address.fromString(s))
+      end
+      @admin.removeServers(servers)
+    end
   end
 end

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-shell/src/main/ruby/shell.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/shell.rb b/hbase-shell/src/main/ruby/shell.rb
index 60ca229..7852625 100644
--- a/hbase-shell/src/main/ruby/shell.rb
+++ b/hbase-shell/src/main/ruby/shell.rb
@@ -483,5 +483,6 @@ Shell.load_command_group(
     move_servers_tables_rsgroup
     get_server_rsgroup
     get_table_rsgroup
+    remove_servers_rsgroup
   ]
 )

http://git-wip-us.apache.org/repos/asf/hbase/blob/01d366da/hbase-shell/src/main/ruby/shell/commands/remove_servers_rsgroup.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/shell/commands/remove_servers_rsgroup.rb b/hbase-shell/src/main/ruby/shell/commands/remove_servers_rsgroup.rb
new file mode 100644
index 0000000..ba8e60c
--- /dev/null
+++ b/hbase-shell/src/main/ruby/shell/commands/remove_servers_rsgroup.rb
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+module Shell
+  module Commands
+    class RemoveServersRsgroup < Command
+      def help
+        <<-EOF
+Remove decommissioned servers from rsgroup.
+Dead/recovering/live servers will be disallowed.
+Example:
+  hbase> remove_servers_rsgroup ['server1:port','server2:port']
+EOF
+      end
+
+      def command(servers)
+        rsgroup_admin.remove_servers(servers)
+      end
+    end
+  end
+end


Mime
View raw message