hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From te...@apache.org
Subject hbase git commit: HBASE-17077 Don't copy the replication queue belonging to the peer which has been deleted (Guanghao Zhang)
Date Sun, 13 Nov 2016 14:58:46 GMT
Repository: hbase
Updated Branches:
  refs/heads/master 3f919dd6c -> dba7ec1b6


HBASE-17077 Don't copy the replication queue belonging to the peer which has been deleted
(Guanghao Zhang)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/dba7ec1b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/dba7ec1b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/dba7ec1b

Branch: refs/heads/master
Commit: dba7ec1b69992b040d699ff9699f9b12d42fc053
Parents: 3f919dd
Author: tedyu <yuzhihong@gmail.com>
Authored: Sun Nov 13 06:58:39 2016 -0800
Committer: tedyu <yuzhihong@gmail.com>
Committed: Sun Nov 13 06:58:39 2016 -0800

----------------------------------------------------------------------
 .../replication/ReplicationQueuesZKImpl.java    | 30 +++++++++++++-------
 .../TestReplicationSourceManager.java           |  4 ++-
 2 files changed, 22 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/dba7ec1b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java
index bc9f86f..dcbc0f0 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java
@@ -251,9 +251,10 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R
   }
 
   /**
-   * It "atomically" copies all the wals queues from another region server and returns them all
-   * sorted per peer cluster (appended with the dead server's znode).
+   * It "atomically" copies one peer's wals queue from another dead region server and returns them
+   * all sorted. The new peer id is equal to the old peer id appended with the dead server's znode.
    * @param znode pertaining to the region server to copy the queues from
+   * @peerId peerId pertaining to the queue need to be copied
    */
   private Pair<String, SortedSet<String>> moveQueueUsingMulti(String znode, String peerId) {
     try {
@@ -261,18 +262,25 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R
       String deadRSZnodePath = ZKUtil.joinZNode(this.queuesZNode, znode);
       List<ZKUtilOp> listOfOps = new ArrayList<>();
       ReplicationQueueInfo replicationQueueInfo = new ReplicationQueueInfo(peerId);
-      if (!peerExists(replicationQueueInfo.getPeerId())) {
-        // the orphaned queues must be moved, otherwise the delete op of dead rs will fail,
-        // this will cause the whole multi op fail.
-        // NodeFailoverWorker will skip the orphaned queues.
-        LOG.warn("Peer " + peerId +
-            " didn't exist, will move its queue to avoid the failure of multi op");
-      }
+
       String newPeerId = peerId + "-" + znode;
       String newPeerZnode = ZKUtil.joinZNode(this.myQueuesZnode, newPeerId);
       // check the logs queue for the old peer cluster
       String oldClusterZnode = ZKUtil.joinZNode(deadRSZnodePath, peerId);
       List<String> wals = ZKUtil.listChildrenNoWatch(this.zookeeper, oldClusterZnode);
+
+      if (!peerExists(replicationQueueInfo.getPeerId())) {
+        LOG.warn("Peer " + replicationQueueInfo.getPeerId() +
+                " didn't exist, will move its queue to avoid the failure of multi op");
+        for (String wal : wals) {
+          String oldWalZnode = ZKUtil.joinZNode(oldClusterZnode, wal);
+          listOfOps.add(ZKUtilOp.deleteNodeFailSilent(oldWalZnode));
+        }
+        listOfOps.add(ZKUtilOp.deleteNodeFailSilent(oldClusterZnode));
+        ZKUtil.multiOrSequential(this.zookeeper, listOfOps, false);
+        return null;
+      }
+
       SortedSet<String> logQueue = new TreeSet<>();
       if (wals == null || wals.size() == 0) {
         listOfOps.add(ZKUtilOp.deleteNodeFailSilent(oldClusterZnode));
@@ -297,8 +305,8 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R
           LOG.trace(" The multi list size is: " + listOfOps.size());
       }
       ZKUtil.multiOrSequential(this.zookeeper, listOfOps, false);
-      if (LOG.isTraceEnabled())
-        LOG.trace("Atomically moved the dead regionserver logs. ");
+
+      LOG.info("Atomically moved " + znode + "/" + peerId + "'s WALs to my queue");
       return new Pair<>(newPeerId, logQueue);
     } catch (KeeperException e) {
       // Multi call failed; it looks like some other regionserver took away the logs.

http://git-wip-us.apache.org/repos/asf/hbase/blob/dba7ec1b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
index c074048..9d1d165 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/replication/regionserver/TestReplicationSourceManager.java
@@ -543,7 +543,9 @@ public abstract class TestReplicationSourceManager {
         List<String> queues = rq.getUnClaimedQueueIds(deadRsZnode);
         for(String queue:queues){
           Pair<String, SortedSet<String>> pair = rq.claimQueue(deadRsZnode, queue);
-          logZnodesMap.put(pair.getFirst(), pair.getSecond());
+          if (pair != null) {
+            logZnodesMap.put(pair.getFirst(), pair.getSecond());
+          }
         }
         server.abort("Done with testing", null);
       } catch (Exception e) {


Mime
View raw message