geode-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kl...@apache.org
Subject [26/50] [abbrv] geode git commit: GEODE-2193 a member is kicked out immediately after joining
Date Wed, 14 Dec 2016 17:57:41 GMT
GEODE-2193 a member is kicked out immediately after joining

The problem is happening because we send a shutdown message, initiating
election of a new coordinator, but the old ViewCreator is allowed to
send out a view announcing a new member.  The new coordinator manages
to send out a new view before the old ViewCreator sends out the new
member's view.  Other members ignore the old ViewCreator's view
because its view ID is old.  Then the reject the new member because
it has an old view ID and it isn't in their membership view.

initial view ID is x

new coordinator prepares view x+10
old coordinator prepares view x+1
other members install x+10, reject view x+1
new member joins in view x+1 when it receives view-prepare message
new member is rejected by other members because x+1 < x+10


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/ef86239f
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/ef86239f
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/ef86239f

Branch: refs/heads/feature/GEODE-1930
Commit: ef86239f872c12c0aad38d5ae3044e22fd5e87af
Parents: ac3a822
Author: Bruce Schuchardt <bschuchardt@pivotal.io>
Authored: Fri Dec 9 14:01:46 2016 -0800
Committer: Bruce Schuchardt <bschuchardt@pivotal.io>
Committed: Fri Dec 9 14:01:46 2016 -0800

----------------------------------------------------------------------
 .../membership/gms/membership/GMSJoinLeave.java | 86 ++++++++++++--------
 .../gms/messages/JoinResponseMessage.java       |  7 ++
 .../geode/distributed/LocatorJUnitTest.java     |  3 +
 .../gms/membership/GMSJoinLeaveJUnitTest.java   | 67 +++++++++++++--
 4 files changed, 119 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/geode/blob/ef86239f/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
----------------------------------------------------------------------
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index 6d782b1..4ee3011 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -803,13 +803,6 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     return newView;
   }
 
-  private void sendJoinResponses(NetView newView, List<InternalDistributedMember> newMbrs)
{
-    for (InternalDistributedMember mbr : newMbrs) {
-      JoinResponseMessage response = new JoinResponseMessage(mbr, newView, 0);
-      services.getMessenger().send(response);
-    }
-  }
-
   private void sendRemoveMessages(List<InternalDistributedMember> removals, List<String>
reasons,
       Set<InternalDistributedMember> oldIds) {
     Iterator<String> reason = reasons.iterator();
@@ -826,11 +819,19 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
 
   boolean prepareView(NetView view, List<InternalDistributedMember> newMembers)
       throws InterruptedException {
+    if (services.getCancelCriterion().isCancelInProgress()
+        || services.getManager().shutdownInProgress()) {
+      throw new InterruptedException("shutting down");
+    }
     return sendView(view, true, this.prepareProcessor);
   }
 
   void sendView(NetView view, List<InternalDistributedMember> newMembers)
       throws InterruptedException {
+    if (services.getCancelCriterion().isCancelInProgress()
+        || services.getManager().shutdownInProgress()) {
+      throw new InterruptedException("shutting down");
+    }
     sendView(view, false, this.viewProcessor);
   }
 
@@ -1401,7 +1402,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
           for (Iterator<DistributionMessage> it = viewRequests.iterator(); it.hasNext();)
{
             DistributionMessage m = it.next();
             if (m instanceof JoinRequestMessage) {
-              it.remove();
+              if (currentView.contains(((JoinRequestMessage) m).getMemberID())) {
+                it.remove();
+              }
             } else if (m instanceof LeaveRequestMessage) {
               if (!currentView.contains(((LeaveRequestMessage) m).getMemberID())) {
                 it.remove();
@@ -1500,6 +1503,11 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     if (viewCreator != null && !viewCreator.isShutdown()) {
       logger.debug("Shutting down ViewCreator");
       viewCreator.shutdown();
+      try {
+        viewCreator.join(1000);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+      }
     }
   }
 
@@ -1641,8 +1649,6 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     services.getMessenger().addHandler(ViewAckMessage.class, this);
     services.getMessenger().addHandler(LeaveRequestMessage.class, this);
     services.getMessenger().addHandler(RemoveMemberMessage.class, this);
-    services.getMessenger().addHandler(JoinRequestMessage.class, this);
-    services.getMessenger().addHandler(JoinResponseMessage.class, this);
     services.getMessenger().addHandler(FindCoordinatorRequest.class, this);
     services.getMessenger().addHandler(FindCoordinatorResponse.class, this);
     services.getMessenger().addHandler(NetworkPartitionMessage.class, this);
@@ -2133,40 +2139,48 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     }
 
     synchronized boolean informToPendingJoinRequests() {
-      boolean joinResponseSent = false;
+
       if (!shutdown) {
-        return joinResponseSent;
+        return false;
       }
-      ArrayList<DistributionMessage> requests = new ArrayList<>();
+      NetView v = currentView;
+      if (v.getCoordinator().equals(localAddress)) {
+        return false;
+      }
+
+      ArrayList<JoinRequestMessage> requests = new ArrayList<>();
       synchronized (viewRequests) {
-        if (viewRequests.size() > 0) {
-          requests.addAll(viewRequests);
-        } else {
-          return joinResponseSent;
+        if (viewRequests.isEmpty()) {
+          return false;
+        }
+        for (Iterator<DistributionMessage> iterator = viewRequests.iterator(); iterator
+            .hasNext();) {
+          DistributionMessage msg = iterator.next();
+          switch (msg.getDSFID()) {
+            case JOIN_REQUEST:
+              requests.add((JoinRequestMessage) msg);
+              // TODO [bruce] if the view creator is just spinning up I don't think we should
do
+              // this remove
+              iterator.remove();
+              break;
+            default:
+              break;
+          }
         }
-        viewRequests.clear();
       }
 
-      NetView v = currentView;
-      for (DistributionMessage msg : requests) {
-        switch (msg.getDSFID()) {
-          case JOIN_REQUEST:
-            logger.debug("Informing to pending join requests {} myid {} coord {}", msg,
-                localAddress, v.getCoordinator());
-            if (!v.getCoordinator().equals(localAddress)) {
-              joinResponseSent = true;
-              // lets inform that coordinator has been changed
-              JoinResponseMessage jrm =
-                  new JoinResponseMessage(((JoinRequestMessage) msg).getMemberID(), v,
-                      ((JoinRequestMessage) msg).getRequestId());
-              services.getMessenger().send(jrm);
-            }
-          default:
-            break;
-        }
+      if (requests.isEmpty()) {
+        return false;
+      }
+
+      for (JoinRequestMessage msg : requests) {
+        logger.debug("Sending coordinator to pending join request from {} myid {} coord {}",
+            msg.getSender(), localAddress, v.getCoordinator());
+        JoinResponseMessage jrm = new JoinResponseMessage(msg.getMemberID(), v, msg.getRequestId());
+        services.getMessenger().send(jrm);
       }
 
-      return joinResponseSent;
+      return true;
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/geode/blob/ef86239f/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/messages/JoinResponseMessage.java
----------------------------------------------------------------------
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/messages/JoinResponseMessage.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/messages/JoinResponseMessage.java
index 2a13a5e..4e0bcc8 100755
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/messages/JoinResponseMessage.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/messages/JoinResponseMessage.java
@@ -30,6 +30,13 @@ import org.apache.geode.distributed.internal.membership.NetView;
 import org.apache.geode.internal.InternalDataSerializer;
 import org.apache.geode.internal.Version;
 
+// TODO this class has been made unintelligible with different combinations of response values.
+// It needs to have an enum that indicates what type of response is in the message or it
+// needs to be broken into multiple message classes.
+// 1. a response saying the member has now joined
+// 2. a response indicating that the coordinator is now a different process
+// 3. a response containing the cluster encryption key
+
 public class JoinResponseMessage extends HighPriorityDistributionMessage {
 
   private NetView currentView;

http://git-wip-us.apache.org/repos/asf/geode/blob/ef86239f/geode-core/src/test/java/org/apache/geode/distributed/LocatorJUnitTest.java
----------------------------------------------------------------------
diff --git a/geode-core/src/test/java/org/apache/geode/distributed/LocatorJUnitTest.java b/geode-core/src/test/java/org/apache/geode/distributed/LocatorJUnitTest.java
index 368b037..8be0e7a 100644
--- a/geode-core/src/test/java/org/apache/geode/distributed/LocatorJUnitTest.java
+++ b/geode-core/src/test/java/org/apache/geode/distributed/LocatorJUnitTest.java
@@ -108,6 +108,7 @@ public class LocatorJUnitTest {
     dsprops.setProperty(JMX_MANAGER_START, "true");
     dsprops.setProperty(JMX_MANAGER_HTTP_PORT, "0");
     dsprops.setProperty(ENABLE_CLUSTER_CONFIGURATION, "false");
+    dsprops.setProperty(LOG_FILE, "");
     System.setProperty(DistributionConfig.GEMFIRE_PREFIX + "disableManagement", "false");
// not
                                                                                         
 // needed
     try {
@@ -159,6 +160,8 @@ public class LocatorJUnitTest {
         fail("expected " + threadCount + " threads or fewer but found " + Thread.activeCount()
             + ".  Check log file for a thread dump.");
       }
+    } finally {
+      JGroupsMessenger.THROW_EXCEPTION_ON_START_HOOK = false;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/geode/blob/ef86239f/geode-core/src/test/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
----------------------------------------------------------------------
diff --git a/geode-core/src/test/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
b/geode-core/src/test/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
index 453e894..14fedc6 100644
--- a/geode-core/src/test/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
+++ b/geode-core/src/test/java/org/apache/geode/distributed/internal/membership/gms/membership/GMSJoinLeaveJUnitTest.java
@@ -14,6 +14,20 @@
  */
 package org.apache.geode.distributed.internal.membership.gms.membership;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.isA;
+import static org.mockito.Mockito.atLeast;
+import static org.mockito.Mockito.doReturn;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import com.jayway.awaitility.Awaitility;
 import org.apache.geode.distributed.DistributedMember;
 import org.apache.geode.distributed.internal.DistributionConfig;
 import org.apache.geode.distributed.internal.membership.InternalDistributedMember;
@@ -32,10 +46,15 @@ import org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLe
 import org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.TcpClientWrapper;
 import org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.ViewCreator;
 import org.apache.geode.distributed.internal.membership.gms.membership.GMSJoinLeave.ViewReplyProcessor;
-import org.apache.geode.distributed.internal.membership.gms.messages.*;
+import org.apache.geode.distributed.internal.membership.gms.messages.InstallViewMessage;
+import org.apache.geode.distributed.internal.membership.gms.messages.JoinRequestMessage;
+import org.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage;
+import org.apache.geode.distributed.internal.membership.gms.messages.LeaveRequestMessage;
+import org.apache.geode.distributed.internal.membership.gms.messages.NetworkPartitionMessage;
+import org.apache.geode.distributed.internal.membership.gms.messages.RemoveMemberMessage;
+import org.apache.geode.distributed.internal.membership.gms.messages.ViewAckMessage;
 import org.apache.geode.internal.Version;
 import org.apache.geode.security.AuthenticationFailedException;
-import org.apache.geode.test.junit.categories.FlakyTest;
 import org.apache.geode.test.junit.categories.IntegrationTest;
 import org.apache.geode.test.junit.categories.MembershipTest;
 import org.junit.After;
@@ -50,12 +69,16 @@ import org.mockito.verification.Timeout;
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
-import java.util.*;
-
-import static org.junit.Assert.*;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.isA;
-import static org.mockito.Mockito.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import java.util.Timer;
+import java.util.concurrent.TimeUnit;
 
 @Category({IntegrationTest.class, MembershipTest.class})
 public class GMSJoinLeaveJUnitTest {
@@ -1062,6 +1085,34 @@ public class GMSJoinLeaveJUnitTest {
   }
 
   @Test
+  public void testViewNotSentWhenShuttingDown() throws Exception {
+    try {
+      initMocks(false);
+      System.setProperty(GMSJoinLeave.BYPASS_DISCOVERY_PROPERTY, "true");
+      gmsJoinLeave.join();
+      installView(1, gmsJoinLeaveMemberId, createMemberList(mockMembers[0], mockMembers[1],
+          mockMembers[2], gmsJoinLeaveMemberId, mockMembers[3]));
+
+      assertTrue(gmsJoinLeave.getViewCreator().isAlive());
+
+      when(manager.shutdownInProgress()).thenReturn(Boolean.TRUE);
+      for (int i = 1; i < 4; i++) {
+        RemoveMemberMessage msg =
+            new RemoveMemberMessage(gmsJoinLeaveMemberId, mockMembers[i], "crashed");
+        msg.setSender(gmsJoinLeaveMemberId);
+        gmsJoinLeave.processMessage(msg);
+      }
+
+      Awaitility.await("waiting for view creator to stop").atMost(5000, TimeUnit.MILLISECONDS)
+          .until(() -> !gmsJoinLeave.getViewCreator().isAlive());
+      assertEquals(1, gmsJoinLeave.getView().getViewId());
+
+    } finally {
+      System.getProperties().remove(GMSJoinLeave.BYPASS_DISCOVERY_PROPERTY);
+    }
+  }
+
+  @Test
   public void testPreparedViewFoundDuringBecomeCoordinator() throws Exception {
     initMocks(false);
     prepareAndInstallView(gmsJoinLeaveMemberId,


Mime
View raw message