geode-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bschucha...@apache.org
Subject [1/3] geode git commit: GEODE-3052 Restarting locators causes potential locator split brain
Date Fri, 09 Jun 2017 00:10:43 GMT
Repository: geode
Updated Branches:
  refs/heads/develop 0d127560b -> 88825071f


GEODE-3052 Restarting locators causes potential locator split brain

When restarting from a locatorView.dat file we should ignore any locator
entries in the view.  Recovery tries to get this state from other locators
before resorting to the persisted view, so by the time we read the file we
know that all of the locator entries in the view are invalid.  Ignoring them
allows the locators to quickly move into the concurrent-startup algorithm
and find each other.

I removed the Flaky categorization of the test that I modified to
reproduce the problem.  A subclass's use of the test was reported as
a Flaky failure but I found that the ticket was closed.


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/88825071
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/88825071
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/88825071

Branch: refs/heads/develop
Commit: 88825071fad314819358e8feb2f34481dd3c1d64
Parents: c585245
Author: Bruce Schuchardt <bschuchardt@pivotal.io>
Authored: Thu Jun 8 17:01:41 2017 -0700
Committer: Bruce Schuchardt <bschuchardt@pivotal.io>
Committed: Thu Jun 8 17:02:53 2017 -0700

----------------------------------------------------------------------
 .../internal/membership/gms/locator/GMSLocator.java         | 9 +++++++++
 .../java/org/apache/geode/distributed/LocatorDUnitTest.java | 4 +++-
 .../geode/distributed/LocatorUDPSecurityDUnitTest.java      | 6 ------
 3 files changed, 12 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/geode/blob/88825071/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/locator/GMSLocator.java
----------------------------------------------------------------------
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/locator/GMSLocator.java b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/locator/GMSLocator.java
index e3635f2..93fa9da 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/locator/GMSLocator.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/membership/gms/locator/GMSLocator.java
@@ -35,6 +35,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import org.apache.geode.InternalGemFireException;
 
 import org.apache.geode.distributed.internal.ClusterConfigurationService;
+import org.apache.geode.distributed.internal.DistributionManager;
 import org.apache.logging.log4j.Logger;
 
 import org.apache.geode.DataSerializer;
@@ -405,6 +406,14 @@ public class GMSLocator implements Locator, NetLocator {
 
       Object o = DataSerializer.readObject(ois2);
       this.view = (NetView) o;
+      List<InternalDistributedMember> members = new ArrayList<>(view.getMembers());
+      // GEODE-3052 - remove locators from the view. Since we couldn't recover from an existing
+      // locator we know that all of the locators in the view are defunct
+      for (InternalDistributedMember member : members) {
+        if (member.getVmKind() == DistributionManager.LOCATOR_DM_TYPE) {
+          view.remove(member);
+        }
+      }
 
       logger.info("Peer locator initial membership is " + view);
       return true;

http://git-wip-us.apache.org/repos/asf/geode/blob/88825071/geode-core/src/test/java/org/apache/geode/distributed/LocatorDUnitTest.java
----------------------------------------------------------------------
diff --git a/geode-core/src/test/java/org/apache/geode/distributed/LocatorDUnitTest.java b/geode-core/src/test/java/org/apache/geode/distributed/LocatorDUnitTest.java
index 8ff9b67..c62a545 100644
--- a/geode-core/src/test/java/org/apache/geode/distributed/LocatorDUnitTest.java
+++ b/geode-core/src/test/java/org/apache/geode/distributed/LocatorDUnitTest.java
@@ -278,7 +278,6 @@ public class LocatorDUnitTest extends JUnit4DistributedTestCase {
    * stagger the starting of locators. This test configures two locators to start up simultaneously
    * and shows that they find each other and form a single system.
    */
-  @Category(FlakyTest.class) // GEODE-1931
   @Test
   public void testStartTwoLocators() throws Exception {
     disconnectAllFromDS();
@@ -305,6 +304,9 @@ public class LocatorDUnitTest extends JUnit4DistributedTestCase {
     properties.put(ENABLE_CLUSTER_CONFIGURATION, "false");
     addDSProps(properties);
     startVerifyAndStopLocator(loc1, loc2, port1, port2, properties);
+    // GEODE-3052 - split brain on restart from persistent view data
+    startVerifyAndStopLocator(loc1, loc2, port1, port2, properties);
+    startVerifyAndStopLocator(loc1, loc2, port1, port2, properties);
   }
 
   private Boolean startLocatorWithPortAndProperties(final int port, final Properties properties)

http://git-wip-us.apache.org/repos/asf/geode/blob/88825071/geode-core/src/test/java/org/apache/geode/distributed/LocatorUDPSecurityDUnitTest.java
----------------------------------------------------------------------
diff --git a/geode-core/src/test/java/org/apache/geode/distributed/LocatorUDPSecurityDUnitTest.java b/geode-core/src/test/java/org/apache/geode/distributed/LocatorUDPSecurityDUnitTest.java
index 9d49d30..df1d8d1 100644
--- a/geode-core/src/test/java/org/apache/geode/distributed/LocatorUDPSecurityDUnitTest.java
+++ b/geode-core/src/test/java/org/apache/geode/distributed/LocatorUDPSecurityDUnitTest.java
@@ -45,12 +45,6 @@ public class LocatorUDPSecurityDUnitTest extends LocatorDUnitTest {
     p.setProperty(SECURITY_UDP_DHALGO, "AES:128");
   }
 
-  @Override
-  @Test
-  public void testStartTwoLocators() throws Exception {
-    super.testStartTwoLocators();
-  }
-
   @Test
   public void testLocatorWithUDPSecurityButServer() throws Exception {
     disconnectAllFromDS();


Mime
View raw message