geode-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bschucha...@apache.org
Subject [geode] 01/01: GEODE-5843 ReconnectDUnitTest.testReconnectWithRequiredRoleRegained is being ignored
Date Wed, 10 Oct 2018 18:18:02 GMT
This is an automated email from the ASF dual-hosted git repository.

bschuchardt pushed a commit to branch feature/GEODE-5843
in repository https://gitbox.apache.org/repos/asf/geode.git

commit 1d278f2550af32eff491619cffd283b3730ef95e
Author: Bruce Schuchardt <bschuchardt@pivotal.io>
AuthorDate: Wed Oct 10 10:51:27 2018 -0700

    GEODE-5843 ReconnectDUnitTest.testReconnectWithRequiredRoleRegained is being ignored
    
    Two problems have been addressed:
      First, the recursive reconnect logic in InternalDistributedSystem was
      broken.  After recursion created a new cache the higher up reconnect()
      method did not handle things correctly and called things like
      createAndStartCacheServers() on the defunct cache and notifying
      reconnect-listeners that the defunct DistributedSystem had been
      successfully created.  The test for
      reconnecting after role-loss was also messed up and needed to have
      its end conditions changed a bit.
    
      Second, the execution of role-loss actions was being done under a
      synchronization on the distribution advisor.  That was periodically
      causing a deadlock if we receive an update for the distribution advisor.
---
 .../apache/geode/cache30/ReconnectDUnitTest.java   | 315 +++++++++------------
 .../geode/distributed/DistributedSystem.java       |   5 +
 .../internal/InternalDistributedSystem.java        |  30 +-
 .../geode/internal/cache/DistributedRegion.java    |  15 +-
 .../geode/internal/cache/GemFireCacheImpl.java     |   2 +-
 5 files changed, 167 insertions(+), 200 deletions(-)

diff --git a/geode-core/src/distributedTest/java/org/apache/geode/cache30/ReconnectDUnitTest.java
b/geode-core/src/distributedTest/java/org/apache/geode/cache30/ReconnectDUnitTest.java
index f9c6938..6a07b88 100755
--- a/geode-core/src/distributedTest/java/org/apache/geode/cache30/ReconnectDUnitTest.java
+++ b/geode-core/src/distributedTest/java/org/apache/geode/cache30/ReconnectDUnitTest.java
@@ -46,12 +46,11 @@ import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import org.awaitility.Awaitility;
-import org.junit.Ignore;
+import org.junit.Before;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 import org.apache.geode.CancelException;
-import org.apache.geode.SystemFailure;
 import org.apache.geode.cache.AttributesFactory;
 import org.apache.geode.cache.CacheException;
 import org.apache.geode.cache.CacheFactory;
@@ -90,7 +89,6 @@ import org.apache.geode.test.dunit.DistributedTestUtils;
 import org.apache.geode.test.dunit.Host;
 import org.apache.geode.test.dunit.IgnoredException;
 import org.apache.geode.test.dunit.Invoke;
-import org.apache.geode.test.dunit.LogWriterUtils;
 import org.apache.geode.test.dunit.SerializableCallable;
 import org.apache.geode.test.dunit.SerializableRunnable;
 import org.apache.geode.test.dunit.ThreadUtils;
@@ -157,7 +155,6 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
     // Cache cache = getCache();
     closeCache();
     basicGetSystem().disconnect();
-    LogWriterUtils.getLogWriter().fine("Cache Closed ");
   }
 
   @Override
@@ -171,7 +168,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
       dsProperties.put(LOCATORS, "localHost[" + locatorPort + "]");
       dsProperties.put(MCAST_PORT, "0");
       dsProperties.put(MEMBER_TIMEOUT, "1000");
-      dsProperties.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
+      dsProperties.put(LOG_LEVEL, "info");
       dsProperties.put(SECURITY_MANAGER, SimpleSecurityManager.class.getName());
       dsProperties.put("security-username", "clusterManage");
       dsProperties.put("security-password", "clusterManage");
@@ -342,7 +339,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
                   }
                   try {
                     cache = (InternalCache) new CacheFactory(props).create();
-                    LogWriterUtils.getLogWriter().error(
+                    System.err.println(
                         "testReconnectCollidesWithApplication failed - application thread
was able to create a cache");
                   } catch (IllegalStateException cacheExists) {
                     // expected
@@ -376,8 +373,8 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
                 return "waiting for ds to begin reconnecting";
               }
             }, 30000, 1000, true);
-            LogWriterUtils.getLogWriter().info("entering reconnect wait for " + ds);
-            LogWriterUtils.getLogWriter().info("ds.isReconnecting() = " + ds.isReconnecting());
+            System.out.println("entering reconnect wait for " + ds);
+            System.out.println("ds.isReconnecting() = " + ds.isReconnecting());
             boolean failure = true;
             try {
               ds.waitUntilReconnected(60, TimeUnit.SECONDS);
@@ -396,7 +393,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
                       .getServices().getMessenger().isOldMembershipIdentifier(dm));
               return ds.getReconnectedSystem().getDistributedMember();
             } catch (InterruptedException e) {
-              LogWriterUtils.getLogWriter().warning("interrupted while waiting for reconnect");
+              System.err.println("interrupted while waiting for reconnect");
               return null;
             } finally {
               if (failure) {
@@ -426,8 +423,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
         assertFalse(ds.isReconnecting());
         DistributedSystem newDs = ds.getReconnectedSystem();
         if (newDs != null) {
-          LogWriterUtils.getLogWriter()
-              .warning("expected distributed system to be disconnected: " + newDs);
+          System.err.println("expected distributed system to be disconnected: " + newDs);
           newDs.disconnect();
           return false;
         }
@@ -484,7 +480,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
               }
             }, 30000, 1000, true);
             long waitTime = 120;
-            LogWriterUtils.getLogWriter().info("VM" + VM.getCurrentVMNum() + " waiting up
to "
+            System.out.println("VM" + VM.getCurrentVMNum() + " waiting up to "
                 + waitTime + " seconds for reconnect to complete");
             try {
               ds.waitUntilReconnected(waitTime, TimeUnit.SECONDS);
@@ -572,7 +568,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
             Awaitility.await("waiting for locator to restart").atMost(30, TimeUnit.SECONDS)
                 .until(Locator::getLocator, notNullValue());
             if (((InternalLocator) Locator.getLocator()).isStopped()) {
-              LogWriterUtils.getLogWriter().error("found a stopped locator");
+              System.err.println("found a stopped locator");
               return false;
             }
             return true;
@@ -657,7 +653,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
     locatorPort = locPort;
     Properties config = getDistributedSystemProperties();
     config.put(ROLES, "");
-    config.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
+    config.put(LOG_LEVEL, "info");
     // config.put("log-file", "roleLossController.log");
     // creating the DS
     getSystem(config);
@@ -685,7 +681,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
     closeCache();
     basicGetSystem().disconnect();
 
-    LogWriterUtils.getLogWriter().info("disconnected from the system...");
+    System.out.println("disconnected from the system...");
     Host host = Host.getHost(0);
 
     VM vm0 = host.getVM(0);
@@ -694,7 +690,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
 
     SerializableRunnable roleLoss = new CacheSerializableRunnable("ROLERECONNECTTESTS") {
       public void run2() throws RuntimeException {
-        LogWriterUtils.getLogWriter().info("####### STARTING THE REAL TEST ##########");
+        System.out.println("####### STARTING THE REAL TEST ##########");
 
         locatorPort = locPort;
         dsProperties = null;
@@ -703,23 +699,19 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
         props.put(MAX_WAIT_TIME_RECONNECT, "200");
         final int timeReconnect = 3;
         props.put(MAX_NUM_RECONNECT_TRIES, "3");
-        props.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
+        props.put(LOG_LEVEL, "info");
         // props.put("log-file", "roleLossVM0.log");
 
         getSystem(props);
 
         addReconnectListener();
 
-        basicGetSystem().getLogWriter().info(
-            "<ExpectedException action=add>" + "CacheClosedException" + "</ExpectedException");
+        IgnoredException.addIgnoredException("CacheClosedException");
         try {
           getCache(props);
           throw new RuntimeException("The test should throw a CancelException ");
         } catch (CancelException ignor) { // can be caused by role loss during intialization.
-          LogWriterUtils.getLogWriter().info("Got Expected CancelException ");
-        } finally {
-          basicGetSystem().getLogWriter().info(
-              "<ExpectedException action=remove>" + "CacheClosedException" + "</ExpectedException");
+          System.out.println("Got Expected CancelException ");
         }
 
         WaitCriterion ev = new WaitCriterion() {
@@ -732,7 +724,6 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
           }
         };
         Wait.waitForCriterion(ev, 60 * 1000, 200, true);
-        LogWriterUtils.getLogWriter().fine("roleLoss done Sleeping");
         assertEquals(timeReconnect, reconnectTries);
       }
 
@@ -752,22 +743,29 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
   public static volatile boolean initialRolePlayerStarted = false;
 
   // public static boolean rPut;
-  public static Integer reconnectTries() {
-    return new Integer(reconnectTries);
+  public static int reconnectTries() {
+    return reconnectTries;
   }
 
-  public static Boolean isInitialized() {
-    return new Boolean(initialized);
+  public static boolean isInitialized() {
+    return initialized;
   }
 
-  public static Boolean isInitialRolePlayerStarted() {
-    return new Boolean(initialRolePlayerStarted);
+  public static boolean isInitialRolePlayerStarted() {
+    return initialRolePlayerStarted;
   }
 
+  @Before
+  public void initStatics() {
+    Invoke.invokeInEveryVM(() -> {
+      reconnectTries = 0;
+      initialized = false;
+      initialRolePlayerStarted = false;
+    });
+  }
 
   // See #50944 before enabling the test. This ticket has been closed with wontFix
   // for the 2014 8.0 release.
-  @Ignore
   @Test
   public void testReconnectWithRequiredRoleRegained() throws Throwable {
 
@@ -789,7 +787,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
     locatorPort = locPort;
     Properties config = getDistributedSystemProperties();
     config.put(ROLES, "");
-    config.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
+    config.put(LOG_LEVEL, "info");
     // creating the DS
     getSystem(config);
 
@@ -816,7 +814,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
     }
     closeCache();
     // disconnectFromDS();
-    getSystem().disconnect(); // added
+    getSystem().disconnect();
 
     // ################################################################### //
     //
@@ -839,18 +837,19 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
 
     CacheSerializableRunnable roleLoss =
         getRoleLossRunnable(vm1, locPort, regionName, myKey, myValue,
-            "starting role loss vm.  When the role is lost it will start" + " trying to reconnect");
+            "starting role loss vm.  When the role is lost it will start" + " trying to reconnect",
+            file.getAbsolutePath());
     final AsyncInvocation roleLossAsync = vm0.invokeAsync(roleLoss);
 
-    LogWriterUtils.getLogWriter().info("waiting for role loss vm to start reconnect attempts");
+    System.out.println("waiting for role loss vm to start reconnect attempts");
 
     WaitCriterion ev = new WaitCriterion() {
       public boolean done() {
         if (!roleLossAsync.isAlive()) {
           return true;
         }
-        Object res = vm0.invoke(() -> ReconnectDUnitTest.reconnectTries());
-        return ((Integer) res).intValue() != 0;
+        int tries = vm0.invoke(() -> ReconnectDUnitTest.reconnectTries());
+        return tries != 0;
       }
 
       public String description() {
@@ -875,12 +874,12 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
        * "and the number of reconnected tried is not set to zero for " + "more than 2 mins");
try{
        * Thread.sleep(15); }catch(Exception ee){ getLogWriter().severe("Exception : "+ee);
} }
        */
-      LogWriterUtils.getLogWriter().info("waiting for vm0 to finish reconnecting");
+      System.out.println("waiting for vm0 to finish reconnecting");
       ThreadUtils.join(roleLossAsync, 120 * 1000);
     }
 
     if (roleLossAsync.getException() != null) {
-      Assert.fail("Exception in Vm0", roleLossAsync.getException());
+      throw roleLossAsync.getException();
     }
 
     ThreadUtils.join(avkVm1, 30 * 1000);
@@ -892,154 +891,111 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
 
   private CacheSerializableRunnable getRoleLossRunnable(final VM otherVM, final int locPort,
       final String regionName, final String myKey, final Object myValue,
-      final String startupMessage) {
+      final String startupMessage, final String xmlFilePath) {
 
     return new CacheSerializableRunnable("roleloss runnable") {
       public void run2() {
-        Thread t = null;
-        try {
-          // closeCache();
-          // getSystem().disconnect();
-          LogWriterUtils.getLogWriter().info(startupMessage);
-          WaitCriterion ev = new WaitCriterion() {
-            public boolean done() {
-              return otherVM.invoke(() -> ReconnectDUnitTest.isInitialRolePlayerStarted())
-                  .booleanValue();
-            }
-
-            public String description() {
-              return null;
-            }
-          };
-          Wait.waitForCriterion(ev, 10 * 1000, 200, true);
-
-          LogWriterUtils.getLogWriter()
-              .info("Starting the test and creating the cache and regions etc ...");
-          locatorPort = locPort;
-          Properties props = getDistributedSystemProperties();
-          props.put(CACHE_XML_FILE, "RoleRegained.xml");
-          props.put(MAX_WAIT_TIME_RECONNECT, "3000");
-          props.put(MAX_NUM_RECONNECT_TRIES, "8");
-          props.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
-
-          getSystem(props);
-          basicGetSystem().getLogWriter().info(
-              "<ExpectedException action=add>" + "CacheClosedException" + "</ExpectedException");
+        System.out.println(startupMessage);
+        WaitCriterion ev = new WaitCriterion() {
+          public boolean done() {
+            return otherVM.invoke(() -> ReconnectDUnitTest.isInitialRolePlayerStarted())
+                .booleanValue();
+          }
 
-          try {
-            getCache();
-          } catch (CancelException e) {
-            // can happen if RoleA goes away during initialization
-            LogWriterUtils.getLogWriter()
-                .info("cache threw CancelException while creating the cache");
+          public String description() {
+            return null;
           }
+        };
+        Wait.waitForCriterion(ev, 10 * 1000, 200, true);
 
-          initialized = true;
+        System.out.println(
+            "Starting the test and creating the cache and regions etc ..." + System.getenv("PWD"));
+        locatorPort = locPort;
+        Properties props = getDistributedSystemProperties();
+        props.put(CACHE_XML_FILE, xmlFilePath);
+        props.put(MAX_WAIT_TIME_RECONNECT, "3000");
+        props.put(MAX_NUM_RECONNECT_TRIES, "8");
+        props.put(LOG_LEVEL, "info");
 
-          addReconnectListener();
+        getSystem(props);
+        IgnoredException.addIgnoredException("CacheClosedException");
 
-          ev = new WaitCriterion() {
-            public boolean done() {
-              LogWriterUtils.getLogWriter().info("ReconnectTries=" + reconnectTries);
-              return reconnectTries != 0;
-            }
+        try {
+          getCache();
+        } catch (CancelException e) {
+          // can happen if RoleA goes away during initialization
+          System.out.println("cache threw CancelException while creating the cache");
+        }
 
-            public String description() {
-              return null;
-            }
-          };
-          Wait.waitForCriterion(ev, 30 * 1000, 200, true);
+        initialized = true;
 
-          // long startTime = System.currentTimeMillis();
+        addReconnectListener();
 
-          ev = new WaitCriterion() {
-            String excuse;
+        Awaitility.await().atMost(5, TimeUnit.MINUTES).until(() -> reconnectTries != 0);
 
-            public boolean done() {
-              if (InternalDistributedSystem.getReconnectAttemptCounter() != 0) {
-                excuse = "reconnectCount is " + reconnectTries + " waiting for it to be zero";
-                return false;
-              }
-              Object key = null;
-              Object value = null;
-              Region.Entry keyValue = null;
-              try {
-                if (cache == null) {
-                  excuse = "no cache";
-                  return false;
-                }
-                Region myRegion = cache.getRegion(regionName);
-                if (myRegion == null) {
-                  excuse = "no region";
-                  return false;
-                }
-
-                Set keyValuePair = myRegion.entrySet();
-                Iterator it = keyValuePair.iterator();
-                while (it.hasNext()) {
-                  keyValue = (Region.Entry) it.next();
-                  key = keyValue.getKey();
-                  value = keyValue.getValue();
-                }
-                if (key == null) {
-                  excuse = "key is null";
-                  return false;
-                }
-                if (!myKey.equals(key)) {
-                  excuse = "key is wrong";
-                  return false;
-                }
-                if (value == null) {
-                  excuse = "value is null";
-                  return false;
-                }
-                if (!myValue.equals(value)) {
-                  excuse = "value is wrong";
-                  return false;
-                }
-                LogWriterUtils.getLogWriter().info("All assertions passed");
-                LogWriterUtils.getLogWriter().info("MyKey : " + key + " and myvalue : " +
value);
-                return true;
-              } catch (CancelException ecc) {
-                // ignor the exception because the cache can be closed/null some times
-                // while in reconnect.
-              } catch (RegionDestroyedException rex) {
-
-              } finally {
-                LogWriterUtils.getLogWriter()
-                    .info("waiting for reconnect.  Current status is '" + excuse + "'");
-              }
+        Awaitility.await().atMost(5, TimeUnit.MINUTES).until(() -> {
+          String excuse = "none";
+          if (InternalDistributedSystem.getReconnectAttemptCounter() != 0) {
+            System.out.println("reconnectAttemptCounter is "
+                + InternalDistributedSystem.getReconnectAttemptCounter()
+                + " waiting for it to be zero");
+            return false;
+          }
+          Object key = null;
+          Object value = null;
+          Region.Entry keyValue = null;
+          try {
+            if (cache == null) {
+              excuse = "no cache";
               return false;
             }
-
-            public String description() {
-              return excuse;
+            Region myRegion = cache.getRegion(regionName);
+            if (myRegion == null) {
+              excuse = "no region";
+              return false;
             }
-          };
 
-          Wait.waitForCriterion(ev, 60 * 1000, 200, true); // was 5 * 60 * 1000
+            Set keyValuePair = myRegion.entrySet();
+            Iterator it = keyValuePair.iterator();
+            while (it.hasNext()) {
+              keyValue = (Region.Entry) it.next();
+              key = keyValue.getKey();
+              value = keyValue.getValue();
+            }
+            if (key == null) {
+              excuse = "key is null";
+              return false;
+            }
+            if (!myKey.equals(key)) {
+              excuse = "key is wrong";
+              return false;
+            }
+            if (value == null) {
+              excuse = "value is null";
+              return false;
+            }
+            if (!myValue.equals(value)) {
+              excuse = "value is wrong";
+              return false;
+            }
+            System.out.println("All assertions passed");
+            System.out.println("MyKey : " + key + " and myvalue : " + value);
+            return true;
+          } catch (CancelException ecc) {
+            // ignor the exception because the cache can be closed/null some times
+            // while in reconnect.
+          } catch (RegionDestroyedException rex) {
 
-          if (cache != null) {
-            cache.getDistributedSystem().disconnect();
-          }
-        } catch (VirtualMachineError e) {
-          SystemFailure.initiateFailure(e);
-          throw e;
-        } catch (Error th) {
-          LogWriterUtils.getLogWriter().severe("DEBUG", th);
-          throw th;
-        } finally {
-          if (t != null) {
-            ThreadUtils.join(t, 2 * 60 * 1000);
+          } finally {
+            System.out.println("waiting for reconnect.  Current status is '" + excuse + "'");
           }
-          // greplogs won't care if you remove an exception that was never added,
-          // and this ensures that it gets removed.
-          basicGetSystem().getLogWriter().info(
-              "<ExpectedException action=remove>" + "CacheClosedException" + "</ExpectedException");
-        }
+          return false;
+        });
 
+        if (cache != null) {
+          cache.getDistributedSystem().disconnect();
+        }
       }
-
     }; // roleloss runnable
   }
 
@@ -1161,12 +1117,12 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
       final String startupMessage) {
     return new CacheSerializableRunnable("second RoleA player") {
       public void run2() throws CacheException {
-        LogWriterUtils.getLogWriter().info(startupMessage);
+        System.out.println(startupMessage);
         // closeCache();
         // getSystem().disconnect();
         locatorPort = locPort;
         Properties props = getDistributedSystemProperties();
-        props.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
+        props.put(LOG_LEVEL, "info");
         props.put(ROLES, "RoleA");
 
         getSystem(props);
@@ -1177,7 +1133,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
 
         RegionAttributes attr = fac.create();
         Region region = createRootRegion(regionName, attr);
-        LogWriterUtils.getLogWriter().info("STARTED THE REQUIREDROLES CACHE");
+        System.out.println("STARTED THE REQUIREDROLES CACHE");
         try {
           Thread.sleep(120);
         } catch (Exception ee) {
@@ -1192,7 +1148,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
         } catch (InterruptedException ee) {
           fail("interrupted");
         }
-        LogWriterUtils.getLogWriter().info("RolePlayer is done...");
+        System.out.println("RolePlayer is done...");
 
 
       }
@@ -1208,10 +1164,10 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
       public void run2() throws CacheException {
         // closeCache();
         // getSystem().disconnect();
-        LogWriterUtils.getLogWriter().info(startupMessage);
+        System.out.println(startupMessage);
         locatorPort = locPort;
         Properties props = getDistributedSystemProperties();
-        props.put(LOG_LEVEL, LogWriterUtils.getDUnitLogLevel());
+        props.put(LOG_LEVEL, "info");
         props.put(ROLES, "RoleA");
 
         getSystem(props);
@@ -1222,7 +1178,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
 
         RegionAttributes attr = fac.create();
         createRootRegion(regionName, attr);
-        LogWriterUtils.getLogWriter().info("STARTED THE REQUIREDROLES CACHE");
+        System.out.println("STARTED THE REQUIREDROLES CACHE");
         initialRolePlayerStarted = true;
 
         while (!otherVM.invoke(() -> ReconnectDUnitTest.isInitialized()).booleanValue())
{
@@ -1232,7 +1188,7 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
             fail("interrupted");
           }
         }
-        LogWriterUtils.getLogWriter().info("RoleAPlayerInitializer is done...");
+        System.out.println("RoleAPlayerInitializer is done...");
         closeCache();
 
       }
@@ -1242,15 +1198,18 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
 
   void addReconnectListener() {
     reconnectTries = 0; // reset the count for this listener
-    LogWriterUtils.getLogWriter().info("adding reconnect listener");
+    System.out.println("adding reconnect listener");
     ReconnectListener reconlis = new ReconnectListener() {
       public void reconnecting(InternalDistributedSystem oldSys) {
-        LogWriterUtils.getLogWriter().info("reconnect listener invoked");
+        System.out.println("reconnect listener invoked");
         reconnectTries++;
       }
 
       public void onReconnect(InternalDistributedSystem system1,
-          InternalDistributedSystem system2) {}
+          InternalDistributedSystem system2) {
+        System.out.println("reconnect listener onReconnect invoked " + system2);
+        cache = system2.getCache();
+      }
     };
     InternalDistributedSystem.addReconnectListener(reconlis);
   }
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/DistributedSystem.java
b/geode-core/src/main/java/org/apache/geode/distributed/DistributedSystem.java
index bff0b1c..9f821d8 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/DistributedSystem.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/DistributedSystem.java
@@ -29,6 +29,8 @@ import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.logging.log4j.Logger;
+
 import org.apache.geode.CancelCriterion;
 import org.apache.geode.LogWriter;
 import org.apache.geode.StatisticsFactory;
@@ -41,6 +43,7 @@ import org.apache.geode.distributed.internal.DistributionConfig;
 import org.apache.geode.distributed.internal.InternalDistributedSystem;
 import org.apache.geode.internal.Assert;
 import org.apache.geode.internal.ClassPathLoader;
+import org.apache.geode.internal.logging.LogService;
 import org.apache.geode.internal.tcp.ConnectionTable;
 import org.apache.geode.internal.util.IOUtils;
 
@@ -91,6 +94,8 @@ public abstract class DistributedSystem implements StatisticsFactory {
    */
   protected static final Object existingSystemsLock = new Object();
 
+  private static final Logger logger = LogService.getLogger();
+
   //////////////////////// Static Methods ////////////////////////
 
   /**
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
index d0d4e96..28a6b8d 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
@@ -2673,9 +2673,9 @@ public class InternalDistributedSystem extends DistributedSystem
 
         logger.info(
             "Attempting to reconnect to the distributed system.  This is attempt #{}.",
-            new Object[] {reconnectAttemptCounter});
+            reconnectAttemptCounter);
 
-        int savNumOfTries = reconnectAttemptCounter;
+        int saveNumberOfTries = reconnectAttemptCounter;
         try {
           // notify listeners of each attempt and then again after successful
           notifyReconnectListeners(this, this.reconnectDS, true);
@@ -2743,7 +2743,7 @@ public class InternalDistributedSystem extends DistributedSystem
           if (this.locatorDMTypeForced) {
             System.getProperties().remove(InternalLocator.FORCE_LOCATOR_DM_TYPE);
           }
-          reconnectAttemptCounter = savNumOfTries;
+          reconnectAttemptCounter = saveNumberOfTries;
         }
 
 
@@ -2759,13 +2759,13 @@ public class InternalDistributedSystem extends DistributedSystem
               }
               cache = GemFireCacheImpl.create(this.reconnectDS, config);
 
-              createAndStartCacheServers(cacheServerCreation, cache);
-
-              if (cache.getCachePerfStats().getReliableRegionsMissing() == 0) {
-                reconnectAttemptCounter = 0;
-              } else {
-                // this try failed. The new cache will call reconnect again
+              if (!cache.isClosed()) {
+                createAndStartCacheServers(cacheServerCreation, cache);
+                if (cache.getCachePerfStats().getReliableRegionsMissing() == 0) {
+                  reconnectAttemptCounter = 0;
+                }
               }
+
             } catch (CacheXmlException e) {
               logger.warn("Exception occurred while trying to create the cache during reconnect",
                   e);
@@ -2774,6 +2774,11 @@ public class InternalDistributedSystem extends DistributedSystem
               reconnectCancelled = true;
               break;
             } catch (CancelException ignor) {
+              // If this reconnect is for required-roles the algorithm is recursive and we
+              // shouldn't retry at this level
+              if (!forcedDisconnect) {
+                break;
+              }
               logger.warn("Exception occurred while trying to create the cache during reconnect",
                   ignor);
               reconnectDS.disconnect();
@@ -2795,7 +2800,6 @@ public class InternalDistributedSystem extends DistributedSystem
             return;
           }
         }
-
       } // while()
 
       if (isReconnectCancelled()) {
@@ -2804,7 +2808,9 @@ public class InternalDistributedSystem extends DistributedSystem
         }
       } else {
         reconnectDS.isReconnectingDS = false;
-        notifyReconnectListeners(this, this.reconnectDS, false);
+        if (reconnectDS.isConnected()) {
+          notifyReconnectListeners(this, this.reconnectDS, false);
+        }
       }
 
     } finally {
@@ -2832,7 +2838,7 @@ public class InternalDistributedSystem extends DistributedSystem
       }
       attemptingToReconnect = false;
       return;
-    } else {
+    } else if (reconnectDS != null && reconnectDS.isConnected()) {
       logger.info("Reconnect completed.\nNew DistributedSystem is {}\nNew Cache is {}", reconnectDS,
           cache);
     }
diff --git a/geode-core/src/main/java/org/apache/geode/internal/cache/DistributedRegion.java
b/geode-core/src/main/java/org/apache/geode/internal/cache/DistributedRegion.java
index eb6e97c..fb146c8 100755
--- a/geode-core/src/main/java/org/apache/geode/internal/cache/DistributedRegion.java
+++ b/geode-core/src/main/java/org/apache/geode/internal/cache/DistributedRegion.java
@@ -853,14 +853,7 @@ public class DistributedRegion extends LocalRegion implements InternalDistribute
     try {
       if (getMembershipAttributes().getLossAction().isReconnect()) {
         async = true;
-        if (this.isInitializingThread) {
-          doLostReliability(true, id, newlyMissingRoles);
-        } else {
-          doLostReliability(false, id, newlyMissingRoles);
-        }
-        // we don't do this in the waiting pool because we're going to
-        // disconnect
-        // the distributed system, and it will wait for the pool to empty
+        doLostReliability(isInitializingThread, id, newlyMissingRoles);
       }
     } catch (CancelException cce) {
       throw cce;
@@ -1455,6 +1448,7 @@ public class DistributedRegion extends LocalRegion implements InternalDistribute
           if (!getSystem().isLoner()) {
             waitForRequiredRoles(memberTimeout);
           }
+          boolean initiateLossAction = false;
           synchronized (this.advisorListener) {
             synchronized (this.missingRequiredRoles) {
               if (this.missingRequiredRoles.isEmpty()) {
@@ -1478,10 +1472,13 @@ public class DistributedRegion extends LocalRegion implements InternalDistribute
                       this.missingRequiredRoles);
                 }
                 this.isInitializingThread = true;
-                lostReliability(null, null);
+                initiateLossAction = true;
               }
             }
           }
+          if (initiateLossAction) {
+            lostReliability(null, null);
+          }
         }
       } catch (RegionDestroyedException ignore) {
         // ignore to fix bug 34639 may be thrown by waitForRequiredRoles
diff --git a/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
b/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
index 452b27a..1a9c800 100755
--- a/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
+++ b/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
@@ -1210,7 +1210,7 @@ public class GemFireCacheImpl implements InternalCache, InternalClientCache,
Has
       initializeDeclarativeCache();
       completedCacheXml = true;
     } catch (RuntimeException e) {
-      logger.error("Cache initialization failed because: " + e.toString()); // fix GEODE-3038
+      logger.error("Cache initialization for {} failed because: {}", this, e); // fix GEODE-3038
       throw e;
     } finally {
       if (!completedCacheXml) {


Mime
View raw message