geode-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bschucha...@apache.org
Subject geode git commit: GEODE-2155 Auto-reconnect fails with NPE
Date Thu, 05 Jan 2017 16:56:30 GMT
Repository: geode
Updated Branches:
  refs/heads/develop 0182a1bb7 -> 638b05840


GEODE-2155 Auto-reconnect fails with NPE

I created a test to reproduce the problem by introducing a non-Declarable
cache listener and then initiating a forced-disconnect.  cache.xml is
generated and when it's used to rebuild the cache a CacheXmlException
is thrown, reliably reproducing the problem.

The fix is to simply catch the CacheXmlException in
InternalDistributedSystem.reconnect() and terminate reconnection attempt


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/638b0584
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/638b0584
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/638b0584

Branch: refs/heads/develop
Commit: 638b05840d388f7d6476bad7ea0729510b67aca3
Parents: 0182a1b
Author: Bruce Schuchardt <bschuchardt@pivotal.io>
Authored: Thu Jan 5 08:36:50 2017 -0800
Committer: Bruce Schuchardt <bschuchardt@pivotal.io>
Committed: Thu Jan 5 08:38:53 2017 -0800

----------------------------------------------------------------------
 .../internal/InternalDistributedSystem.java     |  7 ++
 .../geode/internal/cache/GemFireCacheImpl.java  |  3 +
 .../geode/cache30/ReconnectDUnitTest.java       | 72 +++++++++++++++++++-
 3 files changed, 79 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/geode/blob/638b0584/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
----------------------------------------------------------------------
diff --git a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
index 3d51fb9..fad9206 100644
--- a/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
+++ b/geode-core/src/main/java/org/apache/geode/distributed/internal/InternalDistributedSystem.java
@@ -39,6 +39,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
 
+import org.apache.geode.cache.CacheXmlException;
 import org.apache.logging.log4j.Logger;
 
 import org.apache.geode.CancelCriterion;
@@ -2741,6 +2742,12 @@ public class InternalDistributedSystem extends DistributedSystem
               } else {
                 // this try failed. The new cache will call reconnect again
               }
+            } catch (CacheXmlException e) {
+              logger.warn("Exception occured while trying to create the cache during reconnect",
e);
+              reconnectDS.disconnect();
+              reconnectDS = null;
+              reconnectCancelled = true;
+              break;
             } catch (CancelException ignor) {
               logger.warn("Exception occured while trying to create the cache during reconnect",
                   ignor);

http://git-wip-us.apache.org/repos/asf/geode/blob/638b0584/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
----------------------------------------------------------------------
diff --git a/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
b/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
index 7f5fa32..943a240 100755
--- a/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
+++ b/geode-core/src/main/java/org/apache/geode/internal/cache/GemFireCacheImpl.java
@@ -2500,6 +2500,9 @@ public class GemFireCacheImpl
   // see Cache.getReconnectedCache()
   public Cache getReconnectedCache() {
     GemFireCacheImpl c = GemFireCacheImpl.getInstance();
+    if (c == null) {
+      return null;
+    }
     if (c == this || !c.isInitialized()) {
       c = null;
     }

http://git-wip-us.apache.org/repos/asf/geode/blob/638b0584/geode-core/src/test/java/org/apache/geode/cache30/ReconnectDUnitTest.java
----------------------------------------------------------------------
diff --git a/geode-core/src/test/java/org/apache/geode/cache30/ReconnectDUnitTest.java b/geode-core/src/test/java/org/apache/geode/cache30/ReconnectDUnitTest.java
index e324576..d0ca831 100755
--- a/geode-core/src/test/java/org/apache/geode/cache30/ReconnectDUnitTest.java
+++ b/geode-core/src/test/java/org/apache/geode/cache30/ReconnectDUnitTest.java
@@ -1096,6 +1096,60 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
     });
   }
 
+  /**
+   * GEODE-2155 Auto-reconnect fails with NPE due to a cache listener not implementing Declarable
+   */
+  @Test
+  public void testReconnectFailsDueToBadCacheXML() throws Exception {
+
+    Host host = Host.getHost(0);
+    VM vm0 = host.getVM(0);
+    VM vm1 = host.getVM(1);
+
+    final int locPort = locatorPort;
+
+    final String xmlFileLoc = (new File(".")).getAbsolutePath();
+
+    SerializableRunnable createCache = new SerializableRunnable("Create Cache and Regions")
{
+      public void run() {
+        locatorPort = locPort;
+        final Properties props = getDistributedSystemProperties();
+        props.put(MAX_WAIT_TIME_RECONNECT, "1000");
+        dsProperties = props;
+        ReconnectDUnitTest.savedSystem = getSystem(props);
+        ReconnectDUnitTest.savedCache = (GemFireCacheImpl) getCache();
+        Region myRegion = createRegion("myRegion", createAtts());
+        myRegion.put("MyKey", "MyValue");
+        myRegion.getAttributesMutator().addCacheListener(new NonDeclarableListener());
+      }
+    };
+
+    vm0.invoke(createCache); // vm0 keeps the locator from losing quorum when vm1 crashes
+
+    createCache.run();
+    IgnoredException.addIgnoredException(
+        "DistributedSystemDisconnectedException|ForcedDisconnectException", vm1);
+    forceDisconnect(null);
+
+    final GemFireCacheImpl cache = ReconnectDUnitTest.savedCache;
+    Wait.waitForCriterion(new WaitCriterion() {
+      public boolean done() {
+        return cache.isReconnecting() || cache.getDistributedSystem().isReconnectCancelled();
+      }
+
+      public String description() {
+        return "waiting for cache to begin reconnecting";
+      }
+    }, 30000, 100, true);
+    try {
+      cache.waitUntilReconnected(20, TimeUnit.SECONDS);
+    } catch (InterruptedException e) {
+      fail("interrupted");
+    }
+    assertTrue(cache.getDistributedSystem().isReconnectCancelled());
+    assertNull(cache.getReconnectedCache());
+  }
+
   private CacheSerializableRunnable getRoleAPlayerRunnable(final int locPort,
       final String regionName, final String myKey, final String myValue,
       final String startupMessage) {
@@ -1200,8 +1254,8 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
 
   }
 
-  public boolean forceDisconnect(VM vm) {
-    return (Boolean) vm.invoke(new SerializableCallable("crash distributed system") {
+  public boolean forceDisconnect(VM vm) throws Exception {
+    SerializableCallable fd = new SerializableCallable("crash distributed system") {
       public Object call() throws Exception {
         // since the system will disconnect and attempt to reconnect
         // a new system the old reference to DTC.system can cause
@@ -1224,7 +1278,12 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
         }
         return true;
       }
-    });
+    };
+    if (vm != null) {
+      return (Boolean) vm.invoke(fd);
+    } else {
+      return (Boolean) fd.call();
+    }
   }
 
   private static int getPID() {
@@ -1239,6 +1298,13 @@ public class ReconnectDUnitTest extends JUnit4CacheTestCase {
   }
 
   /**
+   * A non-Declarable listener will be rejected by the XML parser when rebuilding the cache,
causing
+   * auto-reconnect to fail.
+   */
+  public static class NonDeclarableListener extends CacheListenerAdapter {
+  }
+
+  /**
    * CacheKillingListener crashes the distributed system when it is invoked for the first
time.
    * After that it ignores any notifications.
    */


Mime
View raw message