geode-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bschucha...@apache.org
Subject [42/50] [abbrv] incubator-geode git commit: GEODE-77: faster network-partition detection, bugfixes for auto-reconnect
Date Fri, 20 Nov 2015 21:02:36 GMT
GEODE-77: faster network-partition detection, bugfixes for auto-reconnect

Network-partition detection was taking too long to initiate.  This adds IOException checks
to the Transport class that initiate member checks, shrinking the time to detect partitions.

There were still problems with auto-reconnect not being able to join while the old member
ID was still in the view.  It would also sometimes install a view and think it had joined
when it had not, causing other members to reject messages from the new "member" and resulting
in a hung test.  GMSJoinLeave now rejects view messages that don't contain an appropriate
member ID during the join process, and installView is smarter about what views it will accept
as well.

The view creator was being stubborn about exiting during shutdown.  I've added additional
checks to it so that it won't accidentally create another view when GMSJoinLeave is in the
process of stopping.


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/21274f9c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/21274f9c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/21274f9c

Branch: refs/heads/develop
Commit: 21274f9c943a7f8cd99833a3b1f7f91ad12cbf48
Parents: 0ee416f
Author: Bruce Schuchardt <bschuchardt@pivotal.io>
Authored: Thu Nov 12 08:52:23 2015 -0800
Committer: Bruce Schuchardt <bschuchardt@pivotal.io>
Committed: Thu Nov 12 08:52:23 2015 -0800

----------------------------------------------------------------------
 .../internal/InternalDistributedSystem.java     |  4 +-
 .../internal/membership/QuorumChecker.java      |  5 ++
 .../membership/gms/fd/GMSHealthMonitor.java     | 14 ++--
 .../membership/gms/membership/GMSJoinLeave.java | 67 ++++++++++++++------
 .../gms/messenger/GMSQuorumChecker.java         |  9 +++
 .../gms/messenger/JGroupsMessenger.java         | 59 ++++++++++++++++-
 .../membership/gms/messenger/Transport.java     | 55 ++++++++++++++--
 .../gemfire/cache30/ReconnectDUnitTest.java     |  4 ++
 .../sanctionedDataSerializables.txt             |  4 +-
 9 files changed, 183 insertions(+), 38 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
index 287205f..956fe8b 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/InternalDistributedSystem.java
@@ -2812,9 +2812,7 @@ public final class InternalDistributedSystem
             }
             if (cache.getCachePerfStats().getReliableRegionsMissing() == 0){
               reconnectAttemptCounter = 0;
-              if (isDebugEnabled) {
-                logger.debug("Reconnected properly");
-              }  
+              logger.info("Reconnected properly");
             }
             else {
               // this try failed. The new cache will call reconnect again

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/QuorumChecker.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/QuorumChecker.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/QuorumChecker.java
index 0cde503..3d8d261 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/QuorumChecker.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/QuorumChecker.java
@@ -48,4 +48,9 @@ public interface QuorumChecker {
    * to the one that is reconnecting.
    */
   public Object getMembershipInfo();
+  
+  /**
+   * Returns the membership view that is being used to establish a quorum
+   */
+  public NetView getView();
 }

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
index 9e410e2..b9ec83a 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/fd/GMSHealthMonitor.java
@@ -478,10 +478,6 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler
{
       return true;
     }
     try {
-      // establish TCP connection
-//      for (Map.Entry<InternalDistributedMember, InetSocketAddress> entry : socketInfo.entrySet())
{
-//        logger.info("socketInfo member:" + entry.getKey() + " port:" + entry.getValue().getPort());
-//      }
       logger.debug("Checking member {} with TCP socket connection {}:{}.", suspectMember,
suspectMember.getInetAddress(), port);
       clientSocket = SocketCreator.getDefaultInstance().connect(suspectMember.getInetAddress(),
port,
           (int)memberTimeout, new ConnectTimeoutTask(services.getTimer(), memberTimeout),
false, -1, false);
@@ -626,7 +622,7 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler
{
       serverSocket = new ServerSocket();
       serverSocket.bind(new InetSocketAddress(socketAddress, socketPort));
     } catch (IOException e) {
-      throw new GemFireConfigException("Unable to allocate a failure detection port in the membership-port range");
+      throw new GemFireConfigException("Unable to allocate a failure detection port in the membership-port range", e);
     }
 
     serverSocketExecutor.execute(new Runnable() {
@@ -712,6 +708,9 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler
{
           HeartbeatMessage message = new HeartbeatMessage(-1);
           message.setRecipient(coordinator);
           try {
+            if (isStopping) {
+              return;
+            }
             services.getMessenger().sendUnreliably(message);
           } catch (CancelException e) {
             return;
@@ -732,6 +731,9 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler
{
           if (mbr.equals(coordinator)) {
             continue;
           }
+          if (isStopping) {
+            return;
+          }
           HeartbeatMessage message = new HeartbeatMessage(-1);
           message.setRecipient(mbr);
           try {
@@ -921,13 +923,11 @@ public class GMSHealthMonitor implements HealthMonitor, MessageHandler
{
   @Override
   public void beSick() {
     this.beingSick = true;
-    initiateSuspicion(localAddress, "beSick invoked on GMSHealthMonitor");
   }
 
   @Override
   public void playDead() {
     this.playingDead = true;
-    initiateSuspicion(localAddress, "playDead invoked on GMSHealthMonitor");
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index a6121dd..500131b 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -13,7 +13,6 @@ import static com.gemstone.gemfire.internal.DataSerializableFixedID.VIEW_ACK_MES
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
@@ -676,6 +675,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
   }
 
   boolean sendView(NetView view, List<InternalDistributedMember> newMembers, boolean preparing, ViewReplyProcessor rp) {
+
+    boolean isNetworkPartition = isNetworkPartition(view, false);
+    
     int id = view.getViewId();
     InstallViewMessage msg = new InstallViewMessage(view, services.getAuthenticator().getCredentials(this.localAddress),
preparing);
     Set<InternalDistributedMember> recips = new HashSet<>(view.getMembers());
@@ -726,8 +728,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     if (preparing) {
       // send join responses after other members at least have
       // a prepared view announcing the new member
-      if (!(isNetworkPartition(view) && quorumRequired)) {
-        List<Integer> newPorts = new ArrayList<Integer>(view.size());
+      if (!(isNetworkPartition && quorumRequired)) {
         sendJoinResponses(newMembers, view);
       }
 
@@ -764,21 +765,26 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       return;
     }
 
+    boolean viewContainsMyUnjoinedAddress = false;
+    if (!this.isJoined) {
+      // if we're still waiting for a join response and we're in this view we
+      // should install the view so join() can finish its work
+      for (InternalDistributedMember mbr: view.getMembers()) {
+        if (localAddress.compareTo(mbr) == 0) {
+          viewContainsMyUnjoinedAddress = true;
+          break;
+        }
+      }
+    }
+
     if (m.isPreparing()) {
       if (this.preparedView != null && this.preparedView.getViewId() >= view.getViewId())
{
         services.getMessenger().send(new ViewAckMessage(m.getSender(), this.preparedView));
       } else {
         this.preparedView = view;
         ackView(m);
-        if (!this.isJoined) {
-          // if we're still waiting for a join response and we're in this view we
-          // should install the view so join() can finish its work
-          for (InternalDistributedMember mbr: view.getMembers()) {
-            if (localAddress.compareTo(mbr) == 0) {
-              installView(view);
-              break;
-            }
-          }
+        if (viewContainsMyUnjoinedAddress) {
+          installView(view);
         }
       }
     } else { // !preparing
@@ -788,7 +794,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         if (!m.isRebroadcast()) { // no need to ack a rebroadcast view
           ackView(m);
         }
-        installView(view);
+        if (isJoined || viewContainsMyUnjoinedAddress) {
+          installView(view);
+        }
       }
     }
   }
@@ -1078,8 +1086,10 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       logger.info("received new view: {}\nold view is: {}", newView, currentView);
 
       if (currentView == null && !this.isJoined) {
+        boolean found = false;
         for (InternalDistributedMember mbr : newView.getMembers()) {
           if (this.localAddress.equals(mbr)) {
+            found = true;
             this.birthViewId = mbr.getVmViewId();
             this.localAddress.setVmViewId(this.birthViewId);
             GMSMember me = (GMSMember) this.localAddress.getNetMember();
@@ -1088,15 +1098,20 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
             break;
           }
         }
+        if (!found) {
+          logger.info("rejecting view (not yet joined)");
+          return;
+        }
       }
 
-      if (isJoined && isNetworkPartition(newView)) {
+      if (isJoined && isNetworkPartition(newView, true)) {
         if (quorumRequired) {
           Set<InternalDistributedMember> crashes = newView.getActualCrashedMembers(currentView);
           forceDisconnect(LocalizedStrings.Network_partition_detected.toLocalizedString(crashes.size(),
crashes));
           return;
         }
       }
+      
       previousView = currentView;
       currentView = newView;
       preparedView = null;
@@ -1198,13 +1213,13 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
   /**
    * check to see if the new view shows a drop of 51% or more
    */
-  private boolean isNetworkPartition(NetView newView) {
+  private boolean isNetworkPartition(NetView newView, boolean logWeights) {
     if (currentView == null) {
       return false;
     }
     int oldWeight = currentView.memberWeight();
     int failedWeight = newView.getCrashedMemberWeight(currentView);
-    if (failedWeight > 0) {
+    if (failedWeight > 0 && logWeights) {
       if (logger.isInfoEnabled()
           && newView.getCreator().equals(localAddress)) { // view-creator logs this
         newView.logCrashedMemberWeights(currentView, logger);
@@ -1693,6 +1708,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
               } finally {
                 waiting = false;
               }
+              if (shutdown || Thread.currentThread().isInterrupted()) {
+                return;
+              }
               if (viewRequests.size() == 1) {
                 // start the timer when we have only one request because
                 // concurrent startup / shutdown of multiple members is
@@ -1725,6 +1743,9 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
             logger.info("View Creator is processing {} requests for the next membership view",
requests.size());
             try {
               createAndSendView(requests);
+              if (shutdown) {
+                return;
+              }
             } catch (DistributedSystemDisconnectedException e) {
               shutdown = true;
             }
@@ -1876,10 +1897,14 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         mbr.setVmViewId(newView.getViewId());
         mbr.getNetMember().setSplitBrainEnabled(services.getConfig().isNetworkPartitionDetectionEnabled());
       }
+      
+      if (isShutdown()) {
+        return;
+      }
       // send removal messages before installing the view so we stop
       // getting messages from members that have been kicked out
       sendRemoveMessages(removalReqs, removalReasons, newView);
-
+      
       prepareAndSendView(newView, joinReqs, leaveReqs, newView.getCrashedMembers());
 
       return;
@@ -1896,7 +1921,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
           return;
         }
 
-        if (quorumRequired && isNetworkPartition(newView)) {
+        if (quorumRequired && isNetworkPartition(newView, true)) {
           sendNetworkPartitionMessage(newView);
           try {
             Thread.sleep(LEAVE_MESSAGE_SLEEP_TIME);
@@ -1995,6 +2020,12 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       lastConflictingView = null;
 
       sendView(newView, joinReqs);
+      
+      // after sending a final view we need to stop this thread if
+      // the GMS is shutting down
+      if (isStopping()) {
+        shutdown = true;
+      }
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/GMSQuorumChecker.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/GMSQuorumChecker.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/GMSQuorumChecker.java
index 13153a4..cf3fc6b 100644
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/GMSQuorumChecker.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/GMSQuorumChecker.java
@@ -100,6 +100,11 @@ public class GMSQuorumChecker implements QuorumChecker {
     channel.setReceiver(null);
     channel.setReceiver(new QuorumCheckerReceiver());
   }
+  
+  @Override
+  public NetView getView() {
+    return this.lastView;
+  }
 
   @Override
   public Object getMembershipInfo() {
@@ -239,5 +244,9 @@ public class GMSQuorumChecker implements QuorumChecker {
       }
     }
   }
+  
+  public String toString() {
+    return getClass().getSimpleName() + " on view " + this.lastView;
+  }
 
 }

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
index 65d6f05..8ac8f77 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/JGroupsMessenger.java
@@ -1,6 +1,7 @@
 package com.gemstone.gemfire.distributed.internal.membership.gms.messenger;
 
 import static com.gemstone.gemfire.distributed.internal.membership.gms.GMSUtil.replaceStrings;
+import static com.gemstone.gemfire.internal.DataSerializableFixedID.JOIN_REQUEST;
 import static com.gemstone.gemfire.internal.DataSerializableFixedID.JOIN_RESPONSE;
 import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
 
@@ -63,6 +64,7 @@ import com.gemstone.gemfire.distributed.internal.membership.gms.GMSMember;
 import com.gemstone.gemfire.distributed.internal.membership.gms.Services;
 import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.MessageHandler;
 import com.gemstone.gemfire.distributed.internal.membership.gms.interfaces.Messenger;
+import com.gemstone.gemfire.distributed.internal.membership.gms.messages.JoinRequestMessage;
 import com.gemstone.gemfire.distributed.internal.membership.gms.messages.JoinResponseMessage;
 import com.gemstone.gemfire.internal.ClassPathLoader;
 import com.gemstone.gemfire.internal.HeapDataOutputStream;
@@ -122,8 +124,13 @@ public class JGroupsMessenger implements Messenger {
   
   private volatile long pongsReceived;
   
-  private byte[] serializedNetMember;
-  
+  /**
+   * A set that contains addresses that we have logged JGroups IOExceptions for in the
+   * current membership view and possibly initiated suspect processing.  This
+   * reduces the amount of suspect processing initiated by IOExceptions and the
+   * amount of exceptions logged
+   */
+  private Set<Address> addressesWithioExceptionsProcessed = Collections.synchronizedSet(new
HashSet<Address>());
   
   static {
     // register classes that we've added to jgroups that are put on the wire
@@ -282,6 +289,9 @@ public class JGroupsMessenger implements Messenger {
     if (sr != null) {
       sr.setDMStats(services.getStatistics());
     }
+    
+    Transport transport = (Transport)myChannel.getProtocolStack().getTransport();
+    transport.setMessenger(this);
 
     try {
       myChannel.setReceiver(null);
@@ -360,9 +370,43 @@ public class JGroupsMessenger implements Messenger {
     View jgv = new View(vid, new ArrayList<Address>(mbrs));
     logger.trace("installing JGroups view: {}", jgv);
     this.myChannel.down(new Event(Event.VIEW_CHANGE, jgv));
+
+    addressesWithioExceptionsProcessed.clear();
   }
   
 
+  /**
+   * If JGroups is unable to send a message it may mean that the network
+   * is down.  If so we need to initiate suspect processing on the
+   * recipient.<p>
+   * see Transport._send()
+   */
+  public void handleJGroupsIOException(IOException e, Message msg, Address dest) {
+    if (addressesWithioExceptionsProcessed.contains(dest)) {
+      return;
+    }
+    addressesWithioExceptionsProcessed.add(dest);
+    logger.info("processing JGroups IOException: " + e.getMessage());
+    NetView v = this.view;
+    JGAddress jgMbr = (JGAddress)dest;
+    if (v != null) {
+      List<InternalDistributedMember> members = v.getMembers();
+      InternalDistributedMember recipient = null;
+      for (InternalDistributedMember mbr: members) {
+        GMSMember gmsMbr = ((GMSMember)mbr.getNetMember());
+        if (jgMbr.getUUIDLsbs() == gmsMbr.getUuidLSBs()
+            && jgMbr.getUUIDMsbs() == gmsMbr.getUuidMSBs()
+            && jgMbr.getVmViewId() == gmsMbr.getVmViewId()) {
+          recipient = mbr;
+          break;
+        }
+      }
+      if (recipient != null) {
+        services.getHealthMonitor().checkIfAvailable(recipient,
+            "Unable to send messages to this member via JGroups", true);
+      }
+    }
+  }
   
   private void establishLocalAddress() {
     UUID logicalAddress = (UUID)myChannel.getAddress();
@@ -753,10 +797,19 @@ public class JGroupsMessenger implements Messenger {
       }
       
       GMSMember m = DataSerializer.readObject(dis);
-      sender = getMemberFromView(m, ordinal);
 
       result = DataSerializer.readObject(dis);
       if (result instanceof DistributionMessage) {
+        DistributionMessage dm = (DistributionMessage)result;
+        // JoinRequestMessages are sent with an ID that may have been
+        // reused from a previous life by way of auto-reconnect,
+        // so we don't want to find a canonical reference for the
+        // request's sender ID
+        if (dm.getDSFID() == JOIN_REQUEST) {
+          sender = ((JoinRequestMessage)dm).getMemberID();
+        } else {
+          sender = getMemberFromView(m, ordinal);
+        }
         ((DistributionMessage)result).setSender(sender);
       }
       

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/Transport.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/Transport.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/Transport.java
index 9f91a74..8fcac7d 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/Transport.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/messenger/Transport.java
@@ -1,17 +1,29 @@
 package com.gemstone.gemfire.distributed.internal.membership.gms.messenger;
 
+import java.io.IOException;
+import java.io.InterruptedIOException;
 import java.lang.reflect.InvocationTargetException;
+import java.net.SocketException;
 
+import org.jgroups.Address;
+import org.jgroups.Message;
 import org.jgroups.protocols.UDP;
 import org.jgroups.util.DefaultThreadFactory;
 import org.jgroups.util.LazyThreadFactory;
+import org.jgroups.util.Util;
 
 public class Transport extends UDP {
 
   /**
    * This is the initial part of the name of all JGroups threads that deliver messages
    */
-  public static final String PRECIOUS_THREAD_NAME_PREFIX = "Geode UDP";
+  public static final String THREAD_POOL_NAME_PREFIX = "Geode UDP";
+  
+  private JGroupsMessenger messenger;
+  
+  public void setMessenger(JGroupsMessenger m) {
+    messenger = m;
+  }
   
   /*
    * (non-Javadoc)
@@ -43,6 +55,39 @@ public class Transport extends UDP {
     }
   }
 
+  /*
+   * (non-Javadoc)
+   * copied from JGroups to perform Geode-specific error handling when there
+   * is a network partition
+   * @see org.jgroups.protocols.TP#_send(org.jgroups.Message, org.jgroups.Address)
+   */
+  @Override
+  protected void _send(Message msg, Address dest) {
+    try {
+        send(msg, dest);
+    }
+    catch(InterruptedIOException iex) {
+    }
+    catch(InterruptedException interruptedEx) {
+        Thread.currentThread().interrupt(); // let someone else handle the interrupt
+    }
+    catch(SocketException e) {
+      log.error("Exception caught while sending message", e);
+//        log.trace(Util.getMessage("SendFailure"),
+//                  local_addr, (dest == null? "cluster" : dest), msg.size(), e.toString(),
msg.printHeaders());
+    }
+    catch (IOException e) {
+      if (messenger != null
+          /*&& e.getMessage().contains("Operation not permitted")*/) { // this is the english Oracle JDK exception condition we really want to catch
+        messenger.handleJGroupsIOException(e, msg, dest);
+      }
+    }
+    catch(Throwable e) {
+        log.error("Exception caught while sending message", e);
+//        Util.getMessage("SendFailure"),
+//                  local_addr, (dest == null? "cluster" : dest), msg.size(), e.toString(),
msg.printHeaders());
+    }
+}
     
   /*
    * (non-Javadoc)
@@ -54,10 +99,10 @@ public class Transport extends UDP {
   @Override
   public void init() throws Exception {
     global_thread_factory=new DefaultThreadFactory("Geode ", true);
-    timer_thread_factory=new LazyThreadFactory(PRECIOUS_THREAD_NAME_PREFIX + " Timer", true,
true);
-    default_thread_factory=new DefaultThreadFactory(PRECIOUS_THREAD_NAME_PREFIX + " Incoming",
true, true);
-    oob_thread_factory=new DefaultThreadFactory(PRECIOUS_THREAD_NAME_PREFIX + " OOB", true,
true);
-    internal_thread_factory=new DefaultThreadFactory(PRECIOUS_THREAD_NAME_PREFIX + " INT",
true, true);
+    timer_thread_factory=new LazyThreadFactory(THREAD_POOL_NAME_PREFIX + " Timer", true,
true);
+    default_thread_factory=new DefaultThreadFactory(THREAD_POOL_NAME_PREFIX + " Incoming",
true, true);
+    oob_thread_factory=new DefaultThreadFactory(THREAD_POOL_NAME_PREFIX + " OOB", true, true);
+    internal_thread_factory=new DefaultThreadFactory(THREAD_POOL_NAME_PREFIX + " INT", true,
true);
     super.init();
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/test/java/com/gemstone/gemfire/cache30/ReconnectDUnitTest.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/java/com/gemstone/gemfire/cache30/ReconnectDUnitTest.java
b/gemfire-core/src/test/java/com/gemstone/gemfire/cache30/ReconnectDUnitTest.java
index 46cbbc3..5b53159 100644
--- a/gemfire-core/src/test/java/com/gemstone/gemfire/cache30/ReconnectDUnitTest.java
+++ b/gemfire-core/src/test/java/com/gemstone/gemfire/cache30/ReconnectDUnitTest.java
@@ -48,6 +48,7 @@ import com.gemstone.gemfire.distributed.internal.DistributionConfig;
 import com.gemstone.gemfire.distributed.internal.InternalDistributedSystem;
 import com.gemstone.gemfire.distributed.internal.InternalDistributedSystem.ReconnectListener;
 import com.gemstone.gemfire.distributed.internal.InternalLocator;
+import com.gemstone.gemfire.distributed.internal.membership.InternalDistributedMember;
 import com.gemstone.gemfire.distributed.internal.membership.gms.MembershipManagerHelper;
 import com.gemstone.gemfire.distributed.internal.membership.gms.mgr.GMSMembershipManager;
 import com.gemstone.gemfire.internal.AvailablePort;
@@ -432,6 +433,9 @@ public class ReconnectDUnitTest extends CacheTestCase
           fail("interrupted while waiting for reconnect");
         }
         assertTrue("expected system to be reconnected", ds.getReconnectedSystem() != null);
+        int viewId = MembershipManagerHelper.getMembershipManager(ds.getReconnectedSystem()).getView().getViewId();
+        int memberViewId = ((InternalDistributedMember)ds.getReconnectedSystem().getDistributedMember()).getVmViewId();
+        assertEquals("expected a new ID to be assigned", viewId, memberViewId);
         return ds.getReconnectedSystem().getDistributedMember();
       }
     });

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/21274f9c/gemfire-core/src/test/resources/com/gemstone/gemfire/codeAnalysis/sanctionedDataSerializables.txt
----------------------------------------------------------------------
diff --git a/gemfire-core/src/test/resources/com/gemstone/gemfire/codeAnalysis/sanctionedDataSerializables.txt
b/gemfire-core/src/test/resources/com/gemstone/gemfire/codeAnalysis/sanctionedDataSerializables.txt
index eb798d8..137850f 100644
--- a/gemfire-core/src/test/resources/com/gemstone/gemfire/codeAnalysis/sanctionedDataSerializables.txt
+++ b/gemfire-core/src/test/resources/com/gemstone/gemfire/codeAnalysis/sanctionedDataSerializables.txt
@@ -1966,8 +1966,8 @@ fromData,1,b1
 toData,1,b1
 
 com/gemstone/gemfire/internal/cache/tier/sockets/ClientProxyMembershipID,2
-fromData,49,2a2bb80054b500502ab40050b2004eb6004f9b000c2a2b03b80055b500502a2bb80056b500082a2bb900570100b5000ab1
-toData,57,2bb8004d4d2cb2004eb6004f9b001a2ab40050c7000a2ab20018b500502ab400502b04b600512ab400082bb800522b2ab4000ab900530200b1
+fromData,19,2a2bb8004fb500082a2bb900500100b5000ab1
+toData,19,2ab400082bb8004d2b2ab4000ab9004e0200b1
 
 com/gemstone/gemfire/internal/cache/tier/sockets/ClientTombstoneMessage,2
 fromData,63,2ab800222bb90023010032b500072a2bb900230100b80024b5001c2a2bb80025b6000a2a2bb80026b500082a2bb80027b500202a2bb80026c00028b50021b1


Mime
View raw message