geode-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From bschucha...@apache.org
Subject incubator-geode git commit: GEM-335: perf099 is more than 3x slower with new GMS
Date Thu, 21 Jan 2016 19:17:55 GMT
Repository: incubator-geode
Updated Branches:
  refs/heads/develop 1be4f932a -> af25147e6


GEM-335: perf099 is more than 3x slower with new GMS

This lowers the membership event collection interval from 500ms to 300.  GemFire
8.2 used a 150ms interval so we might want to experiment with lowering it to
that in regression tests.

The wait period after sending a Leave message has been removed.  The
DistributionManager ShutdownMessage is already causing a Leave event to be
scheduled for a member when it is shutting down, so the Leave message isn't
needed unless the membership-manager is running outside of Geode/GemFire.

Finally, there was a race condition between the attemptToJoin method and
the installView method.  After sending a Join request it was possible for
a new view to be received and installed before attemptToJoin started its
wait() for a response.  This caused it to wait a full 12 seconds before
waking up and finding that isJoined had been set to true by the other thread.
Adding a check for isJoined before waiting for a response fixed this.


Project: http://git-wip-us.apache.org/repos/asf/incubator-geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-geode/commit/af25147e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-geode/tree/af25147e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-geode/diff/af25147e

Branch: refs/heads/develop
Commit: af25147e6e692b87fc4aa3ee317d577711d152ac
Parents: 1be4f93
Author: Bruce Schuchardt <bschuchardt@pivotal.io>
Authored: Thu Jan 21 11:09:55 2016 -0800
Committer: Bruce Schuchardt <bschuchardt@pivotal.io>
Committed: Thu Jan 21 11:16:37 2016 -0800

----------------------------------------------------------------------
 .../internal/membership/gms/ServiceConfig.java  |   2 +-
 .../membership/gms/membership/GMSJoinLeave.java | 127 ++++++++++---------
 .../gms/mgr/GMSMembershipManager.java           |   3 +
 3 files changed, 68 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/af25147e/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
index a412dfa..9f0bbf7 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/ServiceConfig.java
@@ -27,7 +27,7 @@ import com.gemstone.gemfire.internal.admin.remote.RemoteTransportConfig;
 public class ServiceConfig {
 
   /** stall time to wait for concurrent join/leave/remove requests to be received */
-  public static final long MEMBER_REQUEST_COLLECTION_INTERVAL = Long.getLong("gemfire.member-request-collection-interval",
500);
+  public static final long MEMBER_REQUEST_COLLECTION_INTERVAL = Long.getLong("gemfire.member-request-collection-interval",
300);
 
   /** various settings from Geode configuration */
   private long joinTimeout;

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/af25147e/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
index abdceb4..6b09214 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/membership/GMSJoinLeave.java
@@ -97,8 +97,8 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
   /** amount of time to sleep before trying to join after a failed attempt */
   private static final int JOIN_RETRY_SLEEP = Integer.getInteger("gemfire.join-retry-sleep",
1000);
 
-  /** time to wait for a leave request to be transmitted by jgroups */
-  private static final long LEAVE_MESSAGE_SLEEP_TIME = Long.getLong("gemfire.leave-message-sleep-time",
1000);
+  /** time to wait for a broadcast message to be transmitted by jgroups */
+  private static final long BROADCAST_MESSAGE_SLEEP_TIME = Long.getLong("gemfire.broadcast-message-sleep-time",
1000);
 
   /** if the locators don't know who the coordinator is we send find-coord requests to this
many nodes */
   private static final int MAX_DISCOVERY_NODES = Integer.getInteger("gemfire.max-discovery-nodes",
30);
@@ -329,59 +329,67 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
     }
 
     JoinResponseMessage response = null;
-    synchronized (joinResponse) {
-      if (joinResponse[0] == null) {
-        try {
-          // Note that if we give up waiting but a response is on
-          // the way we will get the new view and join that way.
-          // See installView()
-          long timeout = Math.max(services.getConfig().getMemberTimeout(), services.getConfig().getJoinTimeout()
/ 5);
-          joinResponse.wait(timeout);
-        } catch (InterruptedException e) {
-          logger.debug("join attempt was interrupted");
-          Thread.currentThread().interrupt();
-          return false;
-        }
+    try {
+      response = waitForJoinResponse();
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      return false;
+    }
+    
+    if (response == null) {
+      if (!isJoined) {
+        logger.debug("received no join response");
       }
-      response = joinResponse[0];
+      return isJoined;
     }
-    if (response != null) {
-      logger.debug("received join response {}", response);
-      joinResponse[0] = null;
-      String failReason = response.getRejectionMessage();
-      if (failReason != null) {
-        if (failReason.contains("Rejecting the attempt of a member using an older version")

-            || failReason.contains("15806")) {
-          throw new SystemConnectException(failReason);
-        }
-        throw new AuthenticationFailedException(failReason);
-      }
-      if (response.getCurrentView() != null) {
-        if (response.getBecomeCoordinator()) {
-          logger.info("I am being told to become the membership coordinator by {}", coord);
-          synchronized(viewInstallationLock) {
-            this.currentView = response.getCurrentView();
-            becomeCoordinator(null);
-          }
-        } else {
-          this.birthViewId = response.getMemberID().getVmViewId();
-          this.localAddress.setVmViewId(this.birthViewId);
-          GMSMember me = (GMSMember) this.localAddress.getNetMember();
-          me.setBirthViewId(birthViewId);
-          installView(response.getCurrentView());
-        }
 
-        return true;
+    logger.debug("received join response {}", response);
+    joinResponse[0] = null;
+    String failReason = response.getRejectionMessage();
+    if (failReason != null) {
+      if (failReason.contains("Rejecting the attempt of a member using an older version")

+          || failReason.contains("15806")) {
+        throw new SystemConnectException(failReason);
+      }
+      throw new AuthenticationFailedException(failReason);
+    }
+    
+    if (response.getCurrentView() == null) {
+      logger.info("received join response with no membership view: {}", response);
+      return isJoined;
+    }
 
-      } else {
-        logger.info("received join response with no membership view: {}", response);
+    if (response.getBecomeCoordinator()) {
+      logger.info("I am being told to become the membership coordinator by {}", coord);
+      synchronized(viewInstallationLock) {
+        this.currentView = response.getCurrentView();
+        becomeCoordinator(null);
       }
-    } else {
-      if (!isJoined) {
-        logger.debug("received no join response");
+      return true;
+    }
+    
+    this.birthViewId = response.getMemberID().getVmViewId();
+    this.localAddress.setVmViewId(this.birthViewId);
+    GMSMember me = (GMSMember) this.localAddress.getNetMember();
+    me.setBirthViewId(birthViewId);
+    installView(response.getCurrentView());
+
+    return true;
+  }
+
+  private JoinResponseMessage waitForJoinResponse() throws InterruptedException {
+    JoinResponseMessage response;
+    synchronized (joinResponse) {
+      if (joinResponse[0] == null && !isJoined) {
+        // Note that if we give up waiting but a response is on
+        // the way we will get the new view and join that way.
+        // See installView()
+        long timeout = Math.max(services.getConfig().getMemberTimeout(), services.getConfig().getJoinTimeout()
/ 5);
+        joinResponse.wait(timeout);
       }
+      response = joinResponse[0];
     }
-    return isJoined;
+    return response;
   }
 
   /**
@@ -789,7 +797,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         this.preparedView = view;
         ackView(m);
         if (viewContainsMyUnjoinedAddress) {
-          installView(view);
+          installView(view); // this will notifyAll the joinResponse
         }
       }
     } else { // !preparing
@@ -1155,9 +1163,12 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       lastConflictingView = null;
       services.installView(newView);
 
-      isJoined = true;
-      synchronized(joinResponse) {
-        joinResponse.notifyAll();
+      if (!isJoined) {
+        logger.debug("notifying join thread");
+        isJoined = true;
+        synchronized(joinResponse) {
+          joinResponse.notifyAll();
+        }
       }
 
       if (!newView.getCreator().equals(this.localAddress)) {
@@ -1331,7 +1342,6 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
 
   @Override
   public void leave() {
-    boolean waitForProcessing = false;
     synchronized (viewInstallationLock) {
       NetView view = currentView;
       isStopping = true;
@@ -1339,21 +1349,12 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
       if (view != null) {
         if (view.size() > 1) {
           List<InternalDistributedMember> coords = view.getPreferredCoordinators(Collections.<InternalDistributedMember>
emptySet(), localAddress, 5);
-
           logger.debug("JoinLeave sending a leave request to {}", coords);
           LeaveRequestMessage m = new LeaveRequestMessage(coords, this.localAddress, "this
member is shutting down");
           services.getMessenger().send(m);
-          waitForProcessing = true;
         } // view.size
       } // view != null
     }
-    if (waitForProcessing) {
-      try {
-        Thread.sleep(LEAVE_MESSAGE_SLEEP_TIME);
-      } catch (InterruptedException e) {
-        Thread.currentThread().interrupt();
-      }
-    }
   }
 
   @Override
@@ -2028,7 +2029,7 @@ public class GMSJoinLeave implements JoinLeave, MessageHandler {
         if (quorumRequired && isNetworkPartition(newView, true)) {
           sendNetworkPartitionMessage(newView);
           try {
-            Thread.sleep(LEAVE_MESSAGE_SLEEP_TIME);
+            Thread.sleep(BROADCAST_MESSAGE_SLEEP_TIME);
           } catch (InterruptedException e) {
             // signal the run() method to exit
             shutdown = true;

http://git-wip-us.apache.org/repos/asf/incubator-geode/blob/af25147e/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
----------------------------------------------------------------------
diff --git a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
index 7fe55e5..204cf60 100755
--- a/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
+++ b/gemfire-core/src/main/java/com/gemstone/gemfire/distributed/internal/membership/gms/mgr/GMSMembershipManager.java
@@ -2034,6 +2034,9 @@ public class GMSMembershipManager implements MembershipManager, Manager
               Thread.currentThread().interrupt();
               // Keep going, try to close the endpoint.
             }
+            if (!dc.isOpen()) {
+              return;
+            }
             if (logger.isDebugEnabled())
               logger.debug("Membership: closing connections for departed member {}", member);
             // close connections, but don't do membership notification since it's already
been done


Mime
View raw message