lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject svn commit: r1573242 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/cloud/ElectionContext.java core/src/test/org/apache/solr/cloud/OverseerTest.java solrj/src/java/org/apache/solr/common/util/RetryUtil.java
Date Sun, 02 Mar 2014 00:27:25 GMT
Author: markrmiller
Date: Sun Mar  2 00:27:25 2014
New Revision: 1573242

URL: http://svn.apache.org/r1573242
Log:
SOLR-5799: When registering as the leader, if an existing ephemeral registration exists, wait
a short time to see if it goes away.

Added:
    lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java   (with
props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1573242&r1=1573241&r2=1573242&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Mar  2 00:27:25 2014
@@ -129,6 +129,10 @@ Other Changes
 * SOLR-5771: Add SolrTestCaseJ4.SuppressSSL annotation to disable SSL (instead of static
boolean).
   (Robert Muir)
 
+* SOLR-5799: When registering as the leader, if an existing ephemeral
+  registration exists, wait a short time to see if it goes away.
+  (Mark Miller)
+
 ==================  4.7.0 ==================
 
 Versions of Major Components

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java?rev=1573242&r1=1573241&r2=1573242&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java Sun Mar
 2 00:27:25 2014
@@ -14,6 +14,8 @@ import org.apache.solr.common.cloud.ZkCm
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.RetryUtil;
+import org.apache.solr.common.util.RetryUtil.RetryCmd;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -22,6 +24,7 @@ import org.apache.solr.util.RefCounted;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NoNodeException;
+import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -79,24 +82,28 @@ public abstract class ElectionContext {
 }
 
 class ShardLeaderElectionContextBase extends ElectionContext {
-  private static Logger log = LoggerFactory.getLogger(ShardLeaderElectionContextBase.class);
+  private static Logger log = LoggerFactory
+      .getLogger(ShardLeaderElectionContextBase.class);
   protected final SolrZkClient zkClient;
   protected String shardId;
   protected String collection;
   protected LeaderElector leaderElector;
-
-  public ShardLeaderElectionContextBase(LeaderElector leaderElector, final String shardId,
-      final String collection, final String coreNodeName, ZkNodeProps props, ZkStateReader
zkStateReader) {
-    super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leader_elect/"
-        + shardId, ZkStateReader.getShardLeadersPath(collection, shardId),
-        props, zkStateReader.getZkClient());
+  
+  public ShardLeaderElectionContextBase(LeaderElector leaderElector,
+      final String shardId, final String collection, final String coreNodeName,
+      ZkNodeProps props, ZkStateReader zkStateReader) {
+    super(coreNodeName, ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection
+        + "/leader_elect/" + shardId, ZkStateReader.getShardLeadersPath(
+        collection, shardId), props, zkStateReader.getZkClient());
     this.leaderElector = leaderElector;
     this.zkClient = zkStateReader.getZkClient();
     this.shardId = shardId;
     this.collection = collection;
     
     try {
-      new ZkCmdExecutor(zkStateReader.getZkClient().getZkClientTimeout()).ensureExists(ZkStateReader.COLLECTIONS_ZKNODE
+ "/" + collection, zkClient);
+      new ZkCmdExecutor(zkStateReader.getZkClient().getZkClientTimeout())
+          .ensureExists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection,
+              zkClient);
     } catch (KeeperException e) {
       throw new SolrException(ErrorCode.SERVER_ERROR, e);
     } catch (InterruptedException e) {
@@ -104,24 +111,39 @@ class ShardLeaderElectionContextBase ext
       throw new SolrException(ErrorCode.SERVER_ERROR, e);
     }
   }
-
+  
   @Override
-  void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart) throws KeeperException,
-      InterruptedException, IOException {
+  void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStart)
+      throws KeeperException, InterruptedException, IOException {
+    // register as leader - if an ephemeral is already there, wait just a bit
+    // to see if it goes away
+    RetryUtil.retryOnThrowable(NodeExistsException.class, 15000, 1000,
+        new RetryCmd() {
+          
+          @Override
+          public void execute() throws InterruptedException {
+            try {
+              zkClient.makePath(leaderPath, ZkStateReader.toJSON(leaderProps),
+                  CreateMode.EPHEMERAL, true);
+            } catch (KeeperException e) {
+              throw new SolrException(
+                  ErrorCode.SERVER_ERROR,
+                  "Could not register as the leader because creating the ephemeral registration
node in ZooKeeper failed", e);
+            }
+          }
+        });
     
-    zkClient.makePath(leaderPath, ZkStateReader.toJSON(leaderProps),
-        CreateMode.EPHEMERAL, true);
     assert shardId != null;
-    ZkNodeProps m = ZkNodeProps.fromKeyVals(Overseer.QUEUE_OPERATION, ZkStateReader.LEADER_PROP,
-        ZkStateReader.SHARD_ID_PROP, shardId, ZkStateReader.COLLECTION_PROP,
-        collection, ZkStateReader.BASE_URL_PROP, leaderProps.getProperties()
-            .get(ZkStateReader.BASE_URL_PROP), ZkStateReader.CORE_NAME_PROP,
+    ZkNodeProps m = ZkNodeProps.fromKeyVals(Overseer.QUEUE_OPERATION,
+        ZkStateReader.LEADER_PROP, ZkStateReader.SHARD_ID_PROP, shardId,
+        ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.BASE_URL_PROP,
+        leaderProps.getProperties().get(ZkStateReader.BASE_URL_PROP),
+        ZkStateReader.CORE_NAME_PROP,
         leaderProps.getProperties().get(ZkStateReader.CORE_NAME_PROP),
         ZkStateReader.STATE_PROP, ZkStateReader.ACTIVE);
     Overseer.getInQueue(zkClient).offer(ZkStateReader.toJSON(m));
-    
   }
-
+  
 }
 
 // add core container and stop passing core around...

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java?rev=1573242&r1=1573241&r2=1573242&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java Sun Mar  2
00:27:25 2014
@@ -564,8 +564,7 @@ public class OverseerTest extends SolrTe
       mockController.publishState("core1", "core_node1", ZkStateReader.ACTIVE,
           1);
       
-      while (version == getClusterStateVersion(controllerClient))
-        ;
+      while (version == getClusterStateVersion(controllerClient));
       
       verifyStatus(reader, ZkStateReader.ACTIVE);
       version = getClusterStateVersion(controllerClient);
@@ -593,9 +592,7 @@ public class OverseerTest extends SolrTe
       assertFalse("collection1 should be gone after publishing the null state",
           reader.getClusterState().getCollections().contains("collection1"));
     } finally {
-      
       close(mockController);
-      
       close(overseerClient);
       close(controllerClient);
       close(reader);

Added: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java?rev=1573242&view=auto
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java (added)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/RetryUtil.java Sun Mar
 2 00:27:25 2014
@@ -0,0 +1,43 @@
+package org.apache.solr.common.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.concurrent.TimeUnit;
+
+public class RetryUtil {
+  public static interface RetryCmd {
+    public void execute() throws InterruptedException;
+  }
+  
+  public static void retryOnThrowable(Class clazz, long timeoutms, long intervalms, RetryCmd
cmd) throws InterruptedException {
+    long timeout = System.nanoTime() + TimeUnit.NANOSECONDS.convert(timeoutms, TimeUnit.MILLISECONDS);
+    while (true) {
+      try {
+        cmd.execute();
+      } catch (Throwable t) {
+        if (clazz.isInstance(t) && System.nanoTime() < timeout) {
+          Thread.sleep(intervalms);
+          continue;
+        }
+        throw t;
+      }
+      // success
+      break;
+    }
+  }
+}



Mime
View raw message