lucene-commits mailing list archives

From: no...@apache.org
Subject: svn commit: r1668992 [1/2] - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/cloud/ core/src/java/org/apache/solr/logging/ core/src/java/org/apache/solr/servlet/ core/src/java/org/apache/solr/util/ server/resources/ solrj/src/java/org/apach...
Date: Tue, 24 Mar 2015 21:01:45 GMT
Author: noble
Date: Tue Mar 24 21:01:44 2015
New Revision: 1668992

URL: http://svn.apache.org/r1668992
Log:
SOLR-6673: MDC based logging of collection, shard, replica, core

Added:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/logging/MDCUtils.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/util/SolrLogLayout.java
    lucene/dev/trunk/solr/server/resources/log4j.properties
    lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/SolrException.java
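
The body of the new MDCUtils helper is not shown in this part of the message, but the calls made below (setMDC, setCollection, cleanupMDC) suggest a thin wrapper over slf4j's MDC along roughly these lines. This is a hypothetical sketch only; the key names collection/shard/replica/core are assumed, and the real class at org.apache.solr.logging.MDCUtils may differ in detail.

    import java.util.Map;

    import org.slf4j.MDC;

    // Hypothetical sketch of the MDCUtils helper this commit introduces.
    public class MDCUtils {

      // Assumed MDC key names; the SolrLogLayout and log4j.properties changes
      // in this commit would reference the same keys.
      public static final String COLLECTION = "collection";
      public static final String SHARD = "shard";
      public static final String REPLICA = "replica";
      public static final String CORE = "core";

      // Stash the identifying values in the thread-local MDC so that every log
      // line emitted while they are set can carry them.
      public static void setMDC(String collection, String shard, String replica, String core) {
        setCollection(collection);
        if (shard != null) MDC.put(SHARD, shard);
        if (replica != null) MDC.put(REPLICA, replica);
        if (core != null) MDC.put(CORE, core);
      }

      public static void setCollection(String collection) {
        if (collection != null) MDC.put(COLLECTION, collection);
      }

      // Restore whatever context the thread carried before. The MDC is
      // thread-local, so pooled threads must be reset or they leak stale values.
      public static void cleanupMDC(Map<String, String> previousMDCContext) {
        MDC.clear();
        if (previousMDCContext != null) {
          MDC.setContextMap(previousMDCContext);
        }
      }
    }

Every caller in the diff follows the same shape: copy the current context with MDC.getCopyOfContextMap(), set the new values, and restore the copy in a finally block so the worker thread does not carry the values into unrelated requests.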

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1668992&r1=1668991&r2=1668992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Mar 24 21:01:44 2015
@@ -384,6 +384,9 @@ Other Changes
 
 * SOLR-6414: Update to Hadoop 2.6.0. (Mark Miller)
 
+* SOLR-6673: MDC based logging of collection, shard, replica, core
+  (Ishan Chattopadhyaya, Noble Paul)
+
 ==================  5.0.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java?rev=1668992&r1=1668991&r2=1668992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java Tue Mar 24 21:01:44 2015
@@ -15,6 +15,7 @@ import org.apache.solr.common.util.Retry
 import org.apache.solr.common.util.RetryUtil.RetryCmd;
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.logging.MDCUtils;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.update.UpdateLog;
 import org.apache.solr.util.RefCounted;
@@ -24,10 +25,12 @@ import org.apache.zookeeper.KeeperExcept
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
 
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.TimeUnit;
 
@@ -113,7 +116,9 @@ class ShardLeaderElectionContextBase ext
     this.zkClient = zkStateReader.getZkClient();
     this.shardId = shardId;
     this.collection = collection;
-    
+
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setMDC(collection, shardId, null, null);
     try {
       new ZkCmdExecutor(zkStateReader.getZkClient().getZkClientTimeout())
           .ensureExists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection,
@@ -123,6 +128,8 @@ class ShardLeaderElectionContextBase ext
     } catch (InterruptedException e) {
       Thread.currentThread().interrupt();
       throw new SolrException(ErrorCode.SERVER_ERROR, e);
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
     }
   }
   

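The MDC values only become visible in the output through the logging layout; this commit also touches SolrLogLayout.java and server/resources/log4j.properties for that purpose (not shown in this part of the message). As a hypothetical illustration only, a log4j 1.2 pattern layout can pull MDC keys into every line with %X{...}:

    # Hypothetical snippet, not the log4j.properties from this commit.
    # %X{key} prints the value stored under that key in the MDC, or nothing if unset.
    log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
    log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout
    log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} %-5p [%X{collection} %X{shard} %X{replica} %X{core}] %c; %m%n
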
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java?rev=1668992&r1=1668991&r2=1668992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java Tue Mar 24 21:01:44 2015
@@ -61,6 +61,7 @@ import org.apache.solr.handler.component
 import org.apache.solr.handler.component.ShardHandlerFactory;
 import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.handler.component.ShardResponse;
+import org.apache.solr.logging.MDCUtils;
 import org.apache.solr.update.SolrIndexSplitter;
 import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.solr.util.stats.Snapshot;
@@ -71,6 +72,7 @@ import org.apache.zookeeper.KeeperExcept
 import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
 
 import java.io.Closeable;
 import java.io.IOException;
@@ -1059,57 +1061,64 @@ public class OverseerCollectionProcessor
     String collectionName = message.getStr(COLLECTION_PROP);
     String shard = message.getStr(SHARD_ID_PROP);
     String replicaName = message.getStr(REPLICA_PROP);
-    DocCollection coll = clusterState.getCollection(collectionName);
-    Slice slice = coll.getSlice(shard);
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
-    if(slice==null){
-      throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid shard name : "+shard+" in collection : "+ collectionName);
-    }
-    Replica replica = slice.getReplica(replicaName);
-    if(replica == null){
-      ArrayList<String> l = new ArrayList<>();
-      for (Replica r : slice.getReplicas()) l.add(r.getName());
-      throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid replica : " + replicaName + " in shard/collection : "
-          + shard + "/"+ collectionName + " available replicas are "+ StrUtils.join(l,','));
-    }
-
-    // If users are being safe and only want to remove a shard if it is down, they can specify onlyIfDown=true
-    // on the command.
-    if (Boolean.parseBoolean(message.getStr(ONLY_IF_DOWN)) &&
-        ZkStateReader.DOWN.equals(replica.getStr(ZkStateReader.STATE_PROP)) == false) {
-      throw new SolrException(ErrorCode.BAD_REQUEST, "Attempted to remove replica : " + collectionName + "/" +
-          shard+"/" + replicaName +
-          " with onlyIfDown='true', but state is '" + replica.getStr(ZkStateReader.STATE_PROP) + "'");
-    }
-
-    String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
-    String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);
-    
-    // assume the core exists and try to unload it
-    Map m = ZkNodeProps.makeMap("qt", adminPath, CoreAdminParams.ACTION,
-        CoreAdminAction.UNLOAD.toString(), CoreAdminParams.CORE, core,
-        CoreAdminParams.DELETE_INSTANCE_DIR, "true",
-        CoreAdminParams.DELETE_DATA_DIR, "true");
-    
-    ShardRequest sreq = new ShardRequest();
-    sreq.purpose = 1;
-    sreq.shards = new String[] {baseUrl};
-    sreq.actualShards = sreq.shards;
-    sreq.params = new ModifiableSolrParams(new MapSolrParams(m));
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setMDC(collectionName, shard, replicaName, null);
     try {
-      shardHandler.submit(sreq, baseUrl, sreq.params);
-    } catch (Exception e) {
-      log.warn("Exception trying to unload core " + sreq, e);
-    }
-    
-    collectShardResponses(!Slice.ACTIVE.equals(replica.getStr(Slice.STATE)) ? new NamedList() : results,
-        false, null, shardHandler);
-    
-    if (waitForCoreNodeGone(collectionName, shard, replicaName, 5000)) return;//check if the core unload removed the corenode zk enry
-    deleteCoreNode(collectionName, replicaName, replica, core); // try and ensure core info is removed from clusterstate
-    if(waitForCoreNodeGone(collectionName, shard, replicaName, 30000)) return;
+      DocCollection coll = clusterState.getCollection(collectionName);
+      Slice slice = coll.getSlice(shard);
+      ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+      if (slice == null) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid shard name : " + shard + " in collection : " + collectionName);
+      }
+      Replica replica = slice.getReplica(replicaName);
+      if (replica == null) {
+        ArrayList<String> l = new ArrayList<>();
+        for (Replica r : slice.getReplicas()) l.add(r.getName());
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid replica : " + replicaName + " in shard/collection : "
+            + shard + "/" + collectionName + " available replicas are " + StrUtils.join(l, ','));
+      }
+
+      // If users are being safe and only want to remove a shard if it is down, they can specify onlyIfDown=true
+      // on the command.
+      if (Boolean.parseBoolean(message.getStr(ONLY_IF_DOWN)) &&
+          ZkStateReader.DOWN.equals(replica.getStr(ZkStateReader.STATE_PROP)) == false) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Attempted to remove replica : " + collectionName + "/" +
+            shard + "/" + replicaName +
+            " with onlyIfDown='true', but state is '" + replica.getStr(ZkStateReader.STATE_PROP) + "'");
+      }
+
+      String baseUrl = replica.getStr(ZkStateReader.BASE_URL_PROP);
+      String core = replica.getStr(ZkStateReader.CORE_NAME_PROP);
+
+      // assume the core exists and try to unload it
+      Map m = ZkNodeProps.makeMap("qt", adminPath, CoreAdminParams.ACTION,
+          CoreAdminAction.UNLOAD.toString(), CoreAdminParams.CORE, core,
+          CoreAdminParams.DELETE_INSTANCE_DIR, "true",
+          CoreAdminParams.DELETE_DATA_DIR, "true");
+
+      ShardRequest sreq = new ShardRequest();
+      sreq.purpose = 1;
+      sreq.shards = new String[]{baseUrl};
+      sreq.actualShards = sreq.shards;
+      sreq.params = new ModifiableSolrParams(new MapSolrParams(m));
+      try {
+        shardHandler.submit(sreq, baseUrl, sreq.params);
+      } catch (Exception e) {
+        log.warn("Exception trying to unload core " + sreq, e);
+      }
+
+      collectShardResponses(!Slice.ACTIVE.equals(replica.getStr(Slice.STATE)) ? new NamedList() : results,
+          false, null, shardHandler);
+
+      if (waitForCoreNodeGone(collectionName, shard, replicaName, 5000))
+        return; //check if the core unload removed the core node ZK entry
+      deleteCoreNode(collectionName, replicaName, replica, core); // try and ensure core info is removed from clusterstate
+      if (waitForCoreNodeGone(collectionName, shard, replicaName, 30000)) return;
 
-    throw new SolrException(ErrorCode.SERVER_ERROR, "Could not  remove replica : " + collectionName + "/" + shard+"/" + replicaName);
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Could not  remove replica : " + collectionName + "/" + shard + "/" + replicaName);
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
+    }
   }
 
   private boolean waitForCoreNodeGone(String collectionName, String shard, String replicaName, int timeoutms) throws InterruptedException {
@@ -1205,33 +1214,40 @@ public class OverseerCollectionProcessor
   private void createAlias(Aliases aliases, ZkNodeProps message) {
     String aliasName = message.getStr("name");
     String collections = message.getStr("collections");
-    
-    Map<String,Map<String,String>> newAliasesMap = new HashMap<>();
-    Map<String,String> newCollectionAliasesMap = new HashMap<>();
-    Map<String,String> prevColAliases = aliases.getCollectionAliasMap();
-    if (prevColAliases != null) {
-      newCollectionAliasesMap.putAll(prevColAliases);
-    }
-    newCollectionAliasesMap.put(aliasName, collections);
-    newAliasesMap.put("collection", newCollectionAliasesMap);
-    Aliases newAliases = new Aliases(newAliasesMap);
-    byte[] jsonBytes = null;
-    if (newAliases.collectionAliasSize() > 0) { // only sub map right now
-      jsonBytes  = ZkStateReader.toJSON(newAliases.getAliasMap());
-    }
+
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setCollection(aliasName);
+
     try {
-      zkStateReader.getZkClient().setData(ZkStateReader.ALIASES,
-          jsonBytes, true);
-      
-      checkForAlias(aliasName, collections);
-      // some fudge for other nodes
-      Thread.sleep(100);
-    } catch (KeeperException e) {
-      log.error("", e);
-      throw new SolrException(ErrorCode.SERVER_ERROR, e);
-    } catch (InterruptedException e) {
-      log.warn("", e);
-      throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      Map<String, Map<String, String>> newAliasesMap = new HashMap<>();
+      Map<String, String> newCollectionAliasesMap = new HashMap<>();
+      Map<String, String> prevColAliases = aliases.getCollectionAliasMap();
+      if (prevColAliases != null) {
+        newCollectionAliasesMap.putAll(prevColAliases);
+      }
+      newCollectionAliasesMap.put(aliasName, collections);
+      newAliasesMap.put("collection", newCollectionAliasesMap);
+      Aliases newAliases = new Aliases(newAliasesMap);
+      byte[] jsonBytes = null;
+      if (newAliases.collectionAliasSize() > 0) { // only sub map right now
+        jsonBytes = ZkStateReader.toJSON(newAliases.getAliasMap());
+      }
+      try {
+        zkStateReader.getZkClient().setData(ZkStateReader.ALIASES,
+            jsonBytes, true);
+
+        checkForAlias(aliasName, collections);
+        // some fudge for other nodes
+        Thread.sleep(100);
+      } catch (KeeperException e) {
+        log.error("", e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      } catch (InterruptedException e) {
+        log.warn("", e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, e);
+      }
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
     }
 
   }
@@ -1276,6 +1292,8 @@ public class OverseerCollectionProcessor
 
   private void deleteAlias(Aliases aliases, ZkNodeProps message) {
     String aliasName = message.getStr("name");
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setCollection(aliasName);
 
     Map<String,Map<String,String>> newAliasesMap = new HashMap<>();
     Map<String,String> newCollectionAliasesMap = new HashMap<>();
@@ -1299,471 +1317,486 @@ public class OverseerCollectionProcessor
     } catch (InterruptedException e) {
       log.warn("", e);
       throw new SolrException(ErrorCode.SERVER_ERROR, e);
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
     }
     
   }
 
   private boolean createShard(ClusterState clusterState, ZkNodeProps message, NamedList results)
       throws KeeperException, InterruptedException {
-    log.info("Create shard invoked: {}", message);
+    Map previousMDCContext = MDC.getCopyOfContextMap();
     String collectionName = message.getStr(COLLECTION_PROP);
     String sliceName = message.getStr(SHARD_ID_PROP);
-    if (collectionName == null || sliceName == null)
-      throw new SolrException(ErrorCode.BAD_REQUEST, "'collection' and 'shard' are required parameters");
-    int numSlices = 1;
 
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
-    DocCollection collection = clusterState.getCollection(collectionName);
-    int maxShardsPerNode = collection.getInt(ZkStateReader.MAX_SHARDS_PER_NODE, 1);
-    int repFactor = message.getInt(ZkStateReader.REPLICATION_FACTOR, collection.getInt(ZkStateReader.REPLICATION_FACTOR, 1));
-    String createNodeSetStr = message.getStr(CREATE_NODE_SET);
-
-    ArrayList<Node> sortedNodeList = getNodesForNewShard(clusterState, collectionName, numSlices, maxShardsPerNode, repFactor, createNodeSetStr);
-
-    Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(message));
-    // wait for a while until we see the shard
-    long waitUntil = System.nanoTime() + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
-    boolean created = false;
-    while (System.nanoTime() < waitUntil) {
-      Thread.sleep(100);
-      created = zkStateReader.getClusterState().getCollection(collectionName).getSlice(sliceName) != null;
-      if (created) break;
-    }
-    if (!created)
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create shard: " + message.getStr("name"));
+    MDCUtils.setMDC(collectionName, sliceName, null, null);
+    try {
+      log.info("Create shard invoked: {}", message);
+      if (collectionName == null || sliceName == null)
+        throw new SolrException(ErrorCode.BAD_REQUEST, "'collection' and 'shard' are required parameters");
+      int numSlices = 1;
 
+      ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+      DocCollection collection = clusterState.getCollection(collectionName);
+      int maxShardsPerNode = collection.getInt(ZkStateReader.MAX_SHARDS_PER_NODE, 1);
+      int repFactor = message.getInt(ZkStateReader.REPLICATION_FACTOR, collection.getInt(ZkStateReader.REPLICATION_FACTOR, 1));
+      String createNodeSetStr = message.getStr(CREATE_NODE_SET);
 
-    String configName = message.getStr(COLL_CONF);
-    for (int j = 1; j <= repFactor; j++) {
-      String nodeName = sortedNodeList.get(((j - 1)) % sortedNodeList.size()).nodeName;
-      String shardName = collectionName + "_" + sliceName + "_replica" + j;
-      log.info("Creating shard " + shardName + " as part of slice "
-          + sliceName + " of collection " + collectionName + " on "
-          + nodeName);
+      ArrayList<Node> sortedNodeList = getNodesForNewShard(clusterState, collectionName, numSlices, maxShardsPerNode, repFactor, createNodeSetStr);
 
-      // Need to create new params for each request
-      ModifiableSolrParams params = new ModifiableSolrParams();
-      params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
+      Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(message));
+      // wait for a while until we see the shard
+      long waitUntil = System.nanoTime() + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
+      boolean created = false;
+      while (System.nanoTime() < waitUntil) {
+        Thread.sleep(100);
+        created = zkStateReader.getClusterState().getCollection(collectionName).getSlice(sliceName) != null;
+        if (created) break;
+      }
+      if (!created)
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create shard: " + message.getStr("name"));
 
-      params.set(CoreAdminParams.NAME, shardName);
-      params.set(COLL_CONF, configName);
-      params.set(CoreAdminParams.COLLECTION, collectionName);
-      params.set(CoreAdminParams.SHARD, sliceName);
-      params.set(ZkStateReader.NUM_SHARDS_PROP, numSlices);
-      addPropertyParams(message, params);
 
-      ShardRequest sreq = new ShardRequest();
-      params.set("qt", adminPath);
-      sreq.purpose = 1;
-      String replica = zkStateReader.getBaseUrlForNodeName(nodeName);
-      sreq.shards = new String[]{replica};
-      sreq.actualShards = sreq.shards;
-      sreq.params = params;
+      String configName = message.getStr(COLL_CONF);
+      for (int j = 1; j <= repFactor; j++) {
+        String nodeName = sortedNodeList.get(((j - 1)) % sortedNodeList.size()).nodeName;
+        String shardName = collectionName + "_" + sliceName + "_replica" + j;
+        log.info("Creating shard " + shardName + " as part of slice "
+            + sliceName + " of collection " + collectionName + " on "
+            + nodeName);
 
-      shardHandler.submit(sreq, replica, sreq.params);
+        // Need to create new params for each request
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
+
+        params.set(CoreAdminParams.NAME, shardName);
+        params.set(COLL_CONF, configName);
+        params.set(CoreAdminParams.COLLECTION, collectionName);
+        params.set(CoreAdminParams.SHARD, sliceName);
+        params.set(ZkStateReader.NUM_SHARDS_PROP, numSlices);
+        addPropertyParams(message, params);
 
-    }
+        ShardRequest sreq = new ShardRequest();
+        params.set("qt", adminPath);
+        sreq.purpose = 1;
+        String replica = zkStateReader.getBaseUrlForNodeName(nodeName);
+        sreq.shards = new String[]{replica};
+        sreq.actualShards = sreq.shards;
+        sreq.params = params;
 
-    processResponses(results, shardHandler);
+        shardHandler.submit(sreq, replica, sreq.params);
 
-    log.info("Finished create command on all shards for collection: "
-        + collectionName);
+      }
 
-    return true;
+      processResponses(results, shardHandler);
+
+      log.info("Finished create command on all shards for collection: "
+          + collectionName);
+
+      return true;
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
+    }
   }
 
 
   private boolean splitShard(ClusterState clusterState, ZkNodeProps message, NamedList results) {
-    log.info("Split shard invoked");
     String collectionName = message.getStr("collection");
     String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
-    String splitKey = message.getStr("split.key");
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setMDC(collectionName, slice, null, null);
+    try {
+      log.info("Split shard invoked");
+      String splitKey = message.getStr("split.key");
+      ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
 
-    DocCollection collection = clusterState.getCollection(collectionName);
-    DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
+      DocCollection collection = clusterState.getCollection(collectionName);
+      DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
 
-    Slice parentSlice = null;
+      Slice parentSlice = null;
 
-    if (slice == null)  {
-      if (router instanceof CompositeIdRouter) {
-        Collection<Slice> searchSlices = router.getSearchSlicesSingle(splitKey, new ModifiableSolrParams(), collection);
-        if (searchSlices.isEmpty()) {
-          throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to find an active shard for split.key: " + splitKey);
-        }
-        if (searchSlices.size() > 1)  {
+      if (slice == null) {
+        if (router instanceof CompositeIdRouter) {
+          Collection<Slice> searchSlices = router.getSearchSlicesSingle(splitKey, new ModifiableSolrParams(), collection);
+          if (searchSlices.isEmpty()) {
+            throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to find an active shard for split.key: " + splitKey);
+          }
+          if (searchSlices.size() > 1) {
+            throw new SolrException(ErrorCode.BAD_REQUEST,
+                "Splitting a split.key: " + splitKey + " which spans multiple shards is not supported");
+          }
+          parentSlice = searchSlices.iterator().next();
+          slice = parentSlice.getName();
+          log.info("Split by route.key: {}, parent shard is: {} ", splitKey, slice);
+        } else {
           throw new SolrException(ErrorCode.BAD_REQUEST,
-              "Splitting a split.key: " + splitKey + " which spans multiple shards is not supported");
+              "Split by route key can only be used with CompositeIdRouter or subclass. Found router: " + router.getClass().getName());
         }
-        parentSlice = searchSlices.iterator().next();
-        slice = parentSlice.getName();
-        log.info("Split by route.key: {}, parent shard is: {} ", splitKey, slice);
-      } else  {
-        throw new SolrException(ErrorCode.BAD_REQUEST,
-            "Split by route key can only be used with CompositeIdRouter or subclass. Found router: " + router.getClass().getName());
+      } else {
+        parentSlice = clusterState.getSlice(collectionName, slice);
       }
-    } else  {
-      parentSlice = clusterState.getSlice(collectionName, slice);
-    }
 
-    if (parentSlice == null) {
-      if(clusterState.hasCollection(collectionName)) {
-        throw new SolrException(ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice);
-      } else {
-        throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
-      }      
-    }
-    
-    // find the leader for the shard
-    Replica parentShardLeader = null;
-    try {
-      parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice, 10000);
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-    }
+      if (parentSlice == null) {
+        if (clusterState.hasCollection(collectionName)) {
+          throw new SolrException(ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice);
+        } else {
+          throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
+        }
+      }
 
-    DocRouter.Range range = parentSlice.getRange();
-    if (range == null) {
-      range = new PlainIdRouter().fullRange();
-    }
+      // find the leader for the shard
+      Replica parentShardLeader = null;
+      try {
+        parentShardLeader = zkStateReader.getLeaderRetry(collectionName, slice, 10000);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+      }
 
-    List<DocRouter.Range> subRanges = null;
-    String rangesStr = message.getStr(CoreAdminParams.RANGES);
-    if (rangesStr != null)  {
-      String[] ranges = rangesStr.split(",");
-      if (ranges.length == 0 || ranges.length == 1) {
-        throw new SolrException(ErrorCode.BAD_REQUEST, "There must be at least two ranges specified to split a shard");
-      } else  {
-        subRanges = new ArrayList<>(ranges.length);
-        for (int i = 0; i < ranges.length; i++) {
-          String r = ranges[i];
-          try {
-            subRanges.add(DocRouter.DEFAULT.fromString(r));
-          } catch (Exception e) {
-            throw new SolrException(ErrorCode.BAD_REQUEST, "Exception in parsing hexadecimal hash range: " + r, e);
+      DocRouter.Range range = parentSlice.getRange();
+      if (range == null) {
+        range = new PlainIdRouter().fullRange();
+      }
+
+      List<DocRouter.Range> subRanges = null;
+      String rangesStr = message.getStr(CoreAdminParams.RANGES);
+      if (rangesStr != null) {
+        String[] ranges = rangesStr.split(",");
+        if (ranges.length == 0 || ranges.length == 1) {
+          throw new SolrException(ErrorCode.BAD_REQUEST, "There must be at least two ranges specified to split a shard");
+        } else {
+          subRanges = new ArrayList<>(ranges.length);
+          for (int i = 0; i < ranges.length; i++) {
+            String r = ranges[i];
+            try {
+              subRanges.add(DocRouter.DEFAULT.fromString(r));
+            } catch (Exception e) {
+              throw new SolrException(ErrorCode.BAD_REQUEST, "Exception in parsing hexadecimal hash range: " + r, e);
+            }
+            if (!subRanges.get(i).isSubsetOf(range)) {
+              throw new SolrException(ErrorCode.BAD_REQUEST,
+                  "Specified hash range: " + r + " is not a subset of parent shard's range: " + range.toString());
+            }
           }
-          if (!subRanges.get(i).isSubsetOf(range)) {
+          List<DocRouter.Range> temp = new ArrayList<>(subRanges); // copy to preserve original order
+          Collections.sort(temp);
+          if (!range.equals(new DocRouter.Range(temp.get(0).min, temp.get(temp.size() - 1).max))) {
             throw new SolrException(ErrorCode.BAD_REQUEST,
-                "Specified hash range: " + r + " is not a subset of parent shard's range: " + range.toString());
+                "Specified hash ranges: " + rangesStr + " do not cover the entire range of parent shard: " + range);
+          }
+          for (int i = 1; i < temp.size(); i++) {
+            if (temp.get(i - 1).max + 1 != temp.get(i).min) {
+              throw new SolrException(ErrorCode.BAD_REQUEST,
+                  "Specified hash ranges: " + rangesStr + " either overlap with each other or " +
+                      "do not cover the entire range of parent shard: " + range);
+            }
           }
         }
-        List<DocRouter.Range> temp = new ArrayList<>(subRanges); // copy to preserve original order
-        Collections.sort(temp);
-        if (!range.equals(new DocRouter.Range(temp.get(0).min, temp.get(temp.size() - 1).max)))  {
-          throw new SolrException(ErrorCode.BAD_REQUEST,
-              "Specified hash ranges: " + rangesStr + " do not cover the entire range of parent shard: " + range);
-        }
-        for (int i = 1; i < temp.size(); i++) {
-          if (temp.get(i - 1).max + 1 != temp.get(i).min) {
+      } else if (splitKey != null) {
+        if (router instanceof CompositeIdRouter) {
+          CompositeIdRouter compositeIdRouter = (CompositeIdRouter) router;
+          subRanges = compositeIdRouter.partitionRangeByKey(splitKey, range);
+          if (subRanges.size() == 1) {
             throw new SolrException(ErrorCode.BAD_REQUEST,
-                "Specified hash ranges: " + rangesStr + " either overlap with each other or " +
-                    "do not cover the entire range of parent shard: " + range);
+                "The split.key: " + splitKey + " has a hash range that is exactly equal to hash range of shard: " + slice);
           }
-        }
-      }
-    } else if (splitKey != null)  {
-      if (router instanceof CompositeIdRouter) {
-        CompositeIdRouter compositeIdRouter = (CompositeIdRouter) router;
-        subRanges = compositeIdRouter.partitionRangeByKey(splitKey, range);
-        if (subRanges.size() == 1)  {
-          throw new SolrException(ErrorCode.BAD_REQUEST,
-              "The split.key: " + splitKey + " has a hash range that is exactly equal to hash range of shard: " + slice);
-        }
-        for (DocRouter.Range subRange : subRanges) {
-          if (subRange.min == subRange.max) {
-            throw new SolrException(ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " must be a compositeId");
+          for (DocRouter.Range subRange : subRanges) {
+            if (subRange.min == subRange.max) {
+              throw new SolrException(ErrorCode.BAD_REQUEST, "The split.key: " + splitKey + " must be a compositeId");
+            }
+          }
+          log.info("Partitioning parent shard " + slice + " range: " + parentSlice.getRange() + " yields: " + subRanges);
+          rangesStr = "";
+          for (int i = 0; i < subRanges.size(); i++) {
+            DocRouter.Range subRange = subRanges.get(i);
+            rangesStr += subRange.toString();
+            if (i < subRanges.size() - 1)
+              rangesStr += ',';
           }
         }
-        log.info("Partitioning parent shard " + slice + " range: " + parentSlice.getRange() + " yields: " + subRanges);
-        rangesStr = "";
-        for (int i = 0; i < subRanges.size(); i++) {
-          DocRouter.Range subRange = subRanges.get(i);
-          rangesStr += subRange.toString();
-          if (i < subRanges.size() - 1)
-            rangesStr += ',';
-        }
+      } else {
+        // todo: fixed to two partitions?
+        subRanges = router.partitionRange(2, range);
       }
-    } else  {
-      // todo: fixed to two partitions?
-      subRanges = router.partitionRange(2, range);
-    }
 
-    try {
-      List<String> subSlices = new ArrayList<>(subRanges.size());
-      List<String> subShardNames = new ArrayList<>(subRanges.size());
-      String nodeName = parentShardLeader.getNodeName();
-      for (int i = 0; i < subRanges.size(); i++) {
-        String subSlice = slice + "_" + i;
-        subSlices.add(subSlice);
-        String subShardName = collectionName + "_" + subSlice + "_replica1";
-        subShardNames.add(subShardName);
-
-        Slice oSlice = clusterState.getSlice(collectionName, subSlice);
-        if (oSlice != null) {
-          if (Slice.ACTIVE.equals(oSlice.getState())) {
-            throw new SolrException(ErrorCode.BAD_REQUEST, "Sub-shard: " + subSlice + " exists in active state. Aborting split shard.");
-          } else if (Slice.CONSTRUCTION.equals(oSlice.getState()) || Slice.RECOVERY.equals(oSlice.getState()))  {
-            // delete the shards
-            for (String sub : subSlices) {
-              log.info("Sub-shard: {} already exists therefore requesting its deletion", sub);
-              Map<String, Object> propMap = new HashMap<>();
-              propMap.put(Overseer.QUEUE_OPERATION, "deleteshard");
-              propMap.put(COLLECTION_PROP, collectionName);
-              propMap.put(SHARD_ID_PROP, sub);
-              ZkNodeProps m = new ZkNodeProps(propMap);
-              try {
-                deleteShard(clusterState, m, new NamedList());
-              } catch (Exception e) {
-                throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to delete already existing sub shard: " + sub, e);
+      try {
+        List<String> subSlices = new ArrayList<>(subRanges.size());
+        List<String> subShardNames = new ArrayList<>(subRanges.size());
+        String nodeName = parentShardLeader.getNodeName();
+        for (int i = 0; i < subRanges.size(); i++) {
+          String subSlice = slice + "_" + i;
+          subSlices.add(subSlice);
+          String subShardName = collectionName + "_" + subSlice + "_replica1";
+          subShardNames.add(subShardName);
+
+          Slice oSlice = clusterState.getSlice(collectionName, subSlice);
+          if (oSlice != null) {
+            if (Slice.ACTIVE.equals(oSlice.getState())) {
+              throw new SolrException(ErrorCode.BAD_REQUEST, "Sub-shard: " + subSlice + " exists in active state. Aborting split shard.");
+            } else if (Slice.CONSTRUCTION.equals(oSlice.getState()) || Slice.RECOVERY.equals(oSlice.getState())) {
+              // delete the shards
+              for (String sub : subSlices) {
+                log.info("Sub-shard: {} already exists therefore requesting its deletion", sub);
+                Map<String, Object> propMap = new HashMap<>();
+                propMap.put(Overseer.QUEUE_OPERATION, "deleteshard");
+                propMap.put(COLLECTION_PROP, collectionName);
+                propMap.put(SHARD_ID_PROP, sub);
+                ZkNodeProps m = new ZkNodeProps(propMap);
+                try {
+                  deleteShard(clusterState, m, new NamedList());
+                } catch (Exception e) {
+                  throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to delete already existing sub shard: " + sub, e);
+                }
               }
             }
           }
         }
-      }
 
-      // do not abort splitshard if the unloading fails
-      // this can happen because the replicas created previously may be down
-      // the only side effect of this is that the sub shard may end up having more replicas than we want
-      collectShardResponses(results, false, null, shardHandler);
+        // do not abort splitshard if the unloading fails
+        // this can happen because the replicas created previously may be down
+        // the only side effect of this is that the sub shard may end up having more replicas than we want
+        collectShardResponses(results, false, null, shardHandler);
 
-      String asyncId = message.getStr(ASYNC);
-      HashMap<String, String> requestMap = new HashMap<String, String>();
+        String asyncId = message.getStr(ASYNC);
+        HashMap<String, String> requestMap = new HashMap<String, String>();
 
-      for (int i=0; i<subRanges.size(); i++)  {
-        String subSlice = subSlices.get(i);
-        String subShardName = subShardNames.get(i);
-        DocRouter.Range subRange = subRanges.get(i);
+        for (int i = 0; i < subRanges.size(); i++) {
+          String subSlice = subSlices.get(i);
+          String subShardName = subShardNames.get(i);
+          DocRouter.Range subRange = subRanges.get(i);
 
-        log.info("Creating slice "
-            + subSlice + " of collection " + collectionName + " on "
-            + nodeName);
+          log.info("Creating slice "
+              + subSlice + " of collection " + collectionName + " on "
+              + nodeName);
 
-        Map<String, Object> propMap = new HashMap<>();
-        propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD.toLower());
-        propMap.put(ZkStateReader.SHARD_ID_PROP, subSlice);
-        propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
-        propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
-        propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.CONSTRUCTION);
-        propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
-        DistributedQueue inQueue = Overseer.getInQueue(zkStateReader.getZkClient());
-        inQueue.offer(ZkStateReader.toJSON(new ZkNodeProps(propMap)));
+          Map<String, Object> propMap = new HashMap<>();
+          propMap.put(Overseer.QUEUE_OPERATION, CREATESHARD.toLower());
+          propMap.put(ZkStateReader.SHARD_ID_PROP, subSlice);
+          propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
+          propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
+          propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.CONSTRUCTION);
+          propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
+          DistributedQueue inQueue = Overseer.getInQueue(zkStateReader.getZkClient());
+          inQueue.offer(ZkStateReader.toJSON(new ZkNodeProps(propMap)));
 
-        // wait until we are able to see the new shard in cluster state
-        waitForNewShard(collectionName, subSlice);
+          // wait until we are able to see the new shard in cluster state
+          waitForNewShard(collectionName, subSlice);
 
-        // refresh cluster state
-        clusterState = zkStateReader.getClusterState();
+          // refresh cluster state
+          clusterState = zkStateReader.getClusterState();
 
-        log.info("Adding replica " + subShardName + " as part of slice "
-            + subSlice + " of collection " + collectionName + " on "
-            + nodeName);
-        propMap = new HashMap<>();
-        propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
-        propMap.put(COLLECTION_PROP, collectionName);
-        propMap.put(SHARD_ID_PROP, subSlice);
-        propMap.put("node", nodeName);
-        propMap.put(CoreAdminParams.NAME, subShardName);
-        // copy over property params:
-        for (String key : message.keySet()) {
-          if (key.startsWith(COLL_PROP_PREFIX)) {
-            propMap.put(key, message.getStr(key));
+          log.info("Adding replica " + subShardName + " as part of slice "
+              + subSlice + " of collection " + collectionName + " on "
+              + nodeName);
+          propMap = new HashMap<>();
+          propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
+          propMap.put(COLLECTION_PROP, collectionName);
+          propMap.put(SHARD_ID_PROP, subSlice);
+          propMap.put("node", nodeName);
+          propMap.put(CoreAdminParams.NAME, subShardName);
+          // copy over property params:
+          for (String key : message.keySet()) {
+            if (key.startsWith(COLL_PROP_PREFIX)) {
+              propMap.put(key, message.getStr(key));
+            }
           }
+          // add async param
+          if(asyncId != null) {
+            propMap.put(ASYNC, asyncId);
+          }
+          addReplica(clusterState, new ZkNodeProps(propMap), results);
         }
-        // add async param
-        if(asyncId != null) {
-          propMap.put(ASYNC, asyncId);
-        }
-        addReplica(clusterState, new ZkNodeProps(propMap), results);
-      }
 
-      collectShardResponses(results, true,
-          "SPLITSHARD failed to create subshard leaders", shardHandler);
-
-      completeAsyncRequest(asyncId, requestMap, results);
+        collectShardResponses(results, true,
+            "SPLITSHARD failed to create subshard leaders", shardHandler);
 
-      for (String subShardName : subShardNames) {
-        // wait for parent leader to acknowledge the sub-shard core
-        log.info("Asking parent leader to wait for: " + subShardName + " to be alive on: " + nodeName);
-        String coreNodeName = waitForCoreNodeName(collectionName, nodeName, subShardName);
-        CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
-        cmd.setCoreName(subShardName);
-        cmd.setNodeName(nodeName);
-        cmd.setCoreNodeName(coreNodeName);
-        cmd.setState(ZkStateReader.ACTIVE);
-        cmd.setCheckLive(true);
-        cmd.setOnlyIfLeader(true);
+        completeAsyncRequest(asyncId, requestMap, results);
 
-        ModifiableSolrParams p = new ModifiableSolrParams(cmd.getParams());
-        sendShardRequest(nodeName, p, shardHandler, asyncId, requestMap);
-      }
+        for (String subShardName : subShardNames) {
+          // wait for parent leader to acknowledge the sub-shard core
+          log.info("Asking parent leader to wait for: " + subShardName + " to be alive on: " + nodeName);
+          String coreNodeName = waitForCoreNodeName(collectionName, nodeName, subShardName);
+          CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
+          cmd.setCoreName(subShardName);
+          cmd.setNodeName(nodeName);
+          cmd.setCoreNodeName(coreNodeName);
+          cmd.setState(ZkStateReader.ACTIVE);
+          cmd.setCheckLive(true);
+          cmd.setOnlyIfLeader(true);
 
-      collectShardResponses(results, true,
-          "SPLITSHARD timed out waiting for subshard leaders to come up", shardHandler);
+          ModifiableSolrParams p = new ModifiableSolrParams(cmd.getParams());
+          sendShardRequest(nodeName, p, shardHandler, asyncId, requestMap);
+        }
 
-      completeAsyncRequest(asyncId, requestMap, results);
+        collectShardResponses(results, true,
+            "SPLITSHARD timed out waiting for subshard leaders to come up", shardHandler);
 
-      log.info("Successfully created all sub-shards for collection "
-          + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
+        completeAsyncRequest(asyncId, requestMap, results);
 
-      log.info("Splitting shard " + parentShardLeader.getName() + " as part of slice "
-          + slice + " of collection " + collectionName + " on "
-          + parentShardLeader);
+        log.info("Successfully created all sub-shards for collection "
+            + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
 
-      ModifiableSolrParams params = new ModifiableSolrParams();
-      params.set(CoreAdminParams.ACTION, CoreAdminAction.SPLIT.toString());
-      params.set(CoreAdminParams.CORE, parentShardLeader.getStr("core"));
-      for (int i = 0; i < subShardNames.size(); i++) {
-        String subShardName = subShardNames.get(i);
-        params.add(CoreAdminParams.TARGET_CORE, subShardName);
-      }
-      params.set(CoreAdminParams.RANGES, rangesStr);
+        log.info("Splitting shard " + parentShardLeader.getName() + " as part of slice "
+            + slice + " of collection " + collectionName + " on "
+            + parentShardLeader);
 
-      sendShardRequest(parentShardLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set(CoreAdminParams.ACTION, CoreAdminAction.SPLIT.toString());
+        params.set(CoreAdminParams.CORE, parentShardLeader.getStr("core"));
+        for (int i = 0; i < subShardNames.size(); i++) {
+          String subShardName = subShardNames.get(i);
+          params.add(CoreAdminParams.TARGET_CORE, subShardName);
+        }
+        params.set(CoreAdminParams.RANGES, rangesStr);
 
-      collectShardResponses(results, true, "SPLITSHARD failed to invoke SPLIT core admin command",
-          shardHandler);
-      completeAsyncRequest(asyncId, requestMap, results);
+        sendShardRequest(parentShardLeader.getNodeName(), params, shardHandler, asyncId, requestMap);
 
-      log.info("Index on shard: " + nodeName + " split into two successfully");
+        collectShardResponses(results, true, "SPLITSHARD failed to invoke SPLIT core admin command",
+            shardHandler);
+        completeAsyncRequest(asyncId, requestMap, results);
 
-      // apply buffered updates on sub-shards
-      for (int i = 0; i < subShardNames.size(); i++) {
-        String subShardName = subShardNames.get(i);
+        log.info("Index on shard: " + nodeName + " split into two successfully");
 
-        log.info("Applying buffered updates on : " + subShardName);
+        // apply buffered updates on sub-shards
+        for (int i = 0; i < subShardNames.size(); i++) {
+          String subShardName = subShardNames.get(i);
 
-        params = new ModifiableSolrParams();
-        params.set(CoreAdminParams.ACTION, CoreAdminAction.REQUESTAPPLYUPDATES.toString());
-        params.set(CoreAdminParams.NAME, subShardName);
+          log.info("Applying buffered updates on : " + subShardName);
 
-        sendShardRequest(nodeName, params, shardHandler, asyncId, requestMap);
-      }
+          params = new ModifiableSolrParams();
+          params.set(CoreAdminParams.ACTION, CoreAdminAction.REQUESTAPPLYUPDATES.toString());
+          params.set(CoreAdminParams.NAME, subShardName);
 
-      collectShardResponses(results, true,
-          "SPLITSHARD failed while asking sub shard leaders to apply buffered updates",
-          shardHandler);
+          sendShardRequest(nodeName, params, shardHandler, asyncId, requestMap);
+        }
 
-      completeAsyncRequest(asyncId, requestMap, results);
+        collectShardResponses(results, true,
+            "SPLITSHARD failed while asking sub shard leaders to apply buffered updates",
+            shardHandler);
 
-      log.info("Successfully applied buffered updates on : " + subShardNames);
+        completeAsyncRequest(asyncId, requestMap, results);
 
-      // Replica creation for the new Slices
+        log.info("Successfully applied buffered updates on : " + subShardNames);
 
-      // look at the replication factor and see if it matches reality
-      // if it does not, find best nodes to create more cores
+        // Replica creation for the new Slices
 
-      // TODO: Have replication factor decided in some other way instead of numShards for the parent
+        // look at the replication factor and see if it matches reality
+        // if it does not, find best nodes to create more cores
 
-      int repFactor = clusterState.getSlice(collectionName, slice).getReplicas().size();
+        // TODO: Have replication factor decided in some other way instead of numShards for the parent
 
-      // we need to look at every node and see how many cores it serves
-      // add our new cores to existing nodes serving the least number of cores
-      // but (for now) require that each core goes on a distinct node.
+        int repFactor = clusterState.getSlice(collectionName, slice).getReplicas().size();
 
-      // TODO: add smarter options that look at the current number of cores per
-      // node?
-      // for now we just go random
-      Set<String> nodes = clusterState.getLiveNodes();
-      List<String> nodeList = new ArrayList<>(nodes.size());
-      nodeList.addAll(nodes);
+        // we need to look at every node and see how many cores it serves
+        // add our new cores to existing nodes serving the least number of cores
+        // but (for now) require that each core goes on a distinct node.
 
-      Collections.shuffle(nodeList, RANDOM);
+        // TODO: add smarter options that look at the current number of cores per
+        // node?
+        // for now we just go random
+        Set<String> nodes = clusterState.getLiveNodes();
+        List<String> nodeList = new ArrayList<>(nodes.size());
+        nodeList.addAll(nodes);
 
-      // TODO: Have maxShardsPerNode param for this operation?
+        Collections.shuffle(nodeList, RANDOM);
 
-      // Remove the node that hosts the parent shard for replica creation.
-      nodeList.remove(nodeName);
-      
-      // TODO: change this to handle sharding a slice into > 2 sub-shards.
+        // TODO: Have maxShardsPerNode param for this operation?
 
-      for (int i = 1; i <= subSlices.size(); i++) {
-        Collections.shuffle(nodeList, RANDOM);
-        String sliceName = subSlices.get(i - 1);
-        for (int j = 2; j <= repFactor; j++) {
-          String subShardNodeName = nodeList.get((repFactor * (i - 1) + (j - 2)) % nodeList.size());
-          String shardName = collectionName + "_" + sliceName + "_replica" + (j);
+        // Remove the node that hosts the parent shard for replica creation.
+        nodeList.remove(nodeName);
 
-          log.info("Creating replica shard " + shardName + " as part of slice "
-              + sliceName + " of collection " + collectionName + " on "
-              + subShardNodeName);
+        // TODO: change this to handle sharding a slice into > 2 sub-shards.
 
-          HashMap<String, Object> propMap = new HashMap<>();
-          propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
-          propMap.put(COLLECTION_PROP, collectionName);
-          propMap.put(SHARD_ID_PROP, sliceName);
-          propMap.put("node", subShardNodeName);
-          propMap.put(CoreAdminParams.NAME, shardName);
-          // copy over property params:
-          for (String key : message.keySet()) {
-            if (key.startsWith(COLL_PROP_PREFIX)) {
-              propMap.put(key, message.getStr(key));
+        for (int i = 1; i <= subSlices.size(); i++) {
+          Collections.shuffle(nodeList, RANDOM);
+          String sliceName = subSlices.get(i - 1);
+          for (int j = 2; j <= repFactor; j++) {
+            String subShardNodeName = nodeList.get((repFactor * (i - 1) + (j - 2)) % nodeList.size());
+            String shardName = collectionName + "_" + sliceName + "_replica" + (j);
+
+            log.info("Creating replica shard " + shardName + " as part of slice "
+                + sliceName + " of collection " + collectionName + " on "
+                + subShardNodeName);
+
+            HashMap<String, Object> propMap = new HashMap<>();
+            propMap.put(Overseer.QUEUE_OPERATION, ADDREPLICA.toLower());
+            propMap.put(COLLECTION_PROP, collectionName);
+            propMap.put(SHARD_ID_PROP, sliceName);
+            propMap.put("node", subShardNodeName);
+            propMap.put(CoreAdminParams.NAME, shardName);
+            // copy over property params:
+            for (String key : message.keySet()) {
+              if (key.startsWith(COLL_PROP_PREFIX)) {
+                propMap.put(key, message.getStr(key));
+              }
             }
-          }
-          // add async param
-          if(asyncId != null) {
-            propMap.put(ASYNC, asyncId);
-          }
-          addReplica(clusterState, new ZkNodeProps(propMap), results);
+            // add async param
+            if (asyncId != null) {
+              propMap.put(ASYNC, asyncId);
+            }
+            addReplica(clusterState, new ZkNodeProps(propMap), results);
 
-          String coreNodeName = waitForCoreNodeName(collectionName, subShardNodeName, shardName);
-          // wait for the replicas to be seen as active on sub shard leader
-          log.info("Asking sub shard leader to wait for: " + shardName + " to be alive on: " + subShardNodeName);
-          CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
-          cmd.setCoreName(subShardNames.get(i-1));
-          cmd.setNodeName(subShardNodeName);
-          cmd.setCoreNodeName(coreNodeName);
-          cmd.setState(ZkStateReader.RECOVERING);
-          cmd.setCheckLive(true);
-          cmd.setOnlyIfLeader(true);
-          ModifiableSolrParams p = new ModifiableSolrParams(cmd.getParams());
+            String coreNodeName = waitForCoreNodeName(collectionName, subShardNodeName, shardName);
+            // wait for the replicas to be seen as active on sub shard leader
+            log.info("Asking sub shard leader to wait for: " + shardName + " to be alive on: " + subShardNodeName);
+            CoreAdminRequest.WaitForState cmd = new CoreAdminRequest.WaitForState();
+            cmd.setCoreName(subShardNames.get(i - 1));
+            cmd.setNodeName(subShardNodeName);
+            cmd.setCoreNodeName(coreNodeName);
+            cmd.setState(ZkStateReader.RECOVERING);
+            cmd.setCheckLive(true);
+            cmd.setOnlyIfLeader(true);
+            ModifiableSolrParams p = new ModifiableSolrParams(cmd.getParams());
 
-          sendShardRequest(nodeName, p, shardHandler, asyncId, requestMap);
+            sendShardRequest(nodeName, p, shardHandler, asyncId, requestMap);
 
+          }
         }
-      }
 
-      collectShardResponses(results, true,
-          "SPLITSHARD failed to create subshard replicas or timed out waiting for them to come up",
-          shardHandler);
+        collectShardResponses(results, true,
+            "SPLITSHARD failed to create subshard replicas or timed out waiting for them to come up",
+            shardHandler);
 
-      completeAsyncRequest(asyncId, requestMap, results);
+        completeAsyncRequest(asyncId, requestMap, results);
 
-      log.info("Successfully created all replica shards for all sub-slices " + subSlices);
+        log.info("Successfully created all replica shards for all sub-slices " + subSlices);
 
-      commit(results, slice, parentShardLeader);
+        commit(results, slice, parentShardLeader);
 
-      if (repFactor == 1) {
-        // switch sub shard states to 'active'
-        log.info("Replication factor is 1 so switching shard states");
-        DistributedQueue inQueue = Overseer.getInQueue(zkStateReader.getZkClient());
-        Map<String, Object> propMap = new HashMap<>();
-        propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
-        propMap.put(slice, Slice.INACTIVE);
-        for (String subSlice : subSlices) {
-          propMap.put(subSlice, Slice.ACTIVE);
-        }
-        propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
-        ZkNodeProps m = new ZkNodeProps(propMap);
-        inQueue.offer(ZkStateReader.toJSON(m));
-      } else  {
-        log.info("Requesting shard state be set to 'recovery'");
-        DistributedQueue inQueue = Overseer.getInQueue(zkStateReader.getZkClient());
-        Map<String, Object> propMap = new HashMap<>();
-        propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
-        for (String subSlice : subSlices) {
-          propMap.put(subSlice, Slice.RECOVERY);
-        }
-        propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
-        ZkNodeProps m = new ZkNodeProps(propMap);
-        inQueue.offer(ZkStateReader.toJSON(m));
-      }
+        if (repFactor == 1) {
+          // switch sub shard states to 'active'
+          log.info("Replication factor is 1 so switching shard states");
+          DistributedQueue inQueue = Overseer.getInQueue(zkStateReader.getZkClient());
+          Map<String, Object> propMap = new HashMap<>();
+          propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
+          propMap.put(slice, Slice.INACTIVE);
+          for (String subSlice : subSlices) {
+            propMap.put(subSlice, Slice.ACTIVE);
+          }
+          propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
+          ZkNodeProps m = new ZkNodeProps(propMap);
+          inQueue.offer(ZkStateReader.toJSON(m));
+        } else {
+          log.info("Requesting shard state be set to 'recovery'");
+          DistributedQueue inQueue = Overseer.getInQueue(zkStateReader.getZkClient());
+          Map<String, Object> propMap = new HashMap<>();
+          propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
+          for (String subSlice : subSlices) {
+            propMap.put(subSlice, Slice.RECOVERY);
+          }
+          propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
+          ZkNodeProps m = new ZkNodeProps(propMap);
+          inQueue.offer(ZkStateReader.toJSON(m));
+        }
 
-      return true;
-    } catch (SolrException e) {
-      throw e;
-    } catch (Exception e) {
-      log.error("Error executing split operation for collection: " + collectionName + " parent shard: " + slice, e);
-      throw new SolrException(ErrorCode.SERVER_ERROR, null, e);
+        return true;
+      } catch (SolrException e) {
+        throw e;
+      } catch (Exception e) {
+        log.error("Error executing split operation for collection: " + collectionName + " parent shard: " + slice, e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, null, e);
+      }
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
     }
   }
 
@@ -1870,67 +1903,72 @@ public class OverseerCollectionProcessor
   }
 
   private void deleteShard(ClusterState clusterState, ZkNodeProps message, NamedList results) {
-    log.info("Delete shard invoked");
     String collection = message.getStr(ZkStateReader.COLLECTION_PROP);
-
     String sliceId = message.getStr(ZkStateReader.SHARD_ID_PROP);
-    Slice slice = clusterState.getSlice(collection, sliceId);
-    
-    if (slice == null) {
-      if(clusterState.hasCollection(collection)) {
-        throw new SolrException(ErrorCode.BAD_REQUEST,
-            "No shard with name " + sliceId + " exists for collection " + collection);
-      } else {
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setMDC(collection, sliceId, null, null);
+    try {
+      log.info("Delete shard invoked");
+      Slice slice = clusterState.getSlice(collection, sliceId);
+
+      if (slice == null) {
+        if (clusterState.hasCollection(collection)) {
+          throw new SolrException(ErrorCode.BAD_REQUEST,
+              "No shard with name " + sliceId + " exists for collection " + collection);
+        } else {
+          throw new SolrException(ErrorCode.BAD_REQUEST,
+              "No collection with the specified name exists: " + collection);
+        }
+      }
+      // For now, only allow for deletions of Inactive slices or custom hashes (range==null).
+      // TODO: Add check for range gaps on Slice deletion
+      if (!(slice.getRange() == null || slice.getState().equals(Slice.INACTIVE)
+          || slice.getState().equals(Slice.RECOVERY) || slice.getState().equals(Slice.CONSTRUCTION))) {
         throw new SolrException(ErrorCode.BAD_REQUEST,
-            "No collection with the specified name exists: " + collection);
+            "The slice: " + slice.getName() + " is currently "
+                + slice.getState() + ". Only non-active (or custom-hashed) slices can be deleted.");
       }
-    }
-    // For now, only allow for deletions of Inactive slices or custom hashes (range==null).
-    // TODO: Add check for range gaps on Slice deletion
-    if (!(slice.getRange() == null || slice.getState().equals(Slice.INACTIVE)
-        || slice.getState().equals(Slice.RECOVERY) || slice.getState().equals(Slice.CONSTRUCTION))) {
-      throw new SolrException(ErrorCode.BAD_REQUEST,
-          "The slice: " + slice.getName() + " is currently "
-          + slice.getState() + ". Only non-active (or custom-hashed) slices can be deleted.");
-    }
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+      ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
 
-    try {
-      ModifiableSolrParams params = new ModifiableSolrParams();
-      params.set(CoreAdminParams.ACTION, CoreAdminAction.UNLOAD.toString());
-      params.set(CoreAdminParams.DELETE_INDEX, "true");
-      sliceCmd(clusterState, params, null, slice, shardHandler);
+      try {
+        ModifiableSolrParams params = new ModifiableSolrParams();
+        params.set(CoreAdminParams.ACTION, CoreAdminAction.UNLOAD.toString());
+        params.set(CoreAdminParams.DELETE_INDEX, "true");
+        sliceCmd(clusterState, params, null, slice, shardHandler);
 
-      processResponses(results, shardHandler);
+        processResponses(results, shardHandler);
 
-      ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
-          DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, collection,
-          ZkStateReader.SHARD_ID_PROP, sliceId);
-      Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
+        ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
+            DELETESHARD.toLower(), ZkStateReader.COLLECTION_PROP, collection,
+            ZkStateReader.SHARD_ID_PROP, sliceId);
+        Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(m));
 
-      // wait for a while until we don't see the shard
-      long now = System.nanoTime();
-      long timeout = now + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
-      boolean removed = false;
-      while (System.nanoTime() < timeout) {
-        Thread.sleep(100);
-        removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null;
-        if (removed) {
-          Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
-          break;
+        // wait for a while until we don't see the shard
+        long now = System.nanoTime();
+        long timeout = now + TimeUnit.NANOSECONDS.convert(30, TimeUnit.SECONDS);
+        boolean removed = false;
+        while (System.nanoTime() < timeout) {
+          Thread.sleep(100);
+          removed = zkStateReader.getClusterState().getSlice(collection, sliceId) == null;
+          if (removed) {
+            Thread.sleep(100); // just a bit of time so it's more likely other readers see on return
+            break;
+          }
+        }
+        if (!removed) {
+          throw new SolrException(ErrorCode.SERVER_ERROR,
+              "Could not fully remove collection: " + collection + " shard: " + sliceId);
         }
-      }
-      if (!removed) {
-        throw new SolrException(ErrorCode.SERVER_ERROR,
-            "Could not fully remove collection: " + collection + " shard: " + sliceId);
-      }
 
-      log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId);
+        log.info("Successfully deleted collection: " + collection + ", shard: " + sliceId);
 
-    } catch (SolrException e) {
-      throw e;
-    } catch (Exception e) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e);
+      } catch (SolrException e) {
+        throw e;
+      } catch (Exception e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Error executing delete operation for collection: " + collection + " shard: " + sliceId, e);
+      }
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
     }
   }
 
@@ -2495,97 +2533,103 @@ public class OverseerCollectionProcessor
     String node = message.getStr("node");
     String shard = message.getStr(SHARD_ID_PROP);
     String coreName = message.getStr(CoreAdminParams.NAME);
-    String asyncId = message.getStr("async");
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setMDC(collection, shard, null, coreName);
+    try {
+      String asyncId = message.getStr("async");
 
-    DocCollection coll = clusterState.getCollection(collection);
-    if (coll == null) {
-      throw new SolrException(ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
-    }
-    if (coll.getSlice(shard) == null) {
-      throw new SolrException(ErrorCode.BAD_REQUEST,
-          "Collection: " + collection + " shard: " + shard + " does not exist");
-    }
-    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+      DocCollection coll = clusterState.getCollection(collection);
+      if (coll == null) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Collection: " + collection + " does not exist");
+      }
+      if (coll.getSlice(shard) == null) {
+        throw new SolrException(ErrorCode.BAD_REQUEST,
+            "Collection: " + collection + " shard: " + shard + " does not exist");
+      }
+      ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
 
-    if (node == null) {
-      node = getNodesForNewShard(clusterState, collection, coll.getSlices().size(), coll.getInt(ZkStateReader.MAX_SHARDS_PER_NODE, 1), coll.getInt(ZkStateReader.REPLICATION_FACTOR, 1), null).get(0).nodeName;
-      log.info("Node not provided, Identified {} for creating new replica", node);
-    }
+      if (node == null) {
+        node = getNodesForNewShard(clusterState, collection, coll.getSlices().size(), coll.getInt(ZkStateReader.MAX_SHARDS_PER_NODE, 1), coll.getInt(ZkStateReader.REPLICATION_FACTOR, 1), null).get(0).nodeName;
+        log.info("Node not provided, Identified {} for creating new replica", node);
+      }
 
 
-    if (!clusterState.liveNodesContain(node))  {
-      throw new SolrException(ErrorCode.BAD_REQUEST, "Node: " + node + " is not live");
-    }
-    if (coreName == null) {
-      // assign a name to this core
-      Slice slice = coll.getSlice(shard);
-      int replicaNum = slice.getReplicas().size();
-      for (;;)  {
-        String replicaName = collection + "_" + shard + "_replica" + replicaNum;
-        boolean exists = false;
-        for (Replica replica : slice.getReplicas()) {
-          if (replicaName.equals(replica.getStr("core"))) {
-            exists = true;
-            break;
+      if (!clusterState.liveNodesContain(node)) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Node: " + node + " is not live");
+      }
+      if (coreName == null) {
+        // assign a name to this core
+        Slice slice = coll.getSlice(shard);
+        int replicaNum = slice.getReplicas().size();
+        for (; ; ) {
+          String replicaName = collection + "_" + shard + "_replica" + replicaNum;
+          boolean exists = false;
+          for (Replica replica : slice.getReplicas()) {
+            if (replicaName.equals(replica.getStr("core"))) {
+              exists = true;
+              break;
+            }
           }
+          if (exists) replicaNum++;
+          else break;
         }
-        if (exists) replicaNum++;
-        else break;
+        coreName = collection + "_" + shard + "_replica" + replicaNum;
       }
-      coreName = collection + "_" + shard + "_replica" + replicaNum;
-    }
-    ModifiableSolrParams params = new ModifiableSolrParams();
+      ModifiableSolrParams params = new ModifiableSolrParams();
 
-    if(!Overseer.isLegacy(zkStateReader.getClusterProps())){
-      ZkNodeProps props = new ZkNodeProps(
-          Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(),
-          ZkStateReader.COLLECTION_PROP, collection,
-          ZkStateReader.SHARD_ID_PROP, shard,
-          ZkStateReader.CORE_NAME_PROP, coreName,
-          ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
-          ZkStateReader.BASE_URL_PROP,zkStateReader.getBaseUrlForNodeName(node));
-      Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(props));
-      params.set(CoreAdminParams.CORE_NODE_NAME, waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
-    }
+      if (!Overseer.isLegacy(zkStateReader.getClusterProps())) {
+        ZkNodeProps props = new ZkNodeProps(
+            Overseer.QUEUE_OPERATION, ADDREPLICA.toLower(),
+            ZkStateReader.COLLECTION_PROP, collection,
+            ZkStateReader.SHARD_ID_PROP, shard,
+            ZkStateReader.CORE_NAME_PROP, coreName,
+            ZkStateReader.STATE_PROP, ZkStateReader.DOWN,
+            ZkStateReader.BASE_URL_PROP, zkStateReader.getBaseUrlForNodeName(node));
+        Overseer.getInQueue(zkStateReader.getZkClient()).offer(ZkStateReader.toJSON(props));
+        params.set(CoreAdminParams.CORE_NODE_NAME, waitToSeeReplicasInState(collection, Collections.singletonList(coreName)).get(coreName).getName());
+      }
 
 
-    String configName = zkStateReader.readConfigName(collection);
-    String routeKey = message.getStr(ShardParams._ROUTE_);
-    String dataDir = message.getStr(CoreAdminParams.DATA_DIR);
-    String instanceDir = message.getStr(CoreAdminParams.INSTANCE_DIR);
-
-    params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
-    params.set(CoreAdminParams.NAME, coreName);
-    params.set(COLL_CONF, configName);
-    params.set(CoreAdminParams.COLLECTION, collection);
-    if (shard != null)  {
-      params.set(CoreAdminParams.SHARD, shard);
-    } else if (routeKey != null)  {
-      Collection<Slice> slices = coll.getRouter().getSearchSlicesSingle(routeKey, null, coll);
-      if (slices.isEmpty()) {
-        throw new SolrException(ErrorCode.BAD_REQUEST, "No active shard serving _route_=" + routeKey + " found");
+      String configName = zkStateReader.readConfigName(collection);
+      String routeKey = message.getStr(ShardParams._ROUTE_);
+      String dataDir = message.getStr(CoreAdminParams.DATA_DIR);
+      String instanceDir = message.getStr(CoreAdminParams.INSTANCE_DIR);
+
+      params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
+      params.set(CoreAdminParams.NAME, coreName);
+      params.set(COLL_CONF, configName);
+      params.set(CoreAdminParams.COLLECTION, collection);
+      if (shard != null) {
+        params.set(CoreAdminParams.SHARD, shard);
+      } else if (routeKey != null) {
+        Collection<Slice> slices = coll.getRouter().getSearchSlicesSingle(routeKey, null, coll);
+        if (slices.isEmpty()) {
+          throw new SolrException(ErrorCode.BAD_REQUEST, "No active shard serving _route_=" + routeKey + " found");
+        } else {
+          params.set(CoreAdminParams.SHARD, slices.iterator().next().getName());
+        }
       } else  {
-        params.set(CoreAdminParams.SHARD, slices.iterator().next().getName());
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Specify either 'shard' or _route_ param");
       }
-    } else  {
-      throw new SolrException(ErrorCode.BAD_REQUEST, "Specify either 'shard' or _route_ param");
-    }
-    if (dataDir != null)  {
-      params.set(CoreAdminParams.DATA_DIR, dataDir);
-    }
-    if (instanceDir != null)  {
-      params.set(CoreAdminParams.INSTANCE_DIR, instanceDir);
-    }
-    addPropertyParams(message, params);
+      if (dataDir != null) {
+        params.set(CoreAdminParams.DATA_DIR, dataDir);
+      }
+      if (instanceDir != null) {
+        params.set(CoreAdminParams.INSTANCE_DIR, instanceDir);
+      }
+      addPropertyParams(message, params);
 
-    // For tracking async calls.
-    HashMap<String, String> requestMap = new HashMap<>();
-    sendShardRequest(node, params, shardHandler, asyncId, requestMap);
+      // For tracking async calls.
+      HashMap<String, String> requestMap = new HashMap<>();
+      sendShardRequest(node, params, shardHandler, asyncId, requestMap);
 
-    collectShardResponses(results, true,
-        "ADDREPLICA failed to create replica", shardHandler);
+      collectShardResponses(results, true,
+          "ADDREPLICA failed to create replica", shardHandler);
 
-    completeAsyncRequest(asyncId, requestMap, results);
+      completeAsyncRequest(asyncId, requestMap, results);
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
+    }
   }
 
   private void processResponses(NamedList results, ShardHandler shardHandler) {
@@ -2844,6 +2888,8 @@ public class OverseerCollectionProcessor
       String asyncId = message.getStr(ASYNC);
       String collectionName = message.containsKey(COLLECTION_PROP) ?
           message.getStr(COLLECTION_PROP) : message.getStr("name");
+      Map previousMDCContext = MDC.getCopyOfContextMap();
+      MDCUtils.setCollection(collectionName);
       try {
         try {
           log.debug("Runner processing {}", head.getId());
@@ -2888,6 +2934,7 @@ public class OverseerCollectionProcessor
         synchronized (waitLock){
           waitLock.notifyAll();
         }
+        MDCUtils.cleanupMDC(previousMDCContext);
       }
     }
 

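Each collection operation patched above follows the same MDC discipline: snapshot the thread's current MDC map, set the collection/shard/replica/core keys, do the work, and restore the snapshot in a finally block so pooled threads do not leak context between requests. The following self-contained sketch shows that pattern using the plain SLF4J MDC API; it is an illustration only, not the project's MDCUtils code, and the key names used here are assumptions for the example.

    import java.util.Map;

    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import org.slf4j.MDC;

    // Illustrative sketch of the snapshot/set/restore pattern applied in the diff above.
    // Key names ("collection", "shard", "core") are assumptions for this example.
    public class MdcScopeExample {
      private static final Logger log = LoggerFactory.getLogger(MdcScopeExample.class);

      public static void main(String[] args) {
        runScoped("collection1", "shard1", "collection1_shard1_replica1");
      }

      static void runScoped(String collection, String shard, String core) {
        // Snapshot whatever context the calling thread already carries.
        Map<String, String> previous = MDC.getCopyOfContextMap();
        try {
          // Tag every log line emitted on this thread with the cloud coordinates.
          MDC.put("collection", collection);
          MDC.put("shard", shard);
          MDC.put("core", core);
          log.info("processing operation"); // a layout can render these via %X{...}
        } finally {
          // Restore the caller's context; clear if there was none.
          if (previous == null) {
            MDC.clear();
          } else {
            MDC.setContextMap(previous);
          }
        }
      }
    }

The raw Map snapshots in the patch (Map previousMDCContext = MDC.getCopyOfContextMap()) play the same role as the previous map in this sketch.
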
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1668992&r1=1668991&r2=1668992&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/ZkController.java Tue Mar 24 21:01:44 2015
@@ -53,6 +53,7 @@ import org.apache.solr.core.CoreContaine
 import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.component.ShardHandler;
+import org.apache.solr.logging.MDCUtils;
 import org.apache.solr.update.UpdateLog;
 import org.apache.solr.update.UpdateShardHandler;
 import org.apache.zookeeper.CreateMode;
@@ -66,6 +67,7 @@ import org.apache.zookeeper.Watcher;
 import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
 
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
@@ -826,17 +828,19 @@ public final class ZkController {
    */
   public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, boolean afterExpiration) throws Exception {  
     // pre register has published our down state
-    
     final String baseUrl = getBaseUrl();
     
     final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
     final String collection = cloudDesc.getCollectionName();
 
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setCollection(collection);
+
     final String coreZkNodeName = desc.getCloudDescriptor().getCoreNodeName();
     assert coreZkNodeName != null : "we should have a coreNodeName by now";
     
     String shardId = cloudDesc.getShardId();
-
+    MDCUtils.setShard(shardId);
     Map<String,Object> props = new HashMap<>();
  // we only put a subset of props into the leader node
     props.put(ZkStateReader.BASE_URL_PROP, baseUrl);
@@ -852,67 +856,71 @@ public final class ZkController {
     ZkNodeProps leaderProps = new ZkNodeProps(props);
     
     try {
-      // If we're a preferred leader, insert ourselves at the head of the queue
-      boolean joinAtHead = false;
-      Replica replica = zkStateReader.getClusterState().getReplica(desc.getCloudDescriptor().getCollectionName(), coreZkNodeName);
-      if (replica != null) {
-        joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
+      try {
+        // If we're a preferred leader, insert ourselves at the head of the queue
+        boolean joinAtHead = false;
+        Replica replica = zkStateReader.getClusterState().getReplica(desc.getCloudDescriptor().getCollectionName(), coreZkNodeName);
+        if (replica != null) {
+          joinAtHead = replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
+        }
+        joinElection(desc, afterExpiration, joinAtHead);
+      } catch (InterruptedException e) {
+        // Restore the interrupted status
+        Thread.currentThread().interrupt();
+        throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
+      } catch (KeeperException | IOException e) {
+        throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
       }
-      joinElection(desc, afterExpiration, joinAtHead);
-    } catch (InterruptedException e) {
-      // Restore the interrupted status
-      Thread.currentThread().interrupt();
-      throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
-    } catch (KeeperException | IOException e) {
-      throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
-    }
 
 
-    // in this case, we want to wait for the leader as long as the leader might 
-    // wait for a vote, at least - but also long enough that a large cluster has
-    // time to get its act together
-    String leaderUrl = getLeader(cloudDesc, leaderVoteWait + 600000);
-    
-    String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
-    log.info("We are " + ourUrl + " and leader is " + leaderUrl);
-    boolean isLeader = leaderUrl.equals(ourUrl);
-
-    try (SolrCore core = cc.getCore(desc.getName())) {
-
-      // recover from local transaction log and wait for it to complete before
-      // going active
-      // TODO: should this be moved to another thread? To recoveryStrat?
-      // TODO: should this actually be done earlier, before (or as part of)
-      // leader election perhaps?
-
-      UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
-      if (!core.isReloaded() && ulog != null) {
-        // disable recovery in case shard is in construction state (for shard splits)
-        Slice slice = getClusterState().getSlice(collection, shardId);
-        if (!Slice.CONSTRUCTION.equals(slice.getState()) || !isLeader) {
-          Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler()
-              .getUpdateLog().recoverFromLog();
-          if (recoveryFuture != null) {
-            log.info("Replaying tlog for "+ourUrl+" during startup... NOTE: This can take a while.");
-            recoveryFuture.get(); // NOTE: this could potentially block for
-            // minutes or more!
-            // TODO: public as recovering in the mean time?
-            // TODO: in the future we could do peersync in parallel with recoverFromLog
-          } else {
-            log.info("No LogReplay needed for core=" + core.getName() + " baseURL=" + baseUrl);
+      // in this case, we want to wait for the leader as long as the leader might 
+      // wait for a vote, at least - but also long enough that a large cluster has
+      // time to get its act together
+      String leaderUrl = getLeader(cloudDesc, leaderVoteWait + 600000);
+
+      String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
+      log.info("We are " + ourUrl + " and leader is " + leaderUrl);
+      boolean isLeader = leaderUrl.equals(ourUrl);
+
+      try (SolrCore core = cc.getCore(desc.getName())) {
+
+        // recover from local transaction log and wait for it to complete before
+        // going active
+        // TODO: should this be moved to another thread? To recoveryStrat?
+        // TODO: should this actually be done earlier, before (or as part of)
+        // leader election perhaps?
+
+        UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
+        if (!core.isReloaded() && ulog != null) {
+          // disable recovery in case shard is in construction state (for shard splits)
+          Slice slice = getClusterState().getSlice(collection, shardId);
+          if (!Slice.CONSTRUCTION.equals(slice.getState()) || !isLeader) {
+            Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler()
+                .getUpdateLog().recoverFromLog();
+            if (recoveryFuture != null) {
+              log.info("Replaying tlog for " + ourUrl + " during startup... NOTE: This can take a while.");
+              recoveryFuture.get(); // NOTE: this could potentially block for
+              // minutes or more!
+              // TODO: public as recovering in the mean time?
+              // TODO: in the future we could do peersync in parallel with recoverFromLog
+            } else {
+              log.info("No LogReplay needed for core=" + core.getName() + " baseURL=" + baseUrl);
+            }
+          }
+          boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc,
+              collection, coreZkNodeName, shardId, leaderProps, core, cc);
+          if (!didRecovery) {
+            publish(desc, ZkStateReader.ACTIVE);
           }
-        }
-        boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc,
-            collection, coreZkNodeName, shardId, leaderProps, core, cc);
-        if (!didRecovery) {
-          publish(desc, ZkStateReader.ACTIVE);
         }
       }
+
+      // make sure we have an update cluster state right away
+      zkStateReader.updateClusterState(true);
+      return shardId;
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
     }
-    
-    // make sure we have an update cluster state right away
-    zkStateReader.updateClusterState(true);
-    return shardId;
   }
 
   // timeoutms is the timeout for the first call to get the leader - there is then
@@ -1110,74 +1118,85 @@ public final class ZkController {
       }
     }
     String collection = cd.getCloudDescriptor().getCollectionName();
-    log.info("publishing core={} state={} collection={}", cd.getName(), state, collection);
-    //System.out.println(Thread.currentThread().getStackTrace()[3]);
-    Integer numShards = cd.getCloudDescriptor().getNumShards();
-    if (numShards == null) { //XXX sys prop hack
-      log.info("numShards not found on descriptor - reading it from system property");
-      numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
-    }
-    
-    assert collection != null && collection.length() > 0;
-    
-    String shardId = cd.getCloudDescriptor().getShardId();
-    String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();    
-    // If the leader initiated recovery, then verify that this replica has performed
-    // recovery as requested before becoming active; don't even look at lirState if going down
-    if (!ZkStateReader.DOWN.equals(state)) {
-      String lirState = getLeaderInitiatedRecoveryState(collection, shardId, coreNodeName);
-      if (lirState != null) {
-        if (ZkStateReader.ACTIVE.equals(state)) {
-          // trying to become active, so leader-initiated state must be recovering
-          if (ZkStateReader.RECOVERING.equals(lirState)) {
-            updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.ACTIVE, null, true);
-          } else if (ZkStateReader.DOWN.equals(lirState)) {
-            throw new SolrException(ErrorCode.INVALID_STATE, 
-                "Cannot publish state of core '"+cd.getName()+"' as active without recovering first!");
-          }
-        } else if (ZkStateReader.RECOVERING.equals(state)) {
-          // if it is currently DOWN, then trying to enter into recovering state is good
-          if (ZkStateReader.DOWN.equals(lirState)) {
-            updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.RECOVERING, null, true);
+
+    Map previousMDCContext = MDC.getCopyOfContextMap();
+    MDCUtils.setCollection(collection);
+
+    try {
+      if (cd != null && cd.getName() != null)
+        MDCUtils.setCore(cd.getName());
+      log.info("publishing core={} state={} collection={}", cd.getName(), state, collection);
+      //System.out.println(Thread.currentThread().getStackTrace()[3]);
+      Integer numShards = cd.getCloudDescriptor().getNumShards();
+      if (numShards == null) { //XXX sys prop hack
+        log.info("numShards not found on descriptor - reading it from system property");
+        numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP);
+      }
+
+      assert collection != null && collection.length() > 0;
+
+      String shardId = cd.getCloudDescriptor().getShardId();
+      MDCUtils.setShard(shardId);
+      String coreNodeName = cd.getCloudDescriptor().getCoreNodeName();
+      // If the leader initiated recovery, then verify that this replica has performed
+      // recovery as requested before becoming active; don't even look at lirState if going down
+      if (!ZkStateReader.DOWN.equals(state)) {
+        String lirState = getLeaderInitiatedRecoveryState(collection, shardId, coreNodeName);
+        if (lirState != null) {
+          if (ZkStateReader.ACTIVE.equals(state)) {
+            // trying to become active, so leader-initiated state must be recovering
+            if (ZkStateReader.RECOVERING.equals(lirState)) {
+              updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.ACTIVE, null, true);
+            } else if (ZkStateReader.DOWN.equals(lirState)) {
+              throw new SolrException(ErrorCode.INVALID_STATE,
+                  "Cannot publish state of core '" + cd.getName() + "' as active without recovering first!");
+            }
+          } else if (ZkStateReader.RECOVERING.equals(state)) {
+            // if it is currently DOWN, then trying to enter into recovering state is good
+            if (ZkStateReader.DOWN.equals(lirState)) {
+              updateLeaderInitiatedRecoveryState(collection, shardId, coreNodeName, ZkStateReader.RECOVERING, null, true);
+            }
           }
         }
       }
-    }
-    
-    Map<String, Object> props = new HashMap<>();
-    props.put(Overseer.QUEUE_OPERATION, "state");
-    props.put(ZkStateReader.STATE_PROP, state);
-    props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl());
-    props.put(ZkStateReader.CORE_NAME_PROP, cd.getName());
-    props.put(ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles());
-    props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
-    props.put(ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId());
-    props.put(ZkStateReader.COLLECTION_PROP, collection);
-    if (numShards != null) {
-      props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString());
-    }
-    if (coreNodeName != null) {
-      props.put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
-    }
-    
-    if (ClusterStateUtil.isAutoAddReplicas(getZkStateReader(), collection)) { 
-      try (SolrCore core = cc.getCore(cd.getName())) {
-        if (core != null && core.getDirectoryFactory().isSharedStorage()) {
-          props.put("dataDir", core.getDataDir());
-          UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
-          if (ulog != null) {
-            props.put("ulogDir", ulog.getLogDir());
+
+      Map<String, Object> props = new HashMap<>();
+      props.put(Overseer.QUEUE_OPERATION, "state");
+      props.put(ZkStateReader.STATE_PROP, state);
+      props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl());
+      props.put(ZkStateReader.CORE_NAME_PROP, cd.getName());
+      props.put(ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles());
+      props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
+      props.put(ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId());
+      props.put(ZkStateReader.COLLECTION_PROP, collection);
+      if (numShards != null) {
+        props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString());
+      }
+      if (coreNodeName != null) {
+        props.put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName);
+      }
+
+      if (ClusterStateUtil.isAutoAddReplicas(getZkStateReader(), collection)) {
+        try (SolrCore core = cc.getCore(cd.getName())) {
+          if (core != null && core.getDirectoryFactory().isSharedStorage()) {
+            props.put("dataDir", core.getDataDir());
+            UpdateLog ulog = core.getUpdateHandler().getUpdateLog();
+            if (ulog != null) {
+              props.put("ulogDir", ulog.getLogDir());
+            }
           }
         }
       }
+
+      ZkNodeProps m = new ZkNodeProps(props);
+
+      if (updateLastState) {
+        cd.getCloudDescriptor().lastPublished = state;
+      }
+      overseerJobQueue.offer(ZkStateReader.toJSON(m));
+    } finally {
+      MDCUtils.cleanupMDC(previousMDCContext);
     }
-    
-    ZkNodeProps m = new ZkNodeProps(props);
-    
-    if (updateLastState) {
-      cd.getCloudDescriptor().lastPublished = state;
-    }
-    overseerJobQueue.offer(ZkStateReader.toJSON(m));
   }
   
   private boolean needsToBeAssignedShardId(final CoreDescriptor desc,



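The MDCUtils helper referenced throughout is added elsewhere in this commit, so only its call sites are visible here. Going purely by those call sites (setMDC(collection, shard, replica, core), setCollection, setShard, setCore, and cleanupMDC(previousMDCContext)), a hypothetical reconstruction could look like the sketch below; the actual class and its MDC key names may differ.

    import java.util.Map;

    import org.slf4j.MDC;

    // Hypothetical reconstruction of the helper exercised by the call sites above;
    // the real MDCUtils ships in the other part of this commit and may differ.
    public final class MDCUtils {
      // Assumed MDC key names, matching the issue summary.
      public static final String COLLECTION = "collection";
      public static final String SHARD = "shard";
      public static final String REPLICA = "replica";
      public static final String CORE = "core";

      private MDCUtils() {}

      public static void setMDC(String collection, String shard, String replica, String core) {
        setCollection(collection);
        setShard(shard);
        setReplica(replica);
        setCore(core);
      }

      public static void setCollection(String collection) {
        if (collection != null) MDC.put(COLLECTION, collection);
      }

      public static void setShard(String shard) {
        if (shard != null) MDC.put(SHARD, shard);
      }

      public static void setReplica(String replica) {
        if (replica != null) MDC.put(REPLICA, replica);
      }

      public static void setCore(String core) {
        if (core != null) MDC.put(CORE, core);
      }

      // Restore the snapshot taken with MDC.getCopyOfContextMap() before the keys were set.
      public static void cleanupMDC(Map<String, String> previousMDCContext) {
        if (previousMDCContext != null) {
          MDC.setContextMap(previousMDCContext);
        } else {
          MDC.clear();
        }
      }
    }

With keys like these in the MDC, a logging layout that references them (for example %X{collection} %X{shard} %X{core}) would prefix each log line with the cloud coordinates of the operation being handled on that thread.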