This is an automated email from the ASF dual-hosted git repository.
ab pushed a commit to branch branch_8_7
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8_7 by this push:
new 6e5e247 SOLR-14948: Autoscaling maxComputeOperations override causes exceptions.
6e5e247 is described below
commit 6e5e247abcafd6a38590b2835fb60e851cf215db
Author: Andrzej Bialecki <ab@apache.org>
AuthorDate: Wed Oct 21 08:43:05 2020 +0200
SOLR-14948: Autoscaling maxComputeOperations override causes exceptions.
---
solr/CHANGES.txt | 2 +
.../solr/cloud/autoscaling/ComputePlanAction.java | 61 ++++++++++++++++---
.../cloud/autoscaling/ComputePlanActionTest.java | 71 +++++++++++++++++-----
3 files changed, 110 insertions(+), 24 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 28ae39b..3bfa47d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -118,6 +118,8 @@ Bug Fixes
* SOLR-14546: Fix for a relatively hard to hit issue in OverseerTaskProcessor that could
lead to out of order execution
of Collection API tasks competing for a lock (Ilan Ginzburg).
+* SOLR-14948: Autoscaling maxComputeOperations override causes exceptions. (ab)
+
================== 8.7.0 ==================
Consult the lucene/CHANGES.txt file for additional, low level, changes in this release.
diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
index 07cbb38..b76ddb0 100644
--- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
+++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java
@@ -53,6 +53,8 @@ import static org.apache.solr.cloud.autoscaling.TriggerEvent.NODE_NAMES;
public class ComputePlanAction extends TriggerActionBase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ public static final String DIAGNOSTICS = "__compute_diag__";
+
// accept all collections by default
Predicate<String> collectionsPredicate = s -> true;
@@ -129,8 +131,12 @@ public class ComputePlanAction extends TriggerActionBase {
int opCount = 0;
int opLimit = maxOperations;
if (requestedOperations > 0) {
+ log.debug("-- adjusting limit due to explicitly requested number of ops={}", requestedOperations);
opLimit = requestedOperations;
}
+ addDiagnostics(event, "maxOperations", maxOperations);
+ addDiagnostics(event, "requestedOperations", requestedOperations);
+ addDiagnostics(event, "opLimit", opLimit);
do {
// computing changes in large clusters may take a long time
if (Thread.currentThread().isInterrupted()) {
@@ -156,6 +162,8 @@ public class ComputePlanAction extends TriggerActionBase {
if (requestedOperations < 0) {
//uncomment the following to log zero operations
// PolicyHelper.logState(cloudManager, initialSuggester);
+ log.debug("-- no more operations suggested, stopping after {} ops...", (opCount - 1));
+ addDiagnostics(event, "noSuggestionsStopAfter", (opCount - 1));
break;
} else {
log.info("Computed plan empty, remained {} requested ops to try.", opCount - opLimit);
@@ -173,6 +181,10 @@ public class ComputePlanAction extends TriggerActionBase {
operations.add(operation);
return operations;
});
+ if (opCount >= opLimit) {
+ log.debug("-- reached limit of maxOps={}, stopping.", opLimit);
+ addDiagnostics(event, "opLimitReached", true);
+ }
} while (opCount < opLimit);
} finally {
releasePolicySession(sessionWrapper, session);
@@ -189,6 +201,14 @@ public class ComputePlanAction extends TriggerActionBase {
}
+ private void addDiagnostics(TriggerEvent event, String key, Object value) {
+ if (log.isDebugEnabled()) {
+ Map<String, Object> diag = (Map<String, Object>) event.getProperties()
+ .computeIfAbsent(DIAGNOSTICS, n -> new HashMap<>());
+ diag.put(key, value);
+ }
+ }
+
protected int getMaxNumOps(TriggerEvent event, AutoScalingConfig autoScalingConfig, ClusterState clusterState) {
// estimate a maximum default limit that should be sufficient for most purposes:
// number of nodes * total number of replicas * 3
@@ -205,14 +225,26 @@ public class ComputePlanAction extends TriggerActionBase {
totalRF.addAndGet(rf * coll.getSlices().size());
});
int totalMax = clusterState.getLiveNodes().size() * totalRF.get() * 3;
- int maxOp = (Integer) autoScalingConfig.getProperties().getOrDefault(AutoScalingParams.MAX_COMPUTE_OPERATIONS, totalMax);
+ addDiagnostics(event, "estimatedMaxOps", totalMax);
+ int maxOp = ((Number) autoScalingConfig.getProperties().getOrDefault(AutoScalingParams.MAX_COMPUTE_OPERATIONS, totalMax)).intValue();
Object o = event.getProperty(AutoScalingParams.MAX_COMPUTE_OPERATIONS, maxOp);
- try {
- return Integer.parseInt(String.valueOf(o));
- } catch (Exception e) {
- log.warn("Invalid '{}' event property: {}, using default {}", AutoScalingParams.MAX_COMPUTE_OPERATIONS, o, maxOp);
- return maxOp;
+ if (o != null) {
+ try {
+ maxOp = Integer.parseInt(String.valueOf(o));
+ } catch (Exception e) {
+ log.warn("Invalid '{}' event property: {}, using default {}", AutoScalingParams.MAX_COMPUTE_OPERATIONS, o, maxOp);
+ }
}
+ if (maxOp < 0) {
+ // unlimited
+ maxOp = Integer.MAX_VALUE;
+ } else if (maxOp < 1) {
+ // try at least one operation
+ log.debug("-- estimated maxOp={}, resetting to 1...", maxOp);
+ maxOp = 1;
+ }
+ log.debug("-- estimated total max ops={}, effective maxOps={}", totalMax, maxOp);
+ return maxOp;
}
protected int getRequestedNumOps(TriggerEvent event) {
@@ -278,19 +310,27 @@ public class ComputePlanAction extends TriggerActionBase {
case MOVEREPLICA:
Suggester s = session.getSuggester(action)
.hint(Suggester.Hint.SRC_NODE, event.getProperty(NODE_NAMES));
- if (applyCollectionHints(cloudManager, s) == 0) return NoneSuggester.get(session);
+ if (applyCollectionHints(cloudManager, s) == 0) {
+ addDiagnostics(event, "noRelevantCollections", true);
+ return NoneSuggester.get(session);
+ }
return s;
case DELETENODE:
int start = (Integer)event.getProperty(START, 0);
@SuppressWarnings({"unchecked"})
List<String> srcNodes = (List<String>) event.getProperty(NODE_NAMES);
if (srcNodes.isEmpty() || start >= srcNodes.size()) {
+ addDiagnostics(event, "noSourceNodes", true);
return NoneSuggester.get(session);
}
String sourceNode = srcNodes.get(start);
s = session.getSuggester(action)
.hint(Suggester.Hint.SRC_NODE, event.getProperty(NODE_NAMES));
- if (applyCollectionHints(cloudManager, s) == 0) return NoneSuggester.get(session);
+ if (applyCollectionHints(cloudManager, s) == 0) {
+ log.debug("-- no relevant collections on {}, no operations computed.", srcNodes);
+ addDiagnostics(event, "noRelevantCollections", true);
+ return NoneSuggester.get(session);
+ }
s.hint(Suggester.Hint.SRC_NODE, Collections.singletonList(sourceNode));
event.getProperties().put(START, ++start);
return s;
@@ -342,11 +382,16 @@ public class ComputePlanAction extends TriggerActionBase {
.forEach(collShards::add);
}
});
+ log.debug("-- NODE_ADDED: ADDREPLICA suggester configured with {} collection/shard hints.", collShards.size());
+ addDiagnostics(event, "relevantCollShard", collShards);
suggester.hint(Suggester.Hint.COLL_SHARD, collShards);
suggester.hint(Suggester.Hint.REPLICATYPE, replicaType);
break;
case MOVEREPLICA:
+ log.debug("-- NODE_ADDED event specified MOVEREPLICA - no hints added.");
+ break;
case NONE:
+ log.debug("-- NODE_ADDED event specified NONE - no operations suggested.");
break;
default:
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
index 2526292..e7a317d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/ComputePlanActionTest.java
@@ -583,7 +583,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
int numShards = 1;
int numCollections = 5;
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, null);
}
@Test
@@ -592,7 +592,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
int numShards = 1;
int numCollections = 5;
- nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(collectionNamePrefix, numShards, numCollections);
+ nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(collectionNamePrefix, numShards, numCollections, null);
}
@Test
@@ -602,9 +602,19 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
int numShards = 2;
int numCollections = 5;
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, null);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections) throws Exception {
+
+ @Test
+ public void testNodeAddedTriggerWithAddReplicaPreferredOp_2Shard_OpLimit() throws Exception {
+ String collectionNamePrefix = "testNodeAddedTriggerWithAddReplicaPreferredOp_2Shard";
+ int numShards = 2;
+ int numCollections = 5;
+
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, 1);
+ }
+
+ private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, Integer maxOps) throws Exception {
String setTriggerCommand = "{" +
"'set-trigger' : {" +
"'name' : 'node_added_trigger'," +
@@ -624,10 +634,10 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
" ]" +
"}";
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(String collectionNamePrefix, int numShards, int numCollections) throws Exception {
+ private void nodeAddedTriggerWithAddReplicaPreferredOpReplicaType(String collectionNamePrefix, int numShards, int numCollections, Integer maxOps) throws Exception {
String setTriggerCommand = "{" +
"'set-trigger' : {" +
"'name' : 'node_added_trigger'," +
@@ -648,13 +658,15 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
" ]" +
"}";
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, 0, 1, 0);
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps, 0, 1, 0);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand) throws Exception {
- nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, 1, null, null);
+ private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand, Integer maxOps) throws Exception {
+ nodeAddedTriggerWithAddReplicaPreferredOp(collectionNamePrefix, numShards, numCollections, setTriggerCommand, setClusterPolicyCommand, maxOps, 1, null, null);
}
- private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand, Integer nNrtReplicas, Integer nTlogReplicas, Integer nPullReplicas) throws Exception {
+ private void nodeAddedTriggerWithAddReplicaPreferredOp(String collectionNamePrefix, int numShards, int numCollections, String setTriggerCommand, String setClusterPolicyCommand,
+ Integer maxOps,
+ Integer nNrtReplicas, Integer nTlogReplicas, Integer nPullReplicas) throws Exception {
CloudSolrClient solrClient = cluster.getSolrClient();
@SuppressWarnings({"rawtypes"})
SolrRequest req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setTriggerCommand);
@@ -665,6 +677,16 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
response = solrClient.request(req);
assertEquals(response.get("result").toString(), "success");
+ if (maxOps != null) {
+ String setMaxOpsCommand = "{" +
+ " 'set-properties': {" +
+ " 'maxComputeOperations': " + maxOps +
+ " }" +
+ "}";
+ req = AutoScalingRequest.create(SolrRequest.METHOD.POST, setMaxOpsCommand);
+ response = solrClient.request(req);
+ assertEquals(response.get("result").toString(), "success");
+ }
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionNamePrefix + "_0",
"conf", numShards, nNrtReplicas, nTlogReplicas, nPullReplicas).setMaxShardsPerNode(2);
@@ -683,7 +705,13 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
@SuppressWarnings({"rawtypes"})
List operations = (List) actionContext.get("operations");
assertNotNull(operations);
- assertEquals(numShards, operations.size());
+ int numExpectedOps;
+ if (maxOps != null && maxOps > 0) {
+ numExpectedOps = maxOps;
+ } else {
+ numExpectedOps = numShards;
+ }
+ assertEquals(numExpectedOps, operations.size());
Set<String> affectedShards = new HashSet<>(2);
for (Object operation : operations) {
assertTrue(operation instanceof CollectionAdminRequest.AddReplica);
@@ -692,7 +720,7 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
assertEquals(collectionNamePrefix + "_0", addReplica.getCollection());
affectedShards.add(addReplica.getShard());
}
- assertEquals(numShards, affectedShards.size());
+ assertEquals(numExpectedOps, affectedShards.size());
for (int i = 1; i < numCollections; i++) {
create = CollectionAdminRequest.createCollection(collectionNamePrefix + "_" + i,
@@ -712,7 +740,12 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
actionContext = actionContextPropsRef.get();
operations = (List) actionContext.get("operations");
assertNotNull(operations);
- assertEquals(numCollections * numShards, operations.size());
+ if (maxOps != null && maxOps > 0) {
+ numExpectedOps = maxOps;
+ } else {
+ numExpectedOps = numCollections * numShards;
+ }
+ assertEquals(numExpectedOps, operations.size());
Set<String> affectedCollections = new HashSet<>(numCollections);
affectedShards = new HashSet<>(numShards);
Set<Pair<String, String>> affectedCollShards = new HashSet<>(numCollections * numShards);
@@ -724,9 +757,15 @@ public class ComputePlanActionTest extends SolrCloudTestCase {
affectedShards.add(addReplica.getShard());
affectedCollShards.add(new Pair<>(addReplica.getCollection(), addReplica.getShard()));
}
- assertEquals(numCollections, affectedCollections.size());
- assertEquals(numShards, affectedShards.size());
- assertEquals(numCollections * numShards, affectedCollShards.size());
+ if (maxOps != null && maxOps > 0) {
+ assertEquals(numExpectedOps, affectedCollections.size());
+ assertEquals(numExpectedOps, affectedShards.size());
+ assertEquals(numExpectedOps, affectedCollShards.size());
+ } else {
+ assertEquals(numCollections, affectedCollections.size());
+ assertEquals(numShards, affectedShards.size());
+ assertEquals(numCollections * numShards, affectedCollShards.size());
+ }
}
@Test
|