From pr-return-1620-archive-asf-public=cust-asf.ponee.io@cassandra.apache.org Fri Nov 30 20:15:21 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id A225F180671 for ; Fri, 30 Nov 2018 20:15:20 +0100 (CET) Received: (qmail 89812 invoked by uid 500); 30 Nov 2018 19:15:19 -0000 Mailing-List: contact pr-help@cassandra.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: pr@cassandra.apache.org Delivered-To: mailing list pr@cassandra.apache.org Received: (qmail 89800 invoked by uid 99); 30 Nov 2018 19:15:19 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 30 Nov 2018 19:15:19 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 14B5EE0BCF; Fri, 30 Nov 2018 19:15:19 +0000 (UTC) From: jolynch To: pr@cassandra.apache.org Reply-To: pr@cassandra.apache.org References: In-Reply-To: Subject: [GitHub] cassandra pull request #283: CASSANDRA-14459: DynamicEndpointSnitch should n... 
Content-Type: text/plain Message-Id: <20181130191519.14B5EE0BCF@git1-us-west.apache.org> Date: Fri, 30 Nov 2018 19:15:19 +0000 (UTC) Github user jolynch commented on a diff in the pull request: https://github.com/apache/cassandra/pull/283#discussion_r237971740 --- Diff: src/java/org/apache/cassandra/locator/DynamicEndpointSnitch.java --- @@ -154,31 +326,203 @@ private void registerMBean() public void close() { - updateSchedular.cancel(false); - resetSchedular.cancel(false); + if (updateScoresScheduler != null) + updateScoresScheduler.cancel(false); + if (updateSamplesScheduler != null) + updateSamplesScheduler.cancel(false); + + for (AnnotatedMeasurement measurement : samples.values()) + { + if (measurement.probeFuture != null) + measurement.probeFuture.cancel(false); + + measurement.millisSinceLastMeasure.set(0); + measurement.millisSinceLastRequest.set(MAX_PROBE_INTERVAL_MS); + measurement.probeTimerMillis = 0; + } MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); try { - mbs.unregisterMBean(new ObjectName(mbeanName)); + if (mbeanRegistered) + mbs.unregisterMBean(new ObjectName(mbeanName)); } catch (Exception e) { throw new RuntimeException(e); } } + /** + * Background task running on the samples dictionary. The default implementation sends latency probes (PING) + * messages to explore nodes that we have not received timings for recently but have ranked in + * {@link DynamicEndpointSnitch#sortedByProximity(InetAddressAndPort, ReplicaCollection)}. + */ + protected void updateSamples() + { + // Split calculation of probe timers from sending probes for testability + calculateProbes(samples, dynamicLatencyProbeInterval); + + if (!StorageService.instance.isGossipActive()) + return; + + schedulePings(samples); + } + + /** + * This method mutates the passed AnnotatedMeasurements to implement capped exponential backoff per endpoint. + * + * The algorithm is as follows: + * 1. 
All samples get their millisSinceLastMeasure and millisSinceLastRequest fields + * incremented by the passed interval + * 2. Any recently requested (ranked) endpoints that have not been measured recently (e.g. because the snitch + * has sent them no traffic) get probes with exponential backoff. + * + * The backoff is capped at MAX_PROBE_INTERVAL_MS. Furthermore the probes are stopped after + * MAX_PROBE_INTERVAL_MS of no ranking requests as well. + * + * At the end of this method, any passed AnnotatedMeasurements that need latency probes will have non zero + * probeTimerMillis members set. + */ + @VisibleForTesting + static void calculateProbes(Map samples, long intervalMillis) { + for (Map.Entry entry: samples.entrySet()) + { + if (entry.getKey().equals(FBUtilities.getBroadcastAddressAndPort())) + continue; + + AnnotatedMeasurement measurement = entry.getValue(); + long lastMeasure = measurement.millisSinceLastMeasure.getAndAdd(intervalMillis); --- End diff -- The messaging rate is so low; let's punt on the metric for now? I'm happy to add it as a follow-up patch, but I'd like to get the pluggability and probes in first. --- --------------------------------------------------------------------- To unsubscribe, e-mail: pr-unsubscribe@cassandra.apache.org For additional commands, e-mail: pr-help@cassandra.apache.org