From dev-return-60348-archive-asf-public=cust-asf.ponee.io@storm.apache.org Thu Aug 1 16:32:32 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id 83823180644 for ; Thu, 1 Aug 2019 18:32:32 +0200 (CEST) Received: (qmail 80227 invoked by uid 500); 1 Aug 2019 16:32:31 -0000 Mailing-List: contact dev-help@storm.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@storm.apache.org Delivered-To: mailing list dev@storm.apache.org Received: (qmail 80216 invoked by uid 99); 1 Aug 2019 16:32:31 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 01 Aug 2019 16:32:31 +0000 From: GitBox To: dev@storm.apache.org Subject: [GitHub] [storm] dandsager1 commented on a change in pull request #3092: STORM-3474 Large fragmented cluster scheduling time test Message-ID: <156467715158.21993.6544995040564794035.gitbox@gitbox.apache.org> Date: Thu, 01 Aug 2019 16:32:31 -0000 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit dandsager1 commented on a change in pull request #3092: STORM-3474 Large fragmented cluster scheduling time test URL: https://github.com/apache/storm/pull/3092#discussion_r309789628 ########## File path: storm-server/src/test/java/org/apache/storm/scheduler/resource/TestResourceAwareScheduler.java ########## @@ -1067,6 +1068,208 @@ public void minCpuWorkerSplitFails() { assertFalse("Topo-1 unscheduled?", cluster.getAssignmentById(topo1.getId()) != null); } + protected static class TimeBlockResult { + long firstBlockTime; + long lastBlockTime; + } + + private long getMedianBlockTime(TimeBlockResult[] runResults, boolean firstBlock) { + final int numRuns = runResults.length; + assert(numRuns % 2 == 1); // number of runs must be odd to compute median as below + long[] times = new long[numRuns]; + for (int i = 0; i < numRuns; ++i) { + times[i] = firstBlock ? runResults[i].firstBlockTime : runResults[i].lastBlockTime; + } + Arrays.sort(times); + + final int medianIndex = (int) Math.floor(numRuns / 2); + return times[medianIndex]; + } + + /** + * Check time to schedule a fragmented cluster using different strategies + * + * Simulate scheduling on a large production cluster. Find the ratio of time to schedule a set of topologies when + * the cluster is empty and when the cluster is nearly full. While the cluster has sufficient resources to schedule + * all topologies, when nearly full the cluster becomes fragmented and some topologies fail to schedule. + */ + @Test + public void TestLargeFragmentedClusterScheduling() { + /* + Without fragmentation, the cluster would be able to schedule both topologies on each node. Let's call each node + with both topologies scheduled as 100% scheduled. + + We schedule the cluster in 3 blocks of topologies, measuring the time to schedule the blocks. The first, middle + and last blocks attempt to schedule the following 0-10%, 10%-90%, 90%-100%. The last block has a number of + scheduling failures due to cluster fragmentation and its time is dominated by attempting to evict topologies. + + Timing results for scheduling are noisy. As a result, we do multiple runs and use median values for FirstBlock + and LastBlock times. (somewhere a statistician is crying). The ratio of LastBlock / FirstBlock remains fairly constant. + + + TestLargeFragmentedClusterScheduling took 91118 ms + DefaultResourceAwareStrategy, FirstBlock 249.0, LastBlock 1734.0 ratio 6.963855421686747 + GenericResourceAwareStrategy, FirstBlock 215.0, LastBlock 1673.0 ratio 7.78139534883721 + ConstraintSolverStrategy, FirstBlock 279.0, LastBlock 2200.0 ratio 7.885304659498208 + + TestLargeFragmentedClusterScheduling took 98455 ms + DefaultResourceAwareStrategy, FirstBlock 266.0, LastBlock 1812.0 ratio 6.81203007518797 + GenericResourceAwareStrategy, FirstBlock 235.0, LastBlock 1802.0 ratio 7.6680851063829785 + ConstraintSolverStrategy, FirstBlock 304.0, LastBlock 2320.0 ratio 7.631578947368421 + + TestLargeFragmentedClusterScheduling took 97268 ms + DefaultResourceAwareStrategy, FirstBlock 251.0, LastBlock 1826.0 ratio 7.274900398406374 + GenericResourceAwareStrategy, FirstBlock 220.0, LastBlock 1719.0 ratio 7.8136363636363635 + ConstraintSolverStrategy, FirstBlock 296.0, LastBlock 2469.0 ratio 8.341216216216216 + + TestLargeFragmentedClusterScheduling took 97963 ms + DefaultResourceAwareStrategy, FirstBlock 249.0, LastBlock 1788.0 ratio 7.180722891566265 + GenericResourceAwareStrategy, FirstBlock 240.0, LastBlock 1796.0 ratio 7.483333333333333 + ConstraintSolverStrategy, FirstBlock 328.0, LastBlock 2544.0 ratio 7.7560975609756095 + + TestLargeFragmentedClusterScheduling took 93106 ms + DefaultResourceAwareStrategy, FirstBlock 258.0, LastBlock 1714.0 ratio 6.6434108527131785 + GenericResourceAwareStrategy, FirstBlock 215.0, LastBlock 1692.0 ratio 7.869767441860465 + ConstraintSolverStrategy, FirstBlock 309.0, LastBlock 2342.0 ratio 7.5792880258899675 + + Choose the median value of the values above + DefaultResourceAwareStrategy 6.96 + GenericResourceAwareStrategy 7.78 + ConstraintSolverStrategy 7.75 + */ + + final int numNodes = 500; + final String[] strategies = new String[]{ + DefaultResourceAwareStrategy.class.getName(), + GenericResourceAwareStrategy.class.getName(), + ConstraintSolverStrategy.class.getName() + }; + + final int numStrategies = strategies.length; + final int numRuns = 5; + TimeBlockResult testResults[][] = new TimeBlockResult[numStrategies][numRuns]; + + // Get first and last block times for multiple runs and strategies + long startTime = Time.currentTimeMillis(); + for (int strategyIdx = 0; strategyIdx < numStrategies; ++strategyIdx) { + String strategy = strategies[strategyIdx]; + + for (int run = 0; run < numRuns; ++run) { + testResults[strategyIdx][run] = testLargeClusterSchedulingTiming(numNodes, strategy); + } + } + + // Log median ratios for different strategies + LOG.info("TestLargeFragmentedClusterScheduling took {} ms", Time.currentTimeMillis() - startTime); + for (int strategyIdx = 0; strategyIdx < numStrategies; ++strategyIdx) { + double medianFirstBlockTime = getMedianBlockTime(testResults[strategyIdx], true); + double medianLastBlockTime = getMedianBlockTime(testResults[strategyIdx], false); + double ratio = medianLastBlockTime / medianFirstBlockTime; + LOG.info("{}, FirstBlock {}, LastBlock {} ratio {}", strategies[strategyIdx], medianFirstBlockTime, medianLastBlockTime, ratio); + } + + // Check last block scheduling time does not get significantly slower + final double[] acceptedStrategyTimeRatios = {6.96, 7.78, 7.75}; + for (int strategyIdx = 0; strategyIdx < numStrategies; ++strategyIdx) { + double medianFirstBlockTime = getMedianBlockTime(testResults[strategyIdx], true); + double medianLastBlockTime = getMedianBlockTime(testResults[strategyIdx], false); + double ratio = medianLastBlockTime / medianFirstBlockTime; + + double slowSchedulingThreshold = 1.5; + assert(ratio < slowSchedulingThreshold * acceptedStrategyTimeRatios[strategyIdx]); + } + } + + // Create multiple copies of a test topology + private void addTopologyBlockToMap(Map topologyMap, String baseName, Config config, + double spoutMemoryLoad, int[] blockIndices) { + TopologyBuilder builder = new TopologyBuilder(); // a topology with multiple spouts + builder.setSpout("testSpout", new TestSpout(), 1).setMemoryLoad(spoutMemoryLoad); + StormTopology stormTopology = builder.createTopology(); + Map executorMap = genExecsAndComps(stormTopology); + + for (int i = blockIndices[0]; i <= blockIndices[1]; ++i) { + TopologyDetails topo = new TopologyDetails(baseName + i, config, stormTopology, 0, executorMap, 0, "user"); + topologyMap.put(topo.getId(), topo); + } + } + + /* + * Test time to schedule large cluster scheduling with fragmentation + */ + private TimeBlockResult testLargeClusterSchedulingTiming(int numNodes, String Strategy) { + Config config = null; + if (Strategy.equals(DefaultResourceAwareStrategy.class.getName())) { + config = createClusterConfig(10, 10, 0, null); Review comment: I don't know how this would be structured. I can imagine an interface class and then 3 derived classes, each with its own createClusterConfig method, but it sounds like you have something different in mind? As a recent convert from C++, I think of using function pointers, but that concept is not in Java. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services