From: inigoiri@apache.org
To: common-commits@hadoop.apache.org
Date: Tue, 28 Mar 2017 21:26:12 -0000
Message-Id: <0796f7dd085c41e3a0d97cb5297a76a7@git.apache.org>
In-Reply-To: <12443666a4294d309c5510a561d49742@git.apache.org>
References: <12443666a4294d309c5510a561d49742@git.apache.org>
Subject: [49/50] [abbrv] hadoop git commit: HDFS-11577. Combine the old and the new chooseRandom for better performance. Contributed by Chen Liang.

HDFS-11577. Combine the old and the new chooseRandom for better performance.
Contributed by Chen Liang.
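The performance claim rests on a simple trade-off between the two existing search paths: the old chooseRandom is cheap but storage-type-blind, while the storage-type-aware search always succeeds in one pass but costs more per call. A back-of-the-envelope sketch of the expected cost of combining them (illustrative only; the cost constants below are assumptions, not measurements from this commit):

class TwoTrialCostSketch {
  // If a fraction p of candidate nodes carries the wanted storage type, the
  // cheap first draw succeeds with probability roughly p; on a miss, one
  // slower storage-aware draw runs and is guaranteed to succeed.
  static double expectedCost(double p, double cheapDraw, double slowDraw) {
    return cheapDraw + (1 - p) * slowDraw;
  }

  public static void main(String[] args) {
    // With cheap = 1 unit and slow = 5 units (assumed numbers):
    System.out.println(expectedCost(0.9, 1, 5)); // 1.5 -- mostly one cheap draw
    System.out.println(expectedCost(0.1, 1, 5)); // 5.5 -- degrades to the slow path
  }
}

When most nodes carry the requested type, the combined scheme pays little more than one cheap draw, which is the common case this commit optimizes for.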
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/6b093364
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/6b093364
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/6b093364

Branch: refs/heads/HDFS-10467
Commit: 6b0933643835d7696ced011cfdb8b74f63022e8b
Parents: fdf8f8e
Author: Yiqun Lin
Authored: Tue Mar 28 23:02:07 2017 +0800
Committer: Yiqun Lin
Committed: Tue Mar 28 23:02:07 2017 +0800

----------------------------------------------------------------------
 .../org/apache/hadoop/net/NetworkTopology.java  |  2 +-
 .../hadoop/hdfs/net/DFSNetworkTopology.java     | 76 +++++++++++++++++---
 .../hadoop/hdfs/net/TestDFSNetworkTopology.java | 57 +++++++++++++++
 3 files changed, 126 insertions(+), 9 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hadoop/blob/6b093364/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
index 051012b..f8cecdd 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetworkTopology.java
@@ -496,7 +496,7 @@ public class NetworkTopology {
     }
   }
 
-  private Node chooseRandom(final String scope, String excludedScope,
+  protected Node chooseRandom(final String scope, String excludedScope,
       final Collection<Node> excludedNodes) {
     if (excludedScope != null) {
       if (scope.startsWith(excludedScope)) {
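The only change to NetworkTopology above is relaxing chooseRandom from private to protected, which is what lets the HDFS-side subclass below reuse it as the fast first trial. A minimal sketch of the pattern this enables (the subclass and method names here are illustrative, not the actual Hadoop source):

import java.util.Collection;
import org.apache.hadoop.net.NetworkTopology;
import org.apache.hadoop.net.Node;

class TopologySubclassSketch extends NetworkTopology {
  // This call compiles only now that chooseRandom is protected rather than
  // private: a subclass can delegate to the fast, storage-type-blind search.
  Node fastFirstTrial(String scope, String excludedScope,
      Collection<Node> excluded) {
    return chooseRandom(scope, excludedScope, excluded);
  }
}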
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6b093364/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java
index ee83dba..259e275 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/net/DFSNetworkTopology.java
@@ -36,7 +36,6 @@ import java.util.Random;
 * remaining parts should be the same.
 *
 * Currently a placeholder to test storage type info.
- * TODO : add "chooseRandom with storageType info" function.
 */
 public class DFSNetworkTopology extends NetworkTopology {
 
@@ -56,6 +55,7 @@ public class DFSNetworkTopology extends NetworkTopology {
    *
    * @param scope range of nodes from which a node will be chosen
    * @param excludedNodes nodes to be excluded from
+   * @param type the storage type we search for
    * @return the chosen node
    */
   public Node chooseRandomWithStorageType(final String scope,
@@ -75,6 +75,69 @@
   }
 
   /**
+   * Randomly choose one node from scope with the given storage type.
+   *
+   * If scope starts with ~, choose one from all the nodes except for the
+   * ones in scope; otherwise, choose one from scope.
+   * If excludedNodes is given, choose a node that's not in excludedNodes.
+   *
+   * This call makes up to two attempts. It first tries to get a random node
+   * (with the old method) and checks whether that node satisfies the storage
+   * type requirement. If it does, the node is simply returned. Otherwise, a
+   * second call is made (with the new method), passing in the storage type.
+   *
+   * This is done for performance reasons. In short, the old method is faster
+   * but may take several runs, while the new method is somewhat slower but
+   * always succeeds in one trial. See HDFS-11535 for more detail.
+   *
+   * @param scope range of nodes from which a node will be chosen
+   * @param excludedNodes nodes to be excluded from the search
+   * @param type the storage type we search for
+   * @return the chosen node
+   */
+  public Node chooseRandomWithStorageTypeTwoTrial(final String scope,
+      final Collection<Node> excludedNodes, StorageType type) {
+    netlock.readLock().lock();
+    try {
+      String searchScope;
+      String excludedScope;
+      if (scope.startsWith("~")) {
+        searchScope = NodeBase.ROOT;
+        excludedScope = scope.substring(1);
+      } else {
+        searchScope = scope;
+        excludedScope = null;
+      }
+      // next do a two-trial search
+      // first trial, call the old method, inherited from NetworkTopology
+      Node n = chooseRandom(searchScope, excludedScope, excludedNodes);
+      if (n == null) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("No node to choose.");
+        }
+        // this means there is simply no node to choose from
+        return null;
+      }
+      Preconditions.checkArgument(n instanceof DatanodeDescriptor);
+      DatanodeDescriptor dnDescriptor = (DatanodeDescriptor)n;
+
+      if (dnDescriptor.hasStorageType(type)) {
+        // the first trial succeeded, just return
+        return dnDescriptor;
+      } else {
+        // otherwise, make the second trial by calling the new method
+        LOG.debug("First trial failed, node has no type {}, "
+            + "making second trial carrying this type", type);
+        return chooseRandomWithStorageType(searchScope, excludedScope,
+            excludedNodes, type);
+      }
+    } finally {
+      netlock.readLock().unlock();
+    }
+  }
+
+  /**
    * Choose a random node based on given scope, excludedScope and excludedNodes
    * set. Although in general the topology has at most three layers, this class
    * will not impose such assumption.
@@ -99,13 +162,10 @@
    * all it's ancestors' storage counters accordingly, this way the excluded
    * root is out of the picture.
    *
-   * TODO : this function has duplicate code as NetworkTopology, need to
-   * refactor in the future.
-   *
-   * @param scope
-   * @param excludedScope
-   * @param excludedNodes
-   * @return
+   * @param scope the scope where we look for nodes
+   * @param excludedScope the scope where the node must NOT be from
+   * @param excludedNodes the returned node must not be in this set
+   * @return a node with the required storage type
    */
   @VisibleForTesting
   Node chooseRandomWithStorageType(final String scope,
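Taken together, the new public entry point is called like the existing chooseRandomWithStorageType. A brief usage sketch (the scope paths and the assumption of an already-populated `topology` instance are illustrative, not taken from this commit):

import java.util.Collection;
import java.util.HashSet;
import org.apache.hadoop.fs.StorageType;
import org.apache.hadoop.hdfs.net.DFSNetworkTopology;
import org.apache.hadoop.net.Node;

class TwoTrialUsageSketch {
  // Assumes `topology` has already been populated with datanodes.
  static Node pickTwoArchiveNodes(DFSNetworkTopology topology) {
    Collection<Node> excluded = new HashSet<>();
    // First pick: any ARCHIVE-capable node under the (assumed) scope /l2/d3.
    Node first = topology.chooseRandomWithStorageTypeTwoTrial(
        "/l2/d3", excluded, StorageType.ARCHIVE);
    if (first != null) {
      excluded.add(first); // do not hand out the same node twice
    }
    // Second pick: anywhere except /l2/d3, using the "~" exclusion prefix.
    return topology.chooseRandomWithStorageTypeTwoTrial(
        "~/l2/d3", excluded, StorageType.ARCHIVE);
  }
}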
http://git-wip-us.apache.org/repos/asf/hadoop/blob/6b093364/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java
index 30ef2ac..26d96b2 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/net/TestDFSNetworkTopology.java
@@ -508,4 +508,61 @@ public class TestDFSNetworkTopology {
     assertEquals(1, innerl2d3r3.getSubtreeStorageCount(StorageType.DISK));
   }
+
+  @Test
+  public void testChooseRandomWithStorageTypeTwoTrial() throws Exception {
+    Node n;
+    DatanodeDescriptor dd;
+    n = CLUSTER.chooseRandomWithStorageType("/l2/d3/r4", null, null,
+        StorageType.ARCHIVE);
+    HashSet<Node> excluded = new HashSet<>();
+    // exclude the host on r4 (since there is only one host, no randomness here)
+    excluded.add(n);
+
+    // search with the given scope being the desired scope
+    for (int i = 0; i < 10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "/l2/d3", null, StorageType.ARCHIVE);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host12") ||
+          dd.getHostName().equals("host13"));
+    }
+
+    for (int i = 0; i < 10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "/l2/d3", excluded, StorageType.ARCHIVE);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host13"));
+    }
+
+    // search with the given scope being the excluded scope
+
+    // a total of 4 ramdisk nodes:
+    // /l1/d2/r3/host7, /l2/d3/r2/host10, /l2/d4/r1/host7 and /l2/d4/r1/host10
+    // so if we exclude /l2/d4/r1, it should always be either host7 or host10
+    for (int i = 0; i < 10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "~/l2/d4", null, StorageType.RAM_DISK);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host7") ||
+          dd.getHostName().equals("host10"));
+    }
+
+    // similar to above, except that we also exclude host10 here, so it should
+    // always be host7
+    n = CLUSTER.chooseRandomWithStorageType("/l2/d3/r2", null, null,
+        StorageType.RAM_DISK);
+    // add host10 to excluded
+    excluded.add(n);
+    for (int i = 0; i < 10; i++) {
+      n = CLUSTER.chooseRandomWithStorageTypeTwoTrial(
+          "~/l2/d4", excluded, StorageType.RAM_DISK);
+      assertTrue(n instanceof DatanodeDescriptor);
+      dd = (DatanodeDescriptor) n;
+      assertTrue(dd.getHostName().equals("host7"));
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org