Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id A93ED200C2F for ; Mon, 6 Mar 2017 19:18:24 +0100 (CET) Received: by cust-asf.ponee.io (Postfix) id A75B1160B87; Mon, 6 Mar 2017 18:18:24 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id AC435160B81 for ; Mon, 6 Mar 2017 19:18:23 +0100 (CET) Received: (qmail 85589 invoked by uid 500); 6 Mar 2017 18:18:22 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 85443 invoked by uid 99); 6 Mar 2017 18:18:22 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 06 Mar 2017 18:18:22 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 42495DFF09; Mon, 6 Mar 2017 18:18:22 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jhung@apache.org To: common-commits@hadoop.apache.org Date: Mon, 06 Mar 2017 18:18:23 -0000 Message-Id: <64af1a8d361c46c2abc7d1268d0c2da3@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [03/21] hadoop git commit: HADOOP-6801. io.sort.mb and io.sort.factor were renamed and moved to mapreduce but are still in CommonConfigurationKeysPublic.java and used in SequenceFile.java. archived-at: Mon, 06 Mar 2017 18:18:24 -0000 HADOOP-6801. io.sort.mb and io.sort.factor were renamed and moved to mapreduce but are still in CommonConfigurationKeysPublic.java and used in SequenceFile.java. This closes #146 Signed-off-by: Akira Ajisaka Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/eb5a1795 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/eb5a1795 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/eb5a1795 Branch: refs/heads/YARN-5734 Commit: eb5a17954a758fdb1f3f29ef34e129d5f37d3a26 Parents: 19a1fc6 Author: Harsh J Authored: Wed Oct 26 13:21:51 2016 +0530 Committer: Akira Ajisaka Committed: Fri Mar 3 18:38:04 2017 +0900 ---------------------------------------------------------------------- .../fs/CommonConfigurationKeysPublic.java | 29 ++++++++- .../java/org/apache/hadoop/io/SequenceFile.java | 20 +++++- .../src/main/resources/core-default.xml | 19 ++++++ .../org/apache/hadoop/io/TestSequenceFile.java | 68 +++++++++++++++++++- 4 files changed, 131 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/eb5a1795/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index e1feda1..6b267dc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -250,18 +250,43 @@ public class CommonConfigurationKeysPublic { * @deprecated Moved to mapreduce, see mapreduce.task.io.sort.mb * in mapred-default.xml * See https://issues.apache.org/jira/browse/HADOOP-6801 + * + * For {@link org.apache.hadoop.io.SequenceFile.Sorter} control + * instead, see {@link #SEQ_IO_SORT_MB_KEY}. */ public static final String IO_SORT_MB_KEY = "io.sort.mb"; - /** Default value for IO_SORT_MB_DEFAULT */ + /** Default value for {@link #IO_SORT_MB_KEY}. */ public static final int IO_SORT_MB_DEFAULT = 100; /** * @deprecated Moved to mapreduce, see mapreduce.task.io.sort.factor * in mapred-default.xml * See https://issues.apache.org/jira/browse/HADOOP-6801 + * + * For {@link org.apache.hadoop.io.SequenceFile.Sorter} control + * instead, see {@link #SEQ_IO_SORT_FACTOR_KEY}. */ public static final String IO_SORT_FACTOR_KEY = "io.sort.factor"; - /** Default value for IO_SORT_FACTOR_DEFAULT */ + /** Default value for {@link #IO_SORT_FACTOR_KEY}. */ public static final int IO_SORT_FACTOR_DEFAULT = 100; + + /** + * @see + * + * core-default.xml + */ + public static final String SEQ_IO_SORT_MB_KEY = "seq.io.sort.mb"; + /** Default value for {@link #SEQ_IO_SORT_MB_KEY}. */ + public static final int SEQ_IO_SORT_MB_DEFAULT = 100; + + /** + * @see + * + * core-default.xml + */ + public static final String SEQ_IO_SORT_FACTOR_KEY = "seq.io.sort.factor"; + /** Default value for {@link #SEQ_IO_SORT_FACTOR_KEY}. */ + public static final int SEQ_IO_SORT_FACTOR_DEFAULT = 100; + /** * @see * http://git-wip-us.apache.org/repos/asf/hadoop/blob/eb5a1795/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java index c510ff7..de0bf4f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java @@ -2816,14 +2816,30 @@ public class SequenceFile { } /** Sort and merge using an arbitrary {@link RawComparator}. */ + @SuppressWarnings("deprecation") public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, Class valClass, Configuration conf, Metadata metadata) { this.fs = fs; this.comparator = comparator; this.keyClass = keyClass; this.valClass = valClass; - this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024; - this.factor = conf.getInt("io.sort.factor", 100); + // Remember to fall-back on the deprecated MB and Factor keys + // until they are removed away permanently. + if (conf.get(CommonConfigurationKeys.IO_SORT_MB_KEY) != null) { + this.memory = conf.getInt(CommonConfigurationKeys.IO_SORT_MB_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_MB_DEFAULT) * 1024 * 1024; + } else { + this.memory = conf.getInt(CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_MB_DEFAULT) * 1024 * 1024; + } + if (conf.get(CommonConfigurationKeys.IO_SORT_FACTOR_KEY) != null) { + this.factor = conf.getInt(CommonConfigurationKeys.IO_SORT_FACTOR_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_DEFAULT); + } else { + this.factor = conf.getInt( + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_DEFAULT); + } this.conf = conf; this.metadata = metadata; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/eb5a1795/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 08ca05b..35be56b 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -2476,4 +2476,23 @@ in audit logs. + + + seq.io.sort.mb + 100 + + The total amount of buffer memory to use while sorting files, + while using SequenceFile.Sorter, in megabytes. By default, + gives each merge stream 1MB, which should minimize seeks. + + + + seq.io.sort.factor + 100 + + The number of streams to merge at once while sorting + files using SequenceFile.Sorter. + This determines the number of open file handles. + + http://git-wip-us.apache.org/repos/asf/hadoop/blob/eb5a1795/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java index b76cff6..e97ab6a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java @@ -38,6 +38,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertFalse; import static org.junit.Assert.fail; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import org.mockito.Mockito; @@ -54,7 +55,72 @@ public class TestSequenceFile { compressedSeqFileTest(new DefaultCodec()); LOG.info("Successfully tested SequenceFile with DefaultCodec"); } - + + @SuppressWarnings("deprecation") + public void testSorterProperties() throws IOException { + // Test to ensure that deprecated properties have no default + // references anymore. + Configuration config = new Configuration(); + assertNull("The deprecated sort memory property " + + CommonConfigurationKeys.IO_SORT_MB_KEY + + " must not exist in any core-*.xml files.", + config.get(CommonConfigurationKeys.IO_SORT_MB_KEY)); + assertNull("The deprecated sort factor property " + + CommonConfigurationKeys.IO_SORT_FACTOR_KEY + + " must not exist in any core-*.xml files.", + config.get(CommonConfigurationKeys.IO_SORT_FACTOR_KEY)); + + // Test deprecated property honoring + // Set different values for old and new property names + // and compare which one gets loaded + config = new Configuration(); + FileSystem fs = FileSystem.get(config); + config.setInt(CommonConfigurationKeys.IO_SORT_MB_KEY, 10); + config.setInt(CommonConfigurationKeys.IO_SORT_FACTOR_KEY, 10); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY, 20); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY, 20); + SequenceFile.Sorter sorter = new SequenceFile.Sorter( + fs, Text.class, Text.class, config); + assertEquals("Deprecated memory conf must be honored over newer property", + 10*1024*1024, sorter.getMemory()); + assertEquals("Deprecated factor conf must be honored over newer property", + 10, sorter.getFactor()); + + // Test deprecated properties (graceful deprecation) + config = new Configuration(); + fs = FileSystem.get(config); + config.setInt(CommonConfigurationKeys.IO_SORT_MB_KEY, 10); + config.setInt(CommonConfigurationKeys.IO_SORT_FACTOR_KEY, 10); + sorter = new SequenceFile.Sorter( + fs, Text.class, Text.class, config); + assertEquals("Deprecated memory property " + + CommonConfigurationKeys.IO_SORT_MB_KEY + + " must get properly applied.", + 10*1024*1024, // In bytes + sorter.getMemory()); + assertEquals("Deprecated sort factor property " + + CommonConfigurationKeys.IO_SORT_FACTOR_KEY + + " must get properly applied.", + 10, sorter.getFactor()); + + // Test regular properties (graceful deprecation) + config = new Configuration(); + fs = FileSystem.get(config); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY, 20); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY, 20); + sorter = new SequenceFile.Sorter( + fs, Text.class, Text.class, config); + assertEquals("Memory property " + + CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY + + " must get properly applied if present.", + 20*1024*1024, // In bytes + sorter.getMemory()); + assertEquals("Merge factor property " + + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY + + " must get properly applied if present.", + 20, sorter.getFactor()); + } + public void compressedSeqFileTest(CompressionCodec codec) throws Exception { int count = 1024 * 10; int megabytes = 1; --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org