From: aengineer@apache.org
To: common-commits@hadoop.apache.org
Date: Mon, 23 May 2016 23:21:14 -0000
Message-Id: <0649783fe1d642ef99ae2fbea96bfed3@git.apache.org>
In-Reply-To: <0183e187a76049c1b5e83355610edae2@git.apache.org>
References: <0183e187a76049c1b5e83355610edae2@git.apache.org>
Subject: [36/50] hadoop git commit: HADOOP-13145 In DistCp, prevent unnecessary getFileStatus call when not preserving metadata. Contributed by Chris Nauroth.

HADOOP-13145 In DistCp, prevent unnecessary getFileStatus call when not preserving metadata. Contributed by Chris Nauroth.
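The optimization itself is small: DistCpUtils.preserve() used to call targetFS.getFileStatus() unconditionally, even when the DistCp options preserve nothing that a FileStatus provides; against object stores such as S3A or WASB each such call typically costs one or more HTTP requests per copied file. The sketch below restates the guard added in the DistCpUtils.java hunk further down as a small standalone helper; the class and method names here are illustrative only and are not part of the patch.

    import java.io.IOException;
    import java.util.EnumSet;

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.tools.DistCpOptions.FileAttribute;

    /** Illustrative restatement of the guard added by this patch; not part of the commit. */
    final class PreserveStatusSketch {

      private PreserveStatusSketch() {
      }

      /**
       * Returns the target FileStatus only when at least one attribute is being
       * preserved, mirroring the new behaviour of DistCpUtils.preserve().
       */
      static FileStatus statusIfNeeded(FileSystem targetFS, Path path,
          EnumSet<FileAttribute> attributes) throws IOException {
        // Nothing to preserve from FileStatus: skip the getFileStatus() call entirely.
        return attributes.isEmpty() ? null : targetFS.getFileStatus(path);
      }
    }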
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c918286b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c918286b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c918286b Branch: refs/heads/HDFS-1312 Commit: c918286b17e7f2a64735d0c972a8dd749e0bf6c4 Parents: 0287c49 Author: Steve Loughran Authored: Fri May 20 12:21:35 2016 +0100 Committer: Steve Loughran Committed: Fri May 20 12:21:59 2016 +0100 ---------------------------------------------------------------------- hadoop-project/pom.xml | 6 + hadoop-tools/hadoop-aws/pom.xml | 11 + .../src/site/markdown/tools/hadoop-aws/index.md | 9 + .../fs/contract/s3a/TestS3AContractDistCp.java | 46 +++++ hadoop-tools/hadoop-azure/pom.xml | 19 ++ .../contract/TestAzureNativeContractDistCp.java | 33 +++ hadoop-tools/hadoop-distcp/pom.xml | 16 ++ .../apache/hadoop/tools/util/DistCpUtils.java | 10 +- .../contract/AbstractContractDistCpTest.java | 204 +++++++++++++++++++ 9 files changed, 351 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-project/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 3a2f9d9..bee2e58 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -355,6 +355,12 @@ org.apache.hadoop + hadoop-distcp + ${project.version} + test-jar + + + org.apache.hadoop hadoop-datajoin ${project.version} http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-aws/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index dfcb1b0..c95f1e6 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -252,5 +252,16 @@ test jar + + org.apache.hadoop + hadoop-distcp + test + + + org.apache.hadoop + hadoop-distcp + test + test-jar + http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 95e3274..fe81400 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -731,6 +731,15 @@ or in batch runs. Smaller values should result in faster test runs, especially when the object store is a long way away. +DistCp tests targeting S3A support a configurable file size. The default is +10 MB, but the configuration value is expressed in KB so that it can be tuned +smaller to achieve faster test runs. + + + scale.test.distcp.file.size.kb + 10240 + + ### Running the Tests After completing the configuration, execute the test run through Maven. 
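Written out as the Hadoop configuration XML that the documentation above refers to, the new test option would look like the snippet below; where it lives (for example the auth-keys.xml resource that the S3A test instructions use for per-developer settings) depends on your test setup.

    <property>
      <name>scale.test.distcp.file.size.kb</name>
      <!-- Default of 10240 KB (10 MB); smaller values, e.g. 1024, give faster runs. -->
      <value>10240</value>
    </property>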
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractDistCp.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractDistCp.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractDistCp.java new file mode 100644 index 0000000..7eb0afa --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractDistCp.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.s3a; + +import static org.apache.hadoop.fs.s3a.Constants.MIN_MULTIPART_THRESHOLD; +import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_SIZE; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; + +/** + * Contract test suite covering S3A integration with DistCp. 
+ */ +public class TestS3AContractDistCp extends AbstractContractDistCpTest { + + private static final long MULTIPART_SETTING = 8 * 1024 * 1024; // 8 MB + + @Override + protected Configuration createConfiguration() { + Configuration newConf = super.createConfiguration(); + newConf.setLong(MIN_MULTIPART_THRESHOLD, MULTIPART_SETTING); + newConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING); + return newConf; + } + + @Override + protected S3AContract createContract(Configuration conf) { + return new S3AContract(conf); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-azure/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index 8344ed7..02a1240 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -194,6 +194,25 @@ + org.apache.hadoop + hadoop-mapreduce-client-jobclient + test + + + + org.apache.hadoop + hadoop-distcp + test + + + + org.apache.hadoop + hadoop-distcp + test + test-jar + + + org.mockito mockito-all test http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java new file mode 100644 index 0000000..a3750d4 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/TestAzureNativeContractDistCp.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azure.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; + +/** + * Contract test suite covering WASB integration with DistCp. 
+ */ +public class TestAzureNativeContractDistCp extends AbstractContractDistCpTest { + + @Override + protected NativeAzureFileSystemContract createContract(Configuration conf) { + return new NativeAzureFileSystemContract(conf); + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-distcp/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/pom.xml b/hadoop-tools/hadoop-distcp/pom.xml index 4ea38c3..2cec22f 100644 --- a/hadoop-tools/hadoop-distcp/pom.xml +++ b/hadoop-tools/hadoop-distcp/pom.xml @@ -186,6 +186,22 @@ + + + prepare-jar + prepare-package + + jar + + + + prepare-test-jar + prepare-package + + test-jar + + + org.apache.maven.plugins http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java index d3d7677..1784c5d 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java @@ -195,9 +195,13 @@ public class DistCpUtils { EnumSet attributes, boolean preserveRawXattrs) throws IOException { - FileStatus targetFileStatus = targetFS.getFileStatus(path); - String group = targetFileStatus.getGroup(); - String user = targetFileStatus.getOwner(); + // If not preserving anything from FileStatus, don't bother fetching it. + FileStatus targetFileStatus = attributes.isEmpty() ? null : + targetFS.getFileStatus(path); + String group = targetFileStatus == null ? null : + targetFileStatus.getGroup(); + String user = targetFileStatus == null ? null : + targetFileStatus.getOwner(); boolean chown = false; if (attributes.contains(FileAttribute.ACL)) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/c918286b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java new file mode 100644 index 0000000..a4f50c7 --- /dev/null +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/contract/AbstractContractDistCpTest.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.tools.contract; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.*; +import static org.junit.Assert.*; + +import java.util.Arrays; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContractTestBase; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.tools.DistCp; +import org.apache.hadoop.tools.DistCpOptions; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TestName; + +/** + * Contract test suite covering a file system's integration with DistCp. The + * tests coordinate two file system instances: one "local", which is the local + * file system, and the other "remote", which is the file system implementation + * under test. The tests in the suite cover both copying from local to remote + * (e.g. a backup use case) and copying from remote to local (e.g. a restore use + * case). + */ +public abstract class AbstractContractDistCpTest + extends AbstractFSContractTestBase { + + @Rule + public TestName testName = new TestName(); + + private Configuration conf; + private FileSystem localFS, remoteFS; + private Path localDir, remoteDir; + + @Override + protected Configuration createConfiguration() { + Configuration newConf = new Configuration(); + newConf.set("mapred.job.tracker", "local"); + return newConf; + } + + @Before + @Override + public void setup() throws Exception { + super.setup(); + conf = getContract().getConf(); + localFS = FileSystem.getLocal(conf); + remoteFS = getFileSystem(); + // Test paths are isolated by concrete subclass name and test method name. + // All paths are fully qualified including scheme (not taking advantage of + // default file system), so if something fails, the messages will make it + // clear which paths are local and which paths are remote. + Path testSubDir = new Path(getClass().getSimpleName(), + testName.getMethodName()); + localDir = localFS.makeQualified(new Path(new Path( + GenericTestUtils.getTestDir().toURI()), testSubDir)); + mkdirs(localFS, localDir); + remoteDir = remoteFS.makeQualified( + new Path(getContract().getTestPath(), testSubDir)); + mkdirs(remoteFS, remoteDir); + } + + @Test + public void deepDirectoryStructureToRemote() throws Exception { + describe("copy a deep directory structure from local to remote"); + deepDirectoryStructure(localFS, localDir, remoteFS, remoteDir); + } + + @Test + public void largeFilesToRemote() throws Exception { + describe("copy multiple large files from local to remote"); + largeFiles(localFS, localDir, remoteFS, remoteDir); + } + + @Test + public void deepDirectoryStructureFromRemote() throws Exception { + describe("copy a deep directory structure from remote to local"); + deepDirectoryStructure(remoteFS, remoteDir, localFS, localDir); + } + + @Test + public void largeFilesFromRemote() throws Exception { + describe("copy multiple large files from remote to local"); + largeFiles(remoteFS, remoteDir, localFS, localDir); + } + + /** + * Executes a test using a file system sub-tree with multiple nesting levels. 
+ * + * @param srcFS source FileSystem + * @param srcDir source directory + * @param dstFS destination FileSystem + * @param dstDir destination directory + * @throws Exception if there is a failure + */ + private void deepDirectoryStructure(FileSystem srcFS, Path srcDir, + FileSystem dstFS, Path dstDir) throws Exception { + Path inputDir = new Path(srcDir, "inputDir"); + Path inputSubDir1 = new Path(inputDir, "subDir1"); + Path inputSubDir2 = new Path(inputDir, "subDir2/subDir3"); + Path inputFile1 = new Path(inputDir, "file1"); + Path inputFile2 = new Path(inputSubDir1, "file2"); + Path inputFile3 = new Path(inputSubDir2, "file3"); + mkdirs(srcFS, inputSubDir1); + mkdirs(srcFS, inputSubDir2); + byte[] data1 = dataset(100, 33, 43); + createFile(srcFS, inputFile1, true, data1); + byte[] data2 = dataset(200, 43, 53); + createFile(srcFS, inputFile2, true, data2); + byte[] data3 = dataset(300, 53, 63); + createFile(srcFS, inputFile3, true, data3); + Path target = new Path(dstDir, "outputDir"); + runDistCp(inputDir, target); + ContractTestUtils.assertIsDirectory(dstFS, target); + verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1); + verifyFileContents(dstFS, + new Path(target, "inputDir/subDir1/file2"), data2); + verifyFileContents(dstFS, + new Path(target, "inputDir/subDir2/subDir3/file3"), data3); + } + + /** + * Executes a test using multiple large files. + * + * @param srcFS source FileSystem + * @param srcDir source directory + * @param dstFS destination FileSystem + * @param dstDir destination directory + * @throws Exception if there is a failure + */ + private void largeFiles(FileSystem srcFS, Path srcDir, FileSystem dstFS, + Path dstDir) throws Exception { + Path inputDir = new Path(srcDir, "inputDir"); + Path inputFile1 = new Path(inputDir, "file1"); + Path inputFile2 = new Path(inputDir, "file2"); + Path inputFile3 = new Path(inputDir, "file3"); + mkdirs(srcFS, inputDir); + int fileSizeKb = conf.getInt("scale.test.distcp.file.size.kb", 10 * 1024); + int fileSizeMb = fileSizeKb / 1024; + getLog().info("{} with file size {}", testName.getMethodName(), fileSizeMb); + byte[] data1 = dataset((fileSizeMb + 1) * 1024 * 1024, 33, 43); + createFile(srcFS, inputFile1, true, data1); + byte[] data2 = dataset((fileSizeMb + 2) * 1024 * 1024, 43, 53); + createFile(srcFS, inputFile2, true, data2); + byte[] data3 = dataset((fileSizeMb + 3) * 1024 * 1024, 53, 63); + createFile(srcFS, inputFile3, true, data3); + Path target = new Path(dstDir, "outputDir"); + runDistCp(inputDir, target); + ContractTestUtils.assertIsDirectory(dstFS, target); + verifyFileContents(dstFS, new Path(target, "inputDir/file1"), data1); + verifyFileContents(dstFS, new Path(target, "inputDir/file2"), data2); + verifyFileContents(dstFS, new Path(target, "inputDir/file3"), data3); + } + + /** + * Executes DistCp and asserts that the job finished successfully. + * + * @param src source path + * @param dst destination path + * @throws Exception if there is a failure + */ + private void runDistCp(Path src, Path dst) throws Exception { + DistCpOptions options = new DistCpOptions(Arrays.asList(src), dst); + Job job = new DistCp(conf, options).execute(); + assertNotNull("Unexpected null job returned from DistCp execution.", job); + assertTrue("DistCp job did not complete.", job.isComplete()); + assertTrue("DistCp job did not complete successfully.", job.isSuccessful()); + } + + /** + * Creates a directory and any ancestor directories required.
+ * + * @param fs FileSystem in which to create directories + * @param dir path of directory to create + * @throws Exception if there is a failure + */ + private static void mkdirs(FileSystem fs, Path dir) throws Exception { + assertTrue("Failed to mkdir " + dir, fs.mkdirs(dir)); + } +}
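Other FileSystem connectors can reuse this suite by following the pattern of the two subclasses in this patch: add test-scoped dependencies on the hadoop-distcp jar and test-jar, then extend AbstractContractDistCpTest and return the connector's contract. The sketch below assumes a hypothetical connector whose existing AbstractFSContract implementation is called MyFsContract; neither that class nor the package name is part of this commit.

    package org.apache.hadoop.fs.contract.myfs;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;

    /** Hypothetical DistCp contract test for a connector called "myfs". */
    public class TestMyFsContractDistCp extends AbstractContractDistCpTest {

      @Override
      protected MyFsContract createContract(Configuration conf) {
        // MyFsContract is assumed to extend AbstractFSContract and bind the suite
        // to the connector's test container, just as S3AContract does for S3A.
        return new MyFsContract(conf);
      }
    }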