From: fabbri@apache.org
To: common-commits@hadoop.apache.org
Date: Thu, 14 Sep 2017 23:13:26 -0000
Message-Id: <77ea67e60e6c472eabdee3f0954e04eb@git.apache.org>
Subject: [5/5] hadoop git commit: HADOOP-14738 Remove S3N and obsolete bits of S3A; rework docs. Contributed by Steve Loughran.

HADOOP-14738 Remove S3N and obsolete bits of S3A; rework docs. Contributed by Steve Loughran.
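With s3n removed, anything still addressing data as s3n:// or configured through fs.s3n.* properties needs to move to the S3A connector. A minimal sketch of that move from the Java API, assuming a placeholder bucket name and credentials set inline (fs.s3a.access.key / fs.s3a.secret.key appear in the core-default.xml hunk below; IAM-role or credential-provider authentication is the preferred alternative):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Illustrative only: moving an s3n:// job onto the S3A connector. */
public class S3AMigrationSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Replaces fs.s3n.awsAccessKeyId / fs.s3n.awsSecretAccessKey; the values
    // here are placeholders, not real keys.
    conf.set("fs.s3a.access.key", "PLACEHOLDER_ACCESS_KEY");
    conf.set("fs.s3a.secret.key", "PLACEHOLDER_SECRET_KEY");

    // Paths change scheme only: s3n://my-bucket/data becomes s3a://my-bucket/data.
    FileSystem fs = FileSystem.get(URI.create("s3a://my-bucket/"), conf);
    try (FSDataOutputStream out = fs.create(new Path("s3a://my-bucket/data/example.txt"))) {
      out.writeBytes("written through the s3a connector\n");
    }
  }
}

The same scheme switch applies to tool arguments such as distcp paths, and per-bucket overrides of the form fs.s3a.bucket.BUCKET.secret.key (visible in the TestConfigRedactor hunk below) allow different credentials per bucket.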
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/62e8a5ce Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/62e8a5ce Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/62e8a5ce Branch: refs/heads/branch-3.0 Commit: 62e8a5ceed71de335e1579edef9ef6c515d352c6 Parents: 8d49c2f Author: Aaron Fabbri Authored: Thu Sep 14 09:58:17 2017 -0700 Committer: Aaron Fabbri Committed: Thu Sep 14 16:12:29 2017 -0700 ---------------------------------------------------------------------- .../hadoop-client-minicluster/pom.xml | 23 - .../src/main/conf/log4j.properties | 2 - .../src/main/resources/core-default.xml | 124 +- .../src/site/markdown/filesystem/filesystem.md | 6 +- .../site/markdown/filesystem/introduction.md | 7 +- .../src/site/markdown/filesystem/testing.md | 10 +- .../conf/TestCommonConfigurationFields.java | 6 +- .../apache/hadoop/conf/TestConfigRedactor.java | 1 - .../hadoop/fs/FileSystemContractBaseTest.java | 41 +- .../src/test/resources/core-site.xml | 6 - .../src/test/resources/jets3t.properties | 16 - hadoop-project/pom.xml | 5 - hadoop-project/src/site/markdown/index.md.vm | 10 + .../hadoop-aws/dev-support/findbugs-exclude.xml | 4 - hadoop-tools/hadoop-aws/pom.xml | 7 - .../org/apache/hadoop/fs/s3a/Constants.java | 3 + .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 71 +- .../apache/hadoop/fs/s3a/S3AOutputStream.java | 143 -- .../apache/hadoop/fs/s3native/FileMetadata.java | 59 - .../s3native/Jets3tNativeFileSystemStore.java | 481 ----- .../fs/s3native/NativeFileSystemStore.java | 67 - .../hadoop/fs/s3native/NativeS3FileSystem.java | 799 +------- .../hadoop/fs/s3native/PartialListing.java | 64 - .../hadoop/fs/s3native/S3Credentials.java | 100 - .../apache/hadoop/fs/s3native/S3Exception.java | 39 - .../s3native/S3NativeFileSystemConfigKeys.java | 66 - .../org/apache/hadoop/fs/s3native/package.html | 5 +- .../markdown/tools/hadoop-aws/encryption.md | 427 +++++ .../src/site/markdown/tools/hadoop-aws/index.md | 1753 ++++++------------ .../site/markdown/tools/hadoop-aws/s3guard.md | 19 +- .../src/site/markdown/tools/hadoop-aws/s3n.md | 52 + .../site/markdown/tools/hadoop-aws/testing.md | 91 +- .../tools/hadoop-aws/troubleshooting_s3a.md | 701 ++++++- .../fs/contract/s3a/ITestS3AContractDistCp.java | 1 - .../fs/contract/s3n/ITestS3NContractCreate.java | 41 - .../fs/contract/s3n/ITestS3NContractDelete.java | 34 - .../fs/contract/s3n/ITestS3NContractMkdir.java | 34 - .../fs/contract/s3n/ITestS3NContractOpen.java | 34 - .../fs/contract/s3n/ITestS3NContractRename.java | 35 - .../contract/s3n/ITestS3NContractRootDir.java | 35 - .../fs/contract/s3n/ITestS3NContractSeek.java | 34 - .../fs/contract/s3n/NativeS3Contract.java | 50 - .../hadoop/fs/s3a/ITestS3ABlockOutputArray.java | 1 - ...ITestS3AEncryptionSSECBlockOutputStream.java | 1 - ...onSSEKMSUserDefinedKeyBlockOutputStream.java | 4 +- ...TestS3AEncryptionSSES3BlockOutputStream.java | 1 - .../fs/s3a/scale/AbstractSTestS3AHugeFiles.java | 1 - .../scale/ITestS3AHugeFilesClassicOutput.java | 41 - ...ITestInMemoryNativeS3FileSystemContract.java | 33 - .../ITestJets3tNativeFileSystemStore.java | 133 -- .../ITestJets3tNativeS3FileSystemContract.java | 33 - .../s3native/InMemoryNativeFileSystemStore.java | 213 --- .../NativeS3FileSystemContractBaseTest.java | 266 --- .../fs/s3native/S3NInMemoryFileSystem.java | 32 - .../hadoop/fs/s3native/TestS3Credentials.java | 129 -- .../fs/s3native/TestS3NInMemoryFileSystem.java | 69 - .../src/test/resources/contract/s3n.xml | 110 -- 
.../src/test/resources/log4j.properties | 3 + 58 files changed, 1853 insertions(+), 4723 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-client-modules/hadoop-client-minicluster/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index d6ed67b..815127d 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -355,29 +355,6 @@ true - net.java.dev.jets3t - jets3t - true - - - commons-codec - commons-codec - - - commons-logging - commons-logging - - - org.apache.httpcomponents - httpclient - - - org.apache.httpcomponents - httpcore - - - - com.jcraft jsch true http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties index 3752ad1..bc1fa6c 100644 --- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties @@ -182,8 +182,6 @@ log4j.appender.DNMETRICSRFA.MaxFileSize=64MB #log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG #log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG -# Jets3t library -log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR # AWS SDK & S3A FileSystem log4j.logger.com.amazonaws=ERROR http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 6cce647..a11e7c3 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -872,30 +872,6 @@ - fs.s3n.buffer.dir - ${hadoop.tmp.dir}/s3n - Determines where on the local filesystem the s3n:// filesystem - should store files before sending them to S3 - (or after retrieving them from S3). - - - - - fs.s3n.maxRetries - 4 - The maximum number of retries for reading or writing files to S3, - before we signal failure to the application. - - - - - fs.s3n.sleepTimeSeconds - 10 - The number of seconds to sleep between each S3 retry. - - - - fs.swift.impl org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem The implementation class of the OpenStack Swift Filesystem @@ -912,56 +888,6 @@ - fs.s3n.awsAccessKeyId - AWS access key ID used by S3 native file system. - - - - fs.s3n.awsSecretAccessKey - AWS secret key used by S3 native file system. - - - - fs.s3n.block.size - 67108864 - Block size to use when reading files using the native S3 - filesystem (s3n: URIs). - - - - fs.s3n.multipart.uploads.enabled - false - Setting this property to true enables multiple uploads to - native S3 filesystem. When uploading a file, it is split into blocks - if the size is larger than fs.s3n.multipart.uploads.block.size. 
- - - - - fs.s3n.multipart.uploads.block.size - 67108864 - The block size for multipart uploads to native S3 filesystem. - Default size is 64MB. - - - - - fs.s3n.multipart.copy.block.size - 5368709120 - The block size for multipart copy in native S3 filesystem. - Default size is 5GB. - - - - - fs.s3n.server-side-encryption-algorithm - - Specify a server-side encryption algorithm for S3. - Unset by default, and the only other currently allowable value is AES256. - - - - fs.s3a.access.key AWS access key ID used by S3A file system. Omit for IAM role-based or provider-based authentication. @@ -1235,21 +1161,11 @@ - fs.s3a.fast.upload - false - - Use the incremental block-based fast upload mechanism with - the buffering mechanism set in fs.s3a.fast.upload.buffer. - - - - fs.s3a.fast.upload.buffer disk - The buffering mechanism to use when using S3A fast upload - (fs.s3a.fast.upload=true). Values: disk, array, bytebuffer. - This configuration option has no effect if fs.s3a.fast.upload is false. + The buffering mechanism to for data being written. + Values: disk, array, bytebuffer. "disk" will use the directories listed in fs.s3a.buffer.dir as the location(s) to save data prior to being uploaded. @@ -1803,42 +1719,6 @@ Replication factor - - - - s3native.stream-buffer-size - 4096 - The size of buffer to stream files. - The size of this buffer should probably be a multiple of hardware - page size (4096 on Intel x86), and it determines how much data is - buffered during read and write operations. - - - - s3native.bytes-per-checksum - 512 - The number of bytes per checksum. Must not be larger than - s3native.stream-buffer-size - - - - s3native.client-write-packet-size - 65536 - Packet size for clients to write - - - - s3native.blocksize - 67108864 - Block size - - - - s3native.replication - 3 - Replication factor - - ftp.stream-buffer-size http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index 1e522c7..e67cbe3 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -605,7 +605,7 @@ The result is `FSDataOutputStream`, which through its operations may generate ne clients creating files with `overwrite==true` to fail if the file is created by another client between the two tests. -* S3N, S3A, Swift and potentially other Object Stores do not currently change the FS state +* S3A, Swift and potentially other Object Stores do not currently change the FS state until the output stream `close()` operation is completed. This MAY be a bug, as it allows >1 client to create a file with `overwrite==false`, and potentially confuse file/directory logic @@ -961,7 +961,7 @@ The outcome is no change to FileSystem state, with a return value of false. FS' = FS; result = False -*Local Filesystem, S3N* +*Local Filesystem* The outcome is as a normal rename, with the additional (implicit) feature that the parent directories of the destination also exist. @@ -1262,4 +1262,4 @@ It currently supports to query: * `StreamCapabilties.HFLUSH` ("*hflush*"): the capability to flush out the data in client's buffer. 
* `StreamCapabilities.HSYNC` ("*hsync*"): capability to flush out the data in - client's buffer and the disk device. \ No newline at end of file + client's buffer and the disk device. http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md index 12a7967..37191a5 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/introduction.md @@ -29,11 +29,10 @@ return codes of Unix filesystem actions as a reference. Even so, there are places where HDFS diverges from the expected behaviour of a POSIX filesystem. -The behaviour of other Hadoop filesystems are not as rigorously tested. -The bundled S3N and S3A FileSystem clients make Amazon's S3 Object Store ("blobstore") +The bundled S3A FileSystem clients make Amazon's S3 Object Store ("blobstore") accessible through the FileSystem API. The Swift FileSystem driver provides similar -functionality for the OpenStack Swift blobstore. The Azure object storage -FileSystem talks to Microsoft's Azure equivalent. All of these +functionality for the OpenStack Swift blobstore. The Azure WASB and ADL object +storage FileSystems talks to Microsoft's Azure storage. All of these bind to object stores, which do have different behaviors, especially regarding consistency guarantees, and atomicity of operations. http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md index 6823e0c..4c6fa3f 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md @@ -195,21 +195,21 @@ equivalent. Furthermore, the build MUST be configured to never bundle this file In addition, `src/test/resources/auth-keys.xml` will need to be created. It can be a copy of `contract-test-options.xml`. The `AbstractFSContract` class automatically loads this resource file if present; specific keys for specific test cases can be added. -As an example, here are what S3N test keys look like: +As an example, here are what S3A test keys look like: - fs.contract.test.fs.s3n - s3n://tests3contract + fs.contract.test.fs.s3a + s3a://tests3contract - fs.s3n.awsAccessKeyId + fs.s3a.access.key DONOTPCOMMITTHISKEYTOSCM - fs.s3n.awsSecretAccessKey + fs.s3a.secret.key DONOTEVERSHARETHISSECRETKEY! 
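A note on the write-path change that shows up in the core-default.xml hunk above and in the Constants.java / S3AFileSystem.java hunks further down: fs.s3a.fast.upload is deprecated and the incremental block upload is the only mechanism left, so the tuning options that still matter are fs.s3a.fast.upload.buffer (disk, array or bytebuffer) and, for disk buffering, fs.s3a.buffer.dir. A minimal sketch of setting these programmatically, assuming a placeholder bucket and local directory:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Illustrative only: buffer tuning for the (now sole) block upload write path. */
public class S3ABufferTuningSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Deprecated flag: setting it to false now only logs a warning; the block
    // output stream is used regardless.
    conf.setBoolean("fs.s3a.fast.upload", true);
    // Buffering mechanism for data being written: disk (default), array or bytebuffer.
    conf.set("fs.s3a.fast.upload.buffer", "disk");
    // Local directories used for "disk" buffering before upload; placeholder path.
    conf.set("fs.s3a.buffer.dir", "/tmp/s3a-upload-buffers");

    FileSystem fs = FileSystem.get(URI.create("s3a://my-bucket/"), conf);
    // Every fs.create(...) call now returns a stream backed by S3ABlockOutputStream.
    fs.create(new Path("s3a://my-bucket/tmp/probe")).close();
  }
}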
http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java index 3324886..864c10c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestCommonConfigurationFields.java @@ -94,14 +94,10 @@ public class TestCommonConfigurationFields extends TestConfigurationFieldsBase { xmlPropsToSkipCompare.add("hadoop.tmp.dir"); xmlPropsToSkipCompare.add("nfs3.mountd.port"); xmlPropsToSkipCompare.add("nfs3.server.port"); - xmlPropsToSkipCompare.add("test.fs.s3n.name"); xmlPropsToSkipCompare.add("fs.viewfs.rename.strategy"); - // S3N/S3A properties are in a different subtree. - // - org.apache.hadoop.fs.s3native.S3NativeFileSystemConfigKeys + // S3A properties are in a different subtree. xmlPrefixToSkipCompare.add("fs.s3a."); - xmlPrefixToSkipCompare.add("fs.s3n."); - xmlPrefixToSkipCompare.add("s3native."); // WASB properties are in a different subtree. // - org.apache.hadoop.fs.azure.NativeAzureFileSystem http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java index 4790f7c..3133942 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfigRedactor.java @@ -54,7 +54,6 @@ public class TestConfigRedactor { "fs.s3a.bucket.BUCKET.secret.key", "fs.s3a.server-side-encryption.key", "fs.s3a.bucket.engineering.server-side-encryption.key", - "fs.s3n.awsSecretKey", "fs.azure.account.key.abcdefg.blob.core.windows.net", "fs.adl.oauth2.refresh.token", "fs.adl.oauth2.credential", http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java index 9d8cd64..b49dd53 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java @@ -246,39 +246,18 @@ public abstract class FileSystemContractBaseTest { @Test public void testMkdirsWithUmask() throws Exception { - if (!isS3(fs)) { - Configuration conf = fs.getConf(); - String oldUmask = conf.get(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY); - try { - conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK); - final Path dir = path("newDir"); - 
assertTrue(fs.mkdirs(dir, new FsPermission((short) 0777))); - FileStatus status = fs.getFileStatus(dir); - assertTrue(status.isDirectory()); - assertEquals((short) 0715, status.getPermission().toShort()); - } finally { - conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, oldUmask); - } - } - } - - /** - * Skip permission tests for S3FileSystem until HDFS-1333 is fixed. - * Classes that do not implement {@link FileSystem#getScheme()} method - * (e.g {@link RawLocalFileSystem}) will throw an - * {@link UnsupportedOperationException}. - * @param fileSystem FileSystem object to determine if it is S3 or not - * @return true if S3 false in any other case - */ - private boolean isS3(FileSystem fileSystem) { + Configuration conf = fs.getConf(); + String oldUmask = conf.get(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY); try { - if (fileSystem.getScheme().equals("s3n")) { - return true; - } - } catch (UnsupportedOperationException e) { - LOG.warn("Unable to determine the schema of filesystem."); + conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, TEST_UMASK); + final Path dir = path("newDir"); + assertTrue(fs.mkdirs(dir, new FsPermission((short) 0777))); + FileStatus status = fs.getFileStatus(dir); + assertTrue(status.isDirectory()); + assertEquals((short) 0715, status.getPermission().toShort()); + } finally { + conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, oldUmask); } - return false; } @Test http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml b/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml index d85472c..d9144eb 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/core-site.xml @@ -45,12 +45,6 @@ This is required by FTPFileSystem - - test.fs.s3n.name - s3n:/// - The name of the s3n file system for testing. - - hadoop.security.authentication http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties b/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties deleted file mode 100644 index 09cc463..0000000 --- a/hadoop-common-project/hadoop-common/src/test/resources/jets3t.properties +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# Speed up the s3native jets3t test - -s3service.max-thread-count=10 -threaded-service.max-thread-count=10 http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-project/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index aa20d7c..87272e1 100755 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -812,11 +812,6 @@ - net.java.dev.jets3t - jets3t - 0.9.0 - - com.amazonaws aws-java-sdk-bundle ${aws-java-sdk.version} http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-project/src/site/markdown/index.md.vm ---------------------------------------------------------------------- diff --git a/hadoop-project/src/site/markdown/index.md.vm b/hadoop-project/src/site/markdown/index.md.vm index bb7bda2..d9443d6 100644 --- a/hadoop-project/src/site/markdown/index.md.vm +++ b/hadoop-project/src/site/markdown/index.md.vm @@ -204,6 +204,16 @@ in both the task configuration and as a Java option. Existing configs that already specify both are not affected by this change. See the full release notes of MAPREDUCE-5785 for more details. +S3Guard: Consistency and Metadata Caching for the S3A filesystem client +--------------------- + +[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345) adds an +optional feature to the S3A client of Amazon S3 storage: the ability to use +a DynamoDB table as a fast and consistent store of file and directory +metadata. + +See [S3Guard](./hadoop-aws/tools/hadoop-aws/s3guard.html) for more details. + Getting Started =============== http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml index 82ec16e..2615566 100644 --- a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml @@ -22,10 +22,6 @@ - - - - http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/pom.xml ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index bcb0e07..4a51a1d 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -193,7 +193,6 @@ **/ITestJets3tNativeS3FileSystemContract.java **/ITestS3AContractRootDir.java - **/ITestS3NContractRootDir.java **/ITestS3AFileContextStatistics.java **/ITestS3AEncryptionSSEC*.java **/ITestS3AHuge*.java @@ -226,7 +225,6 @@ **/ITestJets3tNativeS3FileSystemContract.java **/ITestS3AContractRootDir.java - **/ITestS3NContractRootDir.java **/ITestS3AFileContextStatistics.java **/ITestS3AHuge*.java **/ITestS3AEncryptionSSEC*.java @@ -429,11 +427,6 @@ test-jar - net.java.dev.jets3t - jets3t - compile - - com.amazonaws aws-java-sdk-bundle compile http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 4e2af3a..d278bdf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -136,7 +136,10 @@ public final class Constants { public static final String BUFFER_DIR = "fs.s3a.buffer.dir"; // switch to the fast block-by-block upload mechanism + // this is the only supported upload mechanism + @Deprecated public static final String FAST_UPLOAD = "fs.s3a.fast.upload"; + @Deprecated public static final boolean DEFAULT_FAST_UPLOAD = false; //initial size of memory buffer for a fast upload http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e76ef0b..f4709a7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -163,7 +163,6 @@ public class S3AFileSystem extends FileSystem { // The maximum number of entries that can be deleted in any call to s3 private static final int MAX_ENTRIES_TO_DELETE = 1000; - private boolean blockUploadEnabled; private String blockOutputBuffer; private S3ADataBlocks.BlockFactory blockFactory; private int blockOutputActiveBlocks; @@ -281,21 +280,20 @@ public class S3AFileSystem extends FileSystem { inputPolicy = S3AInputPolicy.getPolicy( conf.getTrimmed(INPUT_FADVISE, INPUT_FADV_NORMAL)); - blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, DEFAULT_FAST_UPLOAD); - - if (blockUploadEnabled) { - blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, - DEFAULT_FAST_UPLOAD_BUFFER); - partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize); - blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer); - blockOutputActiveBlocks = intOption(conf, - FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1); - LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + - " queue limit={}", - blockOutputBuffer, partSize, blockOutputActiveBlocks); - } else { - LOG.debug("Using S3AOutputStream"); + boolean blockUploadEnabled = conf.getBoolean(FAST_UPLOAD, true); + + if (!blockUploadEnabled) { + LOG.warn("The \"slow\" output stream is no longer supported"); } + blockOutputBuffer = conf.getTrimmed(FAST_UPLOAD_BUFFER, + DEFAULT_FAST_UPLOAD_BUFFER); + partSize = ensureOutputParameterInRange(MULTIPART_SIZE, partSize); + blockFactory = S3ADataBlocks.createFactory(this, blockOutputBuffer); + blockOutputActiveBlocks = intOption(conf, + FAST_UPLOAD_ACTIVE_BLOCKS, DEFAULT_FAST_UPLOAD_ACTIVE_BLOCKS, 1); + LOG.debug("Using S3ABlockOutputStream with buffer = {}; block={};" + + " queue limit={}", + blockOutputBuffer, partSize, blockOutputActiveBlocks); metadataStore = S3Guard.getMetadataStore(this); allowAuthoritative = conf.getBoolean(METADATASTORE_AUTHORITATIVE, @@ -644,33 +642,18 @@ public class S3AFileSystem extends FileSystem { } instrumentation.fileCreated(); - FSDataOutputStream output; - if (blockUploadEnabled) { - output = new FSDataOutputStream( - new S3ABlockOutputStream(this, - key, - new SemaphoredDelegatingExecutor(boundedThreadPool, - blockOutputActiveBlocks, true), - progress, - partSize, - blockFactory, - instrumentation.newOutputStreamStatistics(statistics), - new WriteOperationHelper(key) - ), - null); - } else { - - // We pass null to FSDataOutputStream so it won't count 
writes that - // are being buffered to a file - output = new FSDataOutputStream( - new S3AOutputStream(getConf(), - this, - key, - progress - ), - null); - } - return output; + return new FSDataOutputStream( + new S3ABlockOutputStream(this, + key, + new SemaphoredDelegatingExecutor(boundedThreadPool, + blockOutputActiveBlocks, true), + progress, + partSize, + blockFactory, + instrumentation.newOutputStreamStatistics(statistics), + new WriteOperationHelper(key) + ), + null); } /** @@ -2471,7 +2454,9 @@ public class S3AFileSystem extends FileSystem { sb.append(", cannedACL=").append(cannedACL.toString()); } sb.append(", readAhead=").append(readAhead); - sb.append(", blockSize=").append(getDefaultBlockSize()); + if (getConf() != null) { + sb.append(", blockSize=").append(getDefaultBlockSize()); + } sb.append(", multiPartThreshold=").append(multiPartThreshold); if (serverSideEncryptionAlgorithm != null) { sb.append(", serverSideEncryptionAlgorithm='") http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AOutputStream.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AOutputStream.java deleted file mode 100644 index e723b75..0000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AOutputStream.java +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.model.ObjectMetadata; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.LocalDirAllocator; -import org.apache.hadoop.util.Progressable; - -import org.slf4j.Logger; - -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.io.OutputStream; -import java.util.concurrent.atomic.AtomicBoolean; - -import static org.apache.hadoop.fs.s3a.S3AUtils.*; - -/** - * Output stream to save data to S3. 
- */ -@InterfaceAudience.Private -@InterfaceStability.Evolving -public class S3AOutputStream extends OutputStream { - private final OutputStream backupStream; - private final File backupFile; - private final AtomicBoolean closed = new AtomicBoolean(false); - private final String key; - private final Progressable progress; - private final S3AFileSystem fs; - - public static final Logger LOG = S3AFileSystem.LOG; - - public S3AOutputStream(Configuration conf, - S3AFileSystem fs, - String key, - Progressable progress) - throws IOException { - this.key = key; - this.progress = progress; - this.fs = fs; - - - backupFile = fs.createTmpFileForWrite("output-", - LocalDirAllocator.SIZE_UNKNOWN, conf); - - LOG.debug("OutputStream for key '{}' writing to tempfile: {}", - key, backupFile); - - this.backupStream = new BufferedOutputStream( - new FileOutputStream(backupFile)); - } - - /** - * Check for the filesystem being open. - * @throws IOException if the filesystem is closed. - */ - void checkOpen() throws IOException { - if (closed.get()) { - throw new IOException("Output Stream closed"); - } - } - - @Override - public void flush() throws IOException { - checkOpen(); - backupStream.flush(); - } - - @Override - public void close() throws IOException { - if (closed.getAndSet(true)) { - return; - } - - backupStream.close(); - LOG.debug("OutputStream for key '{}' closed. Now beginning upload", key); - - try { - final ObjectMetadata om = fs.newObjectMetadata(backupFile.length()); - UploadInfo info = fs.putObject( - fs.newPutObjectRequest( - key, - om, - backupFile)); - ProgressableProgressListener listener = - new ProgressableProgressListener(fs, key, info.getUpload(), progress); - info.getUpload().addProgressListener(listener); - - info.getUpload().waitForUploadResult(); - listener.uploadCompleted(); - // This will delete unnecessary fake parent directories, update any - // MetadataStore - fs.finishedWrite(key, info.getLength()); - } catch (InterruptedException e) { - throw (InterruptedIOException) new InterruptedIOException(e.toString()) - .initCause(e); - } catch (AmazonClientException e) { - throw translateException("saving output", key , e); - } finally { - if (!backupFile.delete()) { - LOG.warn("Could not delete temporary s3a file: {}", backupFile); - } - super.close(); - } - LOG.debug("OutputStream for key '{}' upload complete", key); - } - - @Override - public void write(int b) throws IOException { - checkOpen(); - backupStream.write(b); - } - - @Override - public void write(byte[] b, int off, int len) throws IOException { - checkOpen(); - backupStream.write(b, off, len); - } - -} http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java deleted file mode 100644 index 2746af4..0000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/FileMetadata.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -/** - *

- * Holds basic metadata for a file stored in a {@link NativeFileSystemStore}. - *

- */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -class FileMetadata { - private final String key; - private final long length; - private final long lastModified; - - public FileMetadata(String key, long length, long lastModified) { - this.key = key; - this.length = length; - this.lastModified = lastModified; - } - - public String getKey() { - return key; - } - - public long getLength() { - return length; - } - - public long getLastModified() { - return lastModified; - } - - @Override - public String toString() { - return "FileMetadata[" + key + ", " + length + ", " + lastModified + "]"; - } - -} http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java deleted file mode 100644 index c9c0f98..0000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/Jets3tNativeFileSystemStore.java +++ /dev/null @@ -1,481 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3native; - -import static org.apache.hadoop.fs.s3native.NativeS3FileSystem.PATH_DELIMITER; - -import java.io.BufferedInputStream; -import java.io.ByteArrayInputStream; -import java.io.EOFException; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.security.AccessControlException; -import org.jets3t.service.S3Service; -import org.jets3t.service.S3ServiceException; -import org.jets3t.service.ServiceException; -import org.jets3t.service.StorageObjectsChunk; -import org.jets3t.service.impl.rest.HttpException; -import org.jets3t.service.impl.rest.httpclient.RestS3Service; -import org.jets3t.service.model.MultipartPart; -import org.jets3t.service.model.MultipartUpload; -import org.jets3t.service.model.S3Bucket; -import org.jets3t.service.model.S3Object; -import org.jets3t.service.model.StorageObject; -import org.jets3t.service.security.AWSCredentials; -import org.jets3t.service.utils.MultipartUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@InterfaceAudience.Private -@InterfaceStability.Unstable -class Jets3tNativeFileSystemStore implements NativeFileSystemStore { - - private S3Service s3Service; - private S3Bucket bucket; - - private long multipartBlockSize; - private boolean multipartEnabled; - private long multipartCopyBlockSize; - static final long MAX_PART_SIZE = (long)5 * 1024 * 1024 * 1024; - - private String serverSideEncryptionAlgorithm; - - public static final Logger LOG = - LoggerFactory.getLogger(Jets3tNativeFileSystemStore.class); - - @Override - public void initialize(URI uri, Configuration conf) throws IOException { - S3Credentials s3Credentials = new S3Credentials(); - s3Credentials.initialize(uri, conf); - try { - AWSCredentials awsCredentials = - new AWSCredentials(s3Credentials.getAccessKey(), - s3Credentials.getSecretAccessKey()); - this.s3Service = new RestS3Service(awsCredentials); - } catch (S3ServiceException e) { - handleException(e); - } - multipartEnabled = - conf.getBoolean("fs.s3n.multipart.uploads.enabled", false); - multipartBlockSize = Math.min( - conf.getLong("fs.s3n.multipart.uploads.block.size", 64 * 1024 * 1024), - MAX_PART_SIZE); - multipartCopyBlockSize = Math.min( - conf.getLong("fs.s3n.multipart.copy.block.size", MAX_PART_SIZE), - MAX_PART_SIZE); - serverSideEncryptionAlgorithm = conf.get("fs.s3n.server-side-encryption-algorithm"); - - bucket = new S3Bucket(uri.getHost()); - } - - @Override - public void storeFile(String key, File file, byte[] md5Hash) - throws IOException { - - if (multipartEnabled && file.length() >= multipartBlockSize) { - storeLargeFile(key, file, md5Hash); - return; - } - - BufferedInputStream in = null; - try { - in = new BufferedInputStream(new FileInputStream(file)); - S3Object object = new S3Object(key); - object.setDataInputStream(in); - object.setContentType("binary/octet-stream"); - object.setContentLength(file.length()); - object.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - if (md5Hash != null) { - object.setMd5Hash(md5Hash); - } - s3Service.putObject(bucket, 
object); - } catch (ServiceException e) { - handleException(e, key); - } finally { - IOUtils.closeStream(in); - } - } - - public void storeLargeFile(String key, File file, byte[] md5Hash) - throws IOException { - S3Object object = new S3Object(key); - object.setDataInputFile(file); - object.setContentType("binary/octet-stream"); - object.setContentLength(file.length()); - object.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - if (md5Hash != null) { - object.setMd5Hash(md5Hash); - } - - List objectsToUploadAsMultipart = - new ArrayList(); - objectsToUploadAsMultipart.add(object); - MultipartUtils mpUtils = new MultipartUtils(multipartBlockSize); - - try { - mpUtils.uploadObjects(bucket.getName(), s3Service, - objectsToUploadAsMultipart, null); - } catch (Exception e) { - handleException(e, key); - } - } - - @Override - public void storeEmptyFile(String key) throws IOException { - try { - S3Object object = new S3Object(key); - object.setDataInputStream(new ByteArrayInputStream(new byte[0])); - object.setContentType("binary/octet-stream"); - object.setContentLength(0); - object.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - s3Service.putObject(bucket, object); - } catch (ServiceException e) { - handleException(e, key); - } - } - - @Override - public FileMetadata retrieveMetadata(String key) throws IOException { - StorageObject object = null; - try { - LOG.debug("Getting metadata for key: {} from bucket: {}", - key, bucket.getName()); - object = s3Service.getObjectDetails(bucket.getName(), key); - return new FileMetadata(key, object.getContentLength(), - object.getLastModifiedDate().getTime()); - - } catch (ServiceException e) { - try { - // process - handleException(e, key); - return null; - } catch (FileNotFoundException fnfe) { - // and downgrade missing files - return null; - } - } finally { - if (object != null) { - object.closeDataInputStream(); - } - } - } - - /** - * @param key - * The key is the object name that is being retrieved from the S3 bucket - * @return - * This method returns null if the key is not found - * @throws IOException - */ - - @Override - public InputStream retrieve(String key) throws IOException { - try { - LOG.debug("Getting key: {} from bucket: {}", - key, bucket.getName()); - S3Object object = s3Service.getObject(bucket.getName(), key); - return object.getDataInputStream(); - } catch (ServiceException e) { - handleException(e, key); - return null; //return null if key not found - } - } - - /** - * - * @param key - * The key is the object name that is being retrieved from the S3 bucket - * @return - * This method returns null if the key is not found - * @throws IOException - */ - - @Override - public InputStream retrieve(String key, long byteRangeStart) - throws IOException { - try { - LOG.debug("Getting key: {} from bucket: {} with byteRangeStart: {}", - key, bucket.getName(), byteRangeStart); - S3Object object = s3Service.getObject(bucket, key, null, null, null, - null, byteRangeStart, null); - return object.getDataInputStream(); - } catch (ServiceException e) { - handleException(e, key); - return null; - } - } - - @Override - public PartialListing list(String prefix, int maxListingLength) - throws IOException { - return list(prefix, maxListingLength, null, false); - } - - @Override - public PartialListing list(String prefix, int maxListingLength, String priorLastKey, - boolean recurse) throws IOException { - - return list(prefix, recurse ? 
null : PATH_DELIMITER, maxListingLength, priorLastKey); - } - - /** - * list objects - * @param prefix prefix - * @param delimiter delimiter - * @param maxListingLength max no. of entries - * @param priorLastKey last key in any previous search - * @return a list of matches - * @throws IOException on any reported failure - */ - - private PartialListing list(String prefix, String delimiter, - int maxListingLength, String priorLastKey) throws IOException { - try { - if (!prefix.isEmpty() && !prefix.endsWith(PATH_DELIMITER)) { - prefix += PATH_DELIMITER; - } - StorageObjectsChunk chunk = s3Service.listObjectsChunked(bucket.getName(), - prefix, delimiter, maxListingLength, priorLastKey); - - FileMetadata[] fileMetadata = - new FileMetadata[chunk.getObjects().length]; - for (int i = 0; i < fileMetadata.length; i++) { - StorageObject object = chunk.getObjects()[i]; - fileMetadata[i] = new FileMetadata(object.getKey(), - object.getContentLength(), object.getLastModifiedDate().getTime()); - } - return new PartialListing(chunk.getPriorLastKey(), fileMetadata, - chunk.getCommonPrefixes()); - } catch (ServiceException e) { - handleException(e, prefix); - return null; // never returned - keep compiler happy - } - } - - @Override - public void delete(String key) throws IOException { - try { - LOG.debug("Deleting key: {} from bucket: {}", - key, bucket.getName()); - s3Service.deleteObject(bucket, key); - } catch (ServiceException e) { - handleException(e, key); - } - } - - public void rename(String srcKey, String dstKey) throws IOException { - try { - s3Service.renameObject(bucket.getName(), srcKey, new S3Object(dstKey)); - } catch (ServiceException e) { - handleException(e, srcKey); - } - } - - @Override - public void copy(String srcKey, String dstKey) throws IOException { - try { - if(LOG.isDebugEnabled()) { - LOG.debug("Copying srcKey: " + srcKey + "to dstKey: " + dstKey + "in bucket: " + bucket.getName()); - } - if (multipartEnabled) { - S3Object object = s3Service.getObjectDetails(bucket, srcKey, null, - null, null, null); - if (multipartCopyBlockSize > 0 && - object.getContentLength() > multipartCopyBlockSize) { - copyLargeFile(object, dstKey); - return; - } - } - - S3Object dstObject = new S3Object(dstKey); - dstObject.setServerSideEncryptionAlgorithm(serverSideEncryptionAlgorithm); - s3Service.copyObject(bucket.getName(), srcKey, bucket.getName(), - dstObject, false); - } catch (ServiceException e) { - handleException(e, srcKey); - } - } - - public void copyLargeFile(S3Object srcObject, String dstKey) throws IOException { - try { - long partCount = srcObject.getContentLength() / multipartCopyBlockSize + - (srcObject.getContentLength() % multipartCopyBlockSize > 0 ? 
1 : 0); - - MultipartUpload multipartUpload = s3Service.multipartStartUpload - (bucket.getName(), dstKey, srcObject.getMetadataMap()); - - List listedParts = new ArrayList(); - for (int i = 0; i < partCount; i++) { - long byteRangeStart = i * multipartCopyBlockSize; - long byteLength; - if (i < partCount - 1) { - byteLength = multipartCopyBlockSize; - } else { - byteLength = srcObject.getContentLength() % multipartCopyBlockSize; - if (byteLength == 0) { - byteLength = multipartCopyBlockSize; - } - } - - MultipartPart copiedPart = s3Service.multipartUploadPartCopy - (multipartUpload, i + 1, bucket.getName(), srcObject.getKey(), - null, null, null, null, byteRangeStart, - byteRangeStart + byteLength - 1, null); - listedParts.add(copiedPart); - } - - Collections.reverse(listedParts); - s3Service.multipartCompleteUpload(multipartUpload, listedParts); - } catch (ServiceException e) { - handleException(e, srcObject.getKey()); - } - } - - @Override - public void purge(String prefix) throws IOException { - String key = ""; - try { - S3Object[] objects = - s3Service.listObjects(bucket.getName(), prefix, null); - for (S3Object object : objects) { - key = object.getKey(); - s3Service.deleteObject(bucket, key); - } - } catch (S3ServiceException e) { - handleException(e, key); - } - } - - @Override - public void dump() throws IOException { - StringBuilder sb = new StringBuilder("S3 Native Filesystem, "); - sb.append(bucket.getName()).append("\n"); - try { - S3Object[] objects = s3Service.listObjects(bucket.getName()); - for (S3Object object : objects) { - sb.append(object.getKey()).append("\n"); - } - } catch (S3ServiceException e) { - handleException(e); - } - System.out.println(sb); - } - - /** - * Handle any service exception by translating it into an IOException - * @param e exception - * @throws IOException exception -always - */ - private void handleException(Exception e) throws IOException { - throw processException(e, e, ""); - } - /** - * Handle any service exception by translating it into an IOException - * @param e exception - * @param key key sought from object store - - * @throws IOException exception -always - */ - private void handleException(Exception e, String key) throws IOException { - throw processException(e, e, key); - } - - /** - * Handle any service exception by translating it into an IOException - * @param thrown exception - * @param original original exception -thrown if no other translation could - * be made - * @param key key sought from object store or "" for undefined - * @return an exception to throw. If isProcessingCause==true this may be null. - */ - private IOException processException(Throwable thrown, Throwable original, - String key) { - IOException result; - if (thrown.getCause() != null) { - // recurse down - result = processException(thrown.getCause(), original, key); - } else if (thrown instanceof HttpException) { - // nested HttpException - examine error code and react - HttpException httpException = (HttpException) thrown; - String responseMessage = httpException.getResponseMessage(); - int responseCode = httpException.getResponseCode(); - String bucketName = "s3n://" + bucket.getName(); - String text = String.format("%s : %03d : %s", - bucketName, - responseCode, - responseMessage); - String filename = !key.isEmpty() ? 
(bucketName + "/" + key) : text; - IOException ioe; - switch (responseCode) { - case 404: - result = new FileNotFoundException(filename); - break; - case 416: // invalid range - result = new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF - +": " + filename); - break; - case 403: //forbidden - result = new AccessControlException("Permission denied" - +": " + filename); - break; - default: - result = new IOException(text); - } - result.initCause(thrown); - } else if (thrown instanceof S3ServiceException) { - S3ServiceException se = (S3ServiceException) thrown; - LOG.debug( - "S3ServiceException: {}: {} : {}", - se.getS3ErrorCode(), se.getS3ErrorMessage(), se, se); - if ("InvalidRange".equals(se.getS3ErrorCode())) { - result = new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); - } else { - result = new S3Exception(se); - } - } else if (thrown instanceof ServiceException) { - ServiceException se = (ServiceException) thrown; - LOG.debug("S3ServiceException: {}: {} : {}", - se.getErrorCode(), se.toString(), se, se); - result = new S3Exception(se); - } else if (thrown instanceof IOException) { - result = (IOException) thrown; - } else { - // here there is no exception derived yet. - // this means no inner cause, and no translation made yet. - // convert the original to an IOException -rather than just the - // exception at the base of the tree - result = new S3Exception(original); - } - - return result; - } -} http://git-wip-us.apache.org/repos/asf/hadoop/blob/62e8a5ce/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java deleted file mode 100644 index f26cdac..0000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/NativeFileSystemStore.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; - -/** - *

- * An abstraction for a key-based {@link File} store. - *

- */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -interface NativeFileSystemStore { - - void initialize(URI uri, Configuration conf) throws IOException; - - void storeFile(String key, File file, byte[] md5Hash) throws IOException; - void storeEmptyFile(String key) throws IOException; - - FileMetadata retrieveMetadata(String key) throws IOException; - InputStream retrieve(String key) throws IOException; - InputStream retrieve(String key, long byteRangeStart) throws IOException; - - PartialListing list(String prefix, int maxListingLength) throws IOException; - PartialListing list(String prefix, int maxListingLength, String priorLastKey, boolean recursive) - throws IOException; - - void delete(String key) throws IOException; - - void copy(String srcKey, String dstKey) throws IOException; - - /** - * Delete all keys with the given prefix. Used for testing. - * @throws IOException - */ - void purge(String prefix) throws IOException; - - /** - * Diagnostic method to dump state to the console. - * @throws IOException - */ - void dump() throws IOException; -}