From: liuml07@apache.org
To: common-commits@hadoop.apache.org
Subject: hadoop git commit: HADOOP-14266. S3Guard: S3AFileSystem::listFiles() to employ MetadataStore. Contributed by Mingliang Liu
Date: Fri, 21 Apr 2017 20:40:28 +0000 (UTC)

Repository: hadoop
Updated Branches:
  refs/heads/HADOOP-13345 d4fd991a9 -> 1b27f15d1


HADOOP-14266. S3Guard: S3AFileSystem::listFiles() to employ MetadataStore.
Contributed by Mingliang Liu Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1b27f15d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1b27f15d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1b27f15d Branch: refs/heads/HADOOP-13345 Commit: 1b27f15d123564200cf9730a051055d72acb4866 Parents: d4fd991 Author: Mingliang Liu Authored: Thu Apr 13 13:35:58 2017 -0700 Committer: Mingliang Liu Committed: Fri Apr 21 13:34:30 2017 -0700 ---------------------------------------------------------------------- .../java/org/apache/hadoop/fs/s3a/Listing.java | 86 ++++++++----- .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 55 +++++---- .../fs/s3a/s3guard/DescendantsIterator.java | 35 +++--- .../fs/s3a/s3guard/DynamoDBMetadataStore.java | 2 +- .../apache/hadoop/fs/s3a/s3guard/S3Guard.java | 9 +- .../fs/s3a/ITestS3GuardListConsistency.java | 121 ++++++++++++++++++- .../fs/s3a/s3guard/MetadataStoreTestBase.java | 2 +- 7 files changed, 231 insertions(+), 79 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/1b27f15d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java index c9366af..e91f2ec 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java @@ -53,7 +53,6 @@ public class Listing { private final S3AFileSystem owner; private static final Logger LOG = S3AFileSystem.LOG; - private static final FileStatus[] EMPTY_FILE_STATUS_ARRAY = new FileStatus[0]; public Listing(S3AFileSystem owner) { this.owner = owner; @@ -64,21 +63,35 @@ public class Listing { * a given status filter. * * @param fileStatuses the provided list of file status. NO remote calls. - * @param filter file status filter + * @param filter file path filter on which paths to accept + * @param acceptor the file status acceptor * @return the file status iterator */ - ProvidedLocatedFileStatusIterator createProvidedLocatedFileStatusIterator( - FileStatus[] fileStatuses, ProvidedFileStatusFilter filter) { - return new ProvidedLocatedFileStatusIterator(fileStatuses, filter); + ProvidedFileStatusIterator createProvidedFileStatusIterator( + FileStatus[] fileStatuses, PathFilter filter, + FileStatusAcceptor acceptor) { + return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor); } + /** + * Create a FileStatus iterator against a path, with a given list object + * request. + * + * @param listPath path of the listing + * @param request initial request to make + * @param filter the filter on which paths to accept + * @param acceptor the class/predicate to decide which entries to accept + * in the listing based on the full file status. 
+ * @return the iterator + * @throws IOException IO Problems + */ FileStatusListingIterator createFileStatusListingIterator( Path listPath, ListObjectsRequest request, PathFilter filter, Listing.FileStatusAcceptor acceptor) throws IOException { return createFileStatusListingIterator(listPath, request, filter, acceptor, - EMPTY_FILE_STATUS_ARRAY); + null); } /** @@ -99,7 +112,7 @@ public class Listing { ListObjectsRequest request, PathFilter filter, Listing.FileStatusAcceptor acceptor, - FileStatus[] providedStatus) throws IOException { + RemoteIterator providedStatus) throws IOException { return new FileStatusListingIterator( new ObjectListingIterator(listPath, request), filter, @@ -140,6 +153,13 @@ public class Listing { * should be generated.) */ boolean accept(Path keyPath, String commonPrefix); + + /** + * Predicate to decide whether or not to accept a file status. + * @param status file status containing file path information + * @return true if the status is accepted else false + */ + boolean accept(FileStatus status); } /** @@ -147,9 +167,9 @@ public class Listing { * value. * * If the status value is null, the iterator declares that it has no data. - * This iterator is used to handle {@link listStatus()} calls where the path - * handed in refers to a file, not a directory: this is the iterator - * returned. + * This iterator is used to handle {@link S3AFileSystem#listStatus} calls + * where the path handed in refers to a file, not a directory: this is the + * iterator returned. */ static final class SingleStatusRemoteIterator implements RemoteIterator { @@ -201,13 +221,6 @@ public class Listing { } /** - * Filter out a FileStatus object, unlike {@link PathFilter} against a path. - */ - interface ProvidedFileStatusFilter { - boolean accept(FileStatus status); - } - - /** * This wraps up a provided non-null list of file status as a remote iterator. * * It firstly filters the provided list and later {@link #next} call will get @@ -216,18 +229,18 @@ public class Listing { * * There is no remote data to fetch. */ - class ProvidedLocatedFileStatusIterator - implements RemoteIterator { + static class ProvidedFileStatusIterator + implements RemoteIterator { private final ArrayList filteredStatusList; private int index = 0; - ProvidedLocatedFileStatusIterator(FileStatus[] fileStatuses, - ProvidedFileStatusFilter filter) { + ProvidedFileStatusIterator(FileStatus[] fileStatuses, PathFilter filter, + FileStatusAcceptor acceptor) { Preconditions.checkArgument(fileStatuses != null, "Null status list!"); filteredStatusList = new ArrayList<>(fileStatuses.length); for (FileStatus status : fileStatuses) { - if (filter.accept(status)) { + if (filter.accept(status.getPath()) && acceptor.accept(status)) { filteredStatusList.add(status); } } @@ -240,8 +253,8 @@ public class Listing { } @Override - public LocatedFileStatus next() throws IOException { - return owner.toLocatedFileStatus(filteredStatusList.get(index++)); + public FileStatus next() throws IOException { + return filteredStatusList.get(index++); } } @@ -256,7 +269,7 @@ public class Listing { * iterator can declare that there is more data available. * * The need to filter the results precludes the iterator from simply - * declaring that if the {@link S3AFileSystem.ObjectListingIterator#hasNext()} + * declaring that if the {@link ObjectListingIterator#hasNext()} * is true then there are more results. Instead the next batch of results must * be retrieved and filtered. 
* @@ -301,13 +314,14 @@ public class Listing { FileStatusListingIterator(ObjectListingIterator source, PathFilter filter, FileStatusAcceptor acceptor, - FileStatus[] providedStatus) throws IOException { + RemoteIterator providedStatus) throws IOException { this.source = source; this.filter = filter; this.acceptor = acceptor; - this.providedStatus = new HashSet<>(providedStatus.length); - for (FileStatus status : providedStatus) { - if (filter.accept(status.getPath())) { + this.providedStatus = new HashSet<>(); + for (; providedStatus != null && providedStatus.hasNext();) { + final FileStatus status = providedStatus.next(); + if (filter.accept(status.getPath()) && acceptor.accept(status)) { this.providedStatus.add(status); } } @@ -367,7 +381,7 @@ public class Listing { /** * Try to retrieve another batch. * Note that for the initial batch, - * {@link S3AFileSystem.ObjectListingIterator} does not generate a request; + * {@link ObjectListingIterator} does not generate a request; * it simply returns the initial set. * * @return true if a new batch was created. @@ -467,7 +481,7 @@ public class Listing { * instance. * * 2. Second and later invocations will continue the ongoing listing, - * calling {@link #continueListObjects(ObjectListing)} to request the next + * calling {@link S3AFileSystem#continueListObjects} to request the next * batch of results. * * 3. The {@link #hasNext()} predicate returns true for the initial call, @@ -619,6 +633,11 @@ public class Listing { public boolean accept(Path keyPath, String prefix) { return false; } + + @Override + public boolean accept(FileStatus status) { + return (status != null) && status.isFile(); + } } /** @@ -690,6 +709,11 @@ public class Listing { public boolean accept(Path keyPath, String prefix) { return !keyPath.equals(qualifiedPath); } + + @Override + public boolean accept(FileStatus status) { + return (status != null) && !status.getPath().equals(qualifiedPath); + } } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/1b27f15d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 29f84b0..d16811b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -93,6 +93,7 @@ import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3a.s3guard.DescendantsIterator; import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata; import org.apache.hadoop.fs.s3a.s3guard.MetadataStore; import org.apache.hadoop.fs.s3a.s3guard.PathMetadata; @@ -2443,15 +2444,28 @@ public class S3AFileSystem extends FileSystem { String delimiter = recursive ? 
null : "/"; LOG.debug("Requesting all entries under {} with delimiter '{}'", key, delimiter); + final RemoteIterator cachedFilesIterator; + if (recursive) { + final PathMetadata pm = metadataStore.get(path, true); + cachedFilesIterator = new DescendantsIterator(metadataStore, pm); + } else { + final DirListingMetadata meta = metadataStore.listChildren(path); + cachedFilesIterator = listing.createProvidedFileStatusIterator( + S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor); + if (allowAuthoritative && meta != null && meta.isAuthoritative()) { + return listing.createLocatedFileStatusIterator(cachedFilesIterator); + } + } return listing.createLocatedFileStatusIterator( listing.createFileStatusListingIterator(path, createListObjectsRequest(key, delimiter), - ACCEPT_ALL, acceptor)); + ACCEPT_ALL, + acceptor, + cachedFilesIterator)); } } catch (AmazonClientException e) { // TODO s3guard: // 1. retry on file not found exception - // 2. merge listing with MetadataStore's view of directory tree throw translateException("listFiles", path, e); } } @@ -2496,28 +2510,21 @@ public class S3AFileSystem extends FileSystem { filter.accept(path) ? toLocatedFileStatus(fileStatus) : null); } else { // directory: trigger a lookup - final DirListingMetadata dirMeta = metadataStore.listChildren(path); - if (allowAuthoritative - && dirMeta != null - && dirMeta.isAuthoritative()) { - return listing.createProvidedLocatedFileStatusIterator( - S3Guard.dirMetaToStatuses(dirMeta), - new Listing.ProvidedFileStatusFilter() { - @Override - public boolean accept(FileStatus status) { - return filter.accept(status.getPath()); - } - }); - } - - String key = maybeAddTrailingSlash(pathToKey(path)); - return listing.createLocatedFileStatusIterator( - listing.createFileStatusListingIterator(path, - createListObjectsRequest(key, "/"), - filter, - new Listing.AcceptAllButSelfAndS3nDirs(path), - S3Guard.dirMetaToStatuses(dirMeta) - )); + final String key = maybeAddTrailingSlash(pathToKey(path)); + final Listing.FileStatusAcceptor acceptor = + new Listing.AcceptAllButSelfAndS3nDirs(path); + final DirListingMetadata meta = metadataStore.listChildren(path); + final RemoteIterator cachedFileStatusIterator = + listing.createProvidedFileStatusIterator( + S3Guard.dirMetaToStatuses(meta), filter, acceptor); + return (allowAuthoritative && meta != null && meta.isAuthoritative()) + ? 
listing.createLocatedFileStatusIterator(cachedFileStatusIterator) + : listing.createLocatedFileStatusIterator( + listing.createFileStatusListingIterator(path, + createListObjectsRequest(key, "/"), + filter, + acceptor, + cachedFileStatusIterator)); } } catch (AmazonClientException e) { throw translateException("listLocatedStatus", path, e); http://git-wip-us.apache.org/repos/asf/hadoop/blob/1b27f15d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java index afd3266..d008972 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DescendantsIterator.java @@ -27,6 +27,7 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; @@ -42,7 +43,6 @@ import org.apache.hadoop.fs.RemoteIterator; * file system structure: * *
- * {@code
  * /dir1
  * |-- dir2
  * |   |-- file1
@@ -53,12 +53,10 @@ import org.apache.hadoop.fs.RemoteIterator;
  *     |-- dir5
  *     |   `-- file4
  *     `-- dir6
- * }
  * 
  *
  * Consider this code sample:
  * <pre>
- * {@code
  * final PathMetadata dir1 = get(new Path("/dir1"));
  * for (DescendantsIterator descendants = new DescendantsIterator(dir1);
  *     descendants.hasNext(); ) {
@@ -66,12 +64,10 @@ import org.apache.hadoop.fs.RemoteIterator;
  *   System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
  *       status.getPath());
  * }
- * }
  * </pre>
  *
  * The output is:
  * <pre>
- * {@code
  * D /dir1
  * D /dir1/dir2
  * D /dir1/dir3
@@ -82,12 +78,11 @@ import org.apache.hadoop.fs.RemoteIterator;
  * F /dir1/dir3/dir4/file3
  * F /dir1/dir3/dir5/file4
  * D /dir1/dir3/dir6
- * }
  * </pre>
*/ @InterfaceAudience.Private @InterfaceStability.Evolving -class DescendantsIterator implements RemoteIterator { +public class DescendantsIterator implements RemoteIterator { private final MetadataStore metadataStore; private final Queue queue = new LinkedList<>(); @@ -97,22 +92,24 @@ class DescendantsIterator implements RemoteIterator { * * @param ms the associated {@link MetadataStore} * @param meta base path for descendants iteration, which will be the first - * path returned during iteration (except root) + * returned during iteration (except root). Null makes empty iterator. + * @throws IOException if errors happen during metadata store listing */ - DescendantsIterator(MetadataStore ms, PathMetadata meta) + public DescendantsIterator(MetadataStore ms, PathMetadata meta) throws IOException { Preconditions.checkNotNull(ms); - Preconditions.checkNotNull(meta); this.metadataStore = ms; - final Path path = meta.getFileStatus().getPath(); - if (path.isRoot()) { - final DirListingMetadata rootListing = ms.listChildren(path); - if (rootListing != null) { - queue.addAll(rootListing.getListing()); + if (meta != null) { + final Path path = meta.getFileStatus().getPath(); + if (path.isRoot()) { + final DirListingMetadata rootListing = ms.listChildren(path); + if (rootListing != null) { + queue.addAll(rootListing.getListing()); + } + } else { + queue.add(meta); } - } else { - queue.add(meta); } } @@ -122,7 +119,7 @@ class DescendantsIterator implements RemoteIterator { } @Override - public PathMetadata next() throws IOException { + public FileStatus next() throws IOException { if (!hasNext()) { throw new NoSuchElementException("No more descendants."); } @@ -132,6 +129,6 @@ class DescendantsIterator implements RemoteIterator { final Path path = next.getFileStatus().getPath(); queue.addAll(metadataStore.listChildren(path).getListing()); } - return next; + return next.getFileStatus(); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/1b27f15d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java index 2b28c58..71f2497 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java @@ -312,7 +312,7 @@ public class DynamoDBMetadataStore implements MetadataStore { for (DescendantsIterator desc = new DescendantsIterator(this, meta); desc.hasNext();) { - delete(desc.next().getFileStatus().getPath()); + delete(desc.next().getPath()); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/1b27f15d/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java index 64afa93..a393bfb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -58,6 +58,7 @@ public final class S3Guard { static final Class 
S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT = DynamoDBClientFactory.DefaultDynamoDBClientFactory.class; + private static final FileStatus[] EMPTY_LISTING = new FileStatus[0]; // Utility class. All static functions. private S3Guard() { } @@ -133,9 +134,15 @@ public final class S3Guard { return status; } + /** + * Convert the data of a directory listing to an array of {@link FileStatus} + * entries. If the listing is null an empty array is returned. + * @param dirMeta directory listing -may be null + * @return a possibly-empty array of file status entries + */ public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { if (dirMeta == null) { - return new FileStatus[0]; + return EMPTY_LISTING; } Collection listing = dirMeta.getListing(); http://git-wip-us.apache.org/repos/asf/hadoop/blob/1b27f15d/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java index cb26a15..47d88073 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3GuardListConsistency.java @@ -33,9 +33,13 @@ import org.junit.Test; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; import java.util.List; +import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING; /** * Test S3Guard list consistency feature by injecting delayed listObjects() @@ -62,7 +66,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed // in listObjects() results via InconsistentS3Client Path inconsistentPath = - path("a/b/dir3-" + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING); + path("a/b/dir3-" + DELAY_KEY_SUBSTRING); Path[] testDirs = {path("a/b/dir1"), path("a/b/dir2"), @@ -123,7 +127,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed // in listObjects() results via InconsistentS3Client testDirs.add(path("doTestConsistentListLocatedStatus/dir-" + index - + InconsistentAmazonS3Client.DELAY_KEY_SUBSTRING)); + + DELAY_KEY_SUBSTRING)); } for (Path path : testDirs) { @@ -148,6 +152,119 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase { } } + /** + * Similar to {@link #testConsistentListStatus()}, this tests that the S3AFS + * listFiles() call will return consistent file list. + */ + @Test + public void testConsistentListFiles() throws Exception { + final S3AFileSystem fs = getFileSystem(); + // This test will fail if NullMetadataStore (the default) is configured: + // skip it. + Assume.assumeTrue(fs.hasMetadataStore()); + + final int[] numOfPaths = {0, 1, 2}; + for (int dirNum : numOfPaths) { + for (int normalFile : numOfPaths) { + for (int delayedFile : numOfPaths) { + for (boolean recursive : new boolean[] {true, false}) { + doTestListFiles(fs, dirNum, normalFile, delayedFile, recursive); + } + } + } + } + } + + /** + * Helper method to implement the tests of consistent listFiles(). 
+ * + * The file structure has dirNum subdirectories, and each directory (including + * the test base directory itself) has normalFileNum normal files and + * delayedFileNum delayed files. + * + * @param fs The S3 file system from contract + * @param dirNum number of subdirectories + * @param normalFileNum number files in each directory without delay to list + * @param delayedFileNum number files in each directory with delay to list + * @param recursive listFiles recursively if true + * @throws Exception if any unexpected error + */ + private void doTestListFiles(S3AFileSystem fs, int dirNum, int normalFileNum, + int delayedFileNum, boolean recursive) throws Exception { + describe("Testing dirNum=%d, normalFile=%d, delayedFile=%d, " + + "recursive=%s", dirNum, normalFileNum, delayedFileNum, recursive); + final Path baseTestDir = path("doTestListFiles-" + dirNum + "-" + + normalFileNum + "-" + delayedFileNum + "-" + recursive); + // delete the old test path (if any) so that when we call mkdirs() later, + // the to delay sub directories will be tracked via putObject() request. + fs.delete(baseTestDir, true); + + // make subdirectories (if any) + final List testDirs = new ArrayList<>(dirNum + 1); + assertTrue(fs.mkdirs(baseTestDir)); + testDirs.add(baseTestDir); + for (int i = 0; i < dirNum; i++) { + final Path subdir = path(baseTestDir + "/dir-" + i); + assertTrue(fs.mkdirs(subdir)); + testDirs.add(subdir); + } + + final Collection fileNames + = new ArrayList<>(normalFileNum + delayedFileNum); + int index = 0; + for (; index < normalFileNum; index++) { + fileNames.add("file-" + index); + } + for (; index < normalFileNum + delayedFileNum; index++) { + // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed + // in listObjects() results via InconsistentS3Client + fileNames.add("file-" + index + "-" + DELAY_KEY_SUBSTRING); + } + + // create files under each test directory + for (Path dir : testDirs) { + for (String fileName : fileNames) { + writeTextFile(fs, new Path(dir, fileName), "I, " + fileName, false); + } + } + + // this should return the union data from S3 and MetadataStore + final RemoteIterator statusIterator + = fs.listFiles(baseTestDir, recursive); + final Collection listedFiles = new HashSet<>(); + for (; statusIterator.hasNext();) { + final FileStatus status = statusIterator.next(); + assertTrue("FileStatus " + status + " is not a file!", status.isFile()); + listedFiles.add(status.getPath()); + } + LOG.info("S3AFileSystem::listFiles('{}', {}) -> {}", + baseTestDir, recursive, listedFiles); + + // This should fail without S3Guard, and succeed with it because part of the + // files to list are delaying visibility + if (!recursive) { + // in this case only the top level files are listed + assertEquals("Unexpected number of files returned by listFiles() call", + normalFileNum + delayedFileNum, listedFiles.size()); + verifyFileIsListed(listedFiles, baseTestDir, fileNames); + } else { + assertEquals("Unexpected number of files returned by listFiles() call", + testDirs.size() * (normalFileNum + delayedFileNum), + listedFiles.size()); + for (Path dir : testDirs) { + verifyFileIsListed(listedFiles, dir, fileNames); + } + } + } + + private static void verifyFileIsListed(Collection listedFiles, + Path currentDir, Collection fileNames) { + for (String fileName : fileNames) { + final Path file = new Path(currentDir, fileName); + assertTrue(file + " should have been listed", listedFiles.contains(file)); + } + } + @Test public void testListStatusWriteBack() throws Exception { 
    Assume.assumeTrue(getFileSystem().hasMetadataStore());

http://git-wip-us.apache.org/repos/asf/hadoop/blob/1b27f15d/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
index 88df45e..99acf6e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
@@ -164,7 +164,7 @@ public abstract class MetadataStoreTestBase extends Assert {
     final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/"));
     for (DescendantsIterator desc = new DescendantsIterator(ms, rootMeta);
          desc.hasNext();) {
-      final Path p = desc.next().getFileStatus().getPath();
+      final Path p = desc.next().getPath();
       actual.add(Path.getPathWithoutSchemeAndAuthority(p).toString());
     }
     LOG.info("We got {} by iterating DescendantsIterator", actual);
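----------------------------------------------------------------------

A note for readers following the Listing.java hunks above: the patch adds a
FileStatus-level accept() method to Listing.FileStatusAcceptor and a
ProvidedFileStatusIterator that serves a pre-fetched FileStatus[] (typically
built from the MetadataStore) through the RemoteIterator interface, keeping
only entries that pass both the PathFilter and the acceptor. The sketch below
models just that filtering step. It is not the code in the patch: the class
name and the StatusAcceptor interface are illustrative stand-ins, and it
assumes hadoop-common is on the classpath for FileStatus, Path, PathFilter
and RemoteIterator.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;

public class ProvidedStatusIteratorSketch implements RemoteIterator<FileStatus> {

  /** Status-level predicate, mirroring FileStatusAcceptor#accept(FileStatus). */
  public interface StatusAcceptor {
    boolean accept(FileStatus status);
  }

  private final List<FileStatus> filtered = new ArrayList<>();
  private int index = 0;

  public ProvidedStatusIteratorSketch(FileStatus[] provided,
      PathFilter filter, StatusAcceptor acceptor) {
    // Filter once up front; iteration afterwards makes no remote calls.
    for (FileStatus status : provided) {
      if (filter.accept(status.getPath()) && acceptor.accept(status)) {
        filtered.add(status);
      }
    }
  }

  @Override
  public boolean hasNext() throws IOException {
    return index < filtered.size();
  }

  @Override
  public FileStatus next() throws IOException {
    return filtered.get(index++);
  }

  public static void main(String[] args) throws IOException {
    FileStatus[] provided = {
        new FileStatus(1, false, 1, 1, 0, new Path("s3a://bucket/dir/file-1")),
        new FileStatus(0, true, 1, 1, 0, new Path("s3a://bucket/dir/subdir"))
    };
    RemoteIterator<FileStatus> it = new ProvidedStatusIteratorSketch(
        provided,
        path -> true,                  // accept every path
        status -> status.isFile());    // but only plain files
    while (it.hasNext()) {
      System.out.println(it.next().getPath());   // prints only .../file-1
    }
  }
}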
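The behavioural core of the change is in FileStatusListingIterator: file
statuses provided by the MetadataStore are now reported in addition to
whatever the S3 object listing returns, which is what lets listFiles() and
listLocatedStatus() show keys whose visibility in listObjects() is delayed.
A rough model of that union, ignoring the iterator's batching and filtering;
all names here are illustrative, not from the patch:

import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public final class ListingUnionSketch {

  /** Union of the S3 listing and the MetadataStore listing, keyed by path. */
  public static Collection<FileStatus> union(Iterable<FileStatus> fromS3,
      Iterable<FileStatus> fromMetadataStore) {
    Map<Path, FileStatus> byPath = new LinkedHashMap<>();
    for (FileStatus status : fromS3) {
      byPath.put(status.getPath(), status);
    }
    for (FileStatus status : fromMetadataStore) {
      // Entries only the MetadataStore knows about (for example recently
      // created keys that S3 has not started listing yet) are added;
      // entries present in both sources are reported once.
      byPath.putIfAbsent(status.getPath(), status);
    }
    return byPath.values();
  }

  public static void main(String[] args) {
    FileStatus listed = new FileStatus(1, false, 1, 1, 0,
        new Path("s3a://bucket/dir/file-0"));
    FileStatus delayed = new FileStatus(1, false, 1, 1, 0,
        new Path("s3a://bucket/dir/file-1"));
    Collection<FileStatus> merged =
        union(Arrays.asList(listed), Arrays.asList(listed, delayed));
    System.out.println(merged.size());   // 2: the delayed entry is not lost
  }
}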
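DescendantsIterator, made public here and changed to return FileStatus, does
a queue-based breadth-first walk of the MetadataStore and now tolerates a
null starting entry by producing an empty iteration. A simplified,
self-contained sketch of that traversal follows; the in-memory map stands in
for the MetadataStore and every name in it is hypothetical:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Queue;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class DescendantsSketch {

  /** Breadth-first walk: poll an entry, emit it, then enqueue its children. */
  public static List<FileStatus> descendants(
      Map<Path, List<FileStatus>> childrenByDir, FileStatus start) {
    List<FileStatus> out = new ArrayList<>();
    Queue<FileStatus> queue = new ArrayDeque<>();
    if (start != null) {              // a null start yields an empty walk
      queue.add(start);
    }
    while (!queue.isEmpty()) {
      FileStatus next = queue.poll();
      out.add(next);
      List<FileStatus> children = childrenByDir.get(next.getPath());
      if (next.isDirectory() && children != null) {
        queue.addAll(children);
      }
    }
    return out;
  }

  public static void main(String[] args) {
    FileStatus dir1 = new FileStatus(0, true, 1, 1, 0, new Path("/dir1"));
    FileStatus dir2 = new FileStatus(0, true, 1, 1, 0, new Path("/dir1/dir2"));
    FileStatus file1 = new FileStatus(1, false, 1, 1, 0,
        new Path("/dir1/dir2/file1"));
    Map<Path, List<FileStatus>> store = new HashMap<>();
    store.put(dir1.getPath(), Arrays.asList(dir2));
    store.put(dir2.getPath(), Arrays.asList(file1));
    for (FileStatus status : descendants(store, dir1)) {
      // Prints: D /dir1, D /dir1/dir2, F /dir1/dir2/file1
      System.out.printf("%s %s%n",
          status.isDirectory() ? "D" : "F", status.getPath());
    }
  }
}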
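Finally, the change is visible to users through the ordinary
FileSystem#listFiles() call: when an S3Guard metadata store is configured for
the bucket (HADOOP-13345 branch), the returned iterator is the union of the
S3 listing and the store's view, so recently written files whose keys S3
lists with a delay are still reported. A minimal usage example against the
public API; the bucket name and configuration are placeholders to fill in:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesExample {
  public static void main(String[] args) throws Exception {
    // Credentials and the S3Guard metadata store settings are expected to be
    // present in the configuration (e.g. via core-site.xml).
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);

    Path base = new Path("s3a://example-bucket/a/b");
    RemoteIterator<LocatedFileStatus> files = fs.listFiles(base, true);
    while (files.hasNext()) {
      LocatedFileStatus status = files.next();
      System.out.printf("%s (%d bytes)%n", status.getPath(), status.getLen());
    }
  }
}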