hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From fab...@apache.org
Subject hadoop git commit: S3Guard: DynamoDBMetadataStore::move() should populate ancestor directories. Contributed by Mingliang Liu.
Date Tue, 04 Apr 2017 18:36:24 GMT
Repository: hadoop
Updated Branches:
  refs/heads/HADOOP-13345 0c32daaae -> 48bda91e4


S3Guard: DynamoDBMetadataStore::move() should populate ancestor directories.  Contributed
by Mingliang Liu.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/48bda91e
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/48bda91e
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/48bda91e

Branch: refs/heads/HADOOP-13345
Commit: 48bda91e4888b6311bc327351fe9fb3512b8ec1e
Parents: 0c32daa
Author: Aaron Fabbri <fabbri@apache.org>
Authored: Tue Apr 4 11:35:42 2017 -0700
Committer: Aaron Fabbri <fabbri@apache.org>
Committed: Tue Apr 4 11:35:42 2017 -0700

----------------------------------------------------------------------
 .../fs/s3a/s3guard/DynamoDBMetadataStore.java   | 34 +++++++++-
 .../fs/s3a/s3guard/MetadataStoreTestBase.java   |  4 +-
 .../s3a/s3guard/TestDynamoDBMetadataStore.java  | 70 ++++++++++++++++++++
 3 files changed, 105 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/48bda91e/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index f598764..c3718d1 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Date;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
@@ -418,9 +419,40 @@ public class DynamoDBMetadataStore implements MetadataStore {
         pathsToCreate == null ? 0 : pathsToCreate.size());
     LOG.trace("move: pathsToDelete = {}, pathsToCreate = {}", pathsToDelete,
         pathsToCreate);
+
+    // In the DynamoDBMetadataStore implementation, we assume that if a path
+    // exists, all its ancestors will also exist in the table.
+    // The following code maintains this invariant by also putting all
+    // non-root ancestor directories of the paths to create, i.e. those
+    // ancestor paths that are not explicitly included in the paths to create.
+    Collection<PathMetadata> inferredPathsToCreate = null;
+    if (pathsToCreate != null) {
+      inferredPathsToCreate = new ArrayList<>(pathsToCreate);
+      // helper set for fast lookup; we should avoid putting duplicate paths
+      final Collection<Path> fullPathsToCreate = new HashSet<>();
+      for (PathMetadata meta : pathsToCreate) {
+        fullPathsToCreate.add(meta.getFileStatus().getPath());
+      }
+
+      for (PathMetadata meta : pathsToCreate) {
+        Preconditions.checkArgument(meta != null);
+        Path parent = meta.getFileStatus().getPath().getParent();
+        while (parent != null
+            && !parent.isRoot()
+            && !fullPathsToCreate.contains(parent)) {
+          LOG.debug("move: auto-create ancestor path {} for child path {}",
+              parent, meta.getFileStatus().getPath());
+          final FileStatus status = makeDirStatus(parent, username);
+          inferredPathsToCreate.add(new PathMetadata(status, Tristate.FALSE));
+          fullPathsToCreate.add(parent);
+          parent = parent.getParent();
+        }
+      }
+    }
+
     try {
       processBatchWriteRequest(pathToKey(pathsToDelete),
-          pathMetadataToItem(pathsToCreate));
+          pathMetadataToItem(inferredPathsToCreate));
     } catch (AmazonClientException e) {
       throw translateException("move", (String) null, e);
     }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/48bda91e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
index ae899ed..36a5cc5 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStoreTestBase.java
@@ -710,7 +710,7 @@ public abstract class MetadataStoreTestBase extends Assert {
     assertNull(pathStr + " should not be cached.", meta);
   }
 
-  private void assertCached(String pathStr) throws IOException {
+  protected void assertCached(String pathStr) throws IOException {
     Path path = strToPath(pathStr);
     PathMetadata meta = ms.get(path);
     assertNotNull(pathStr + " should be cached.", meta);
@@ -719,7 +719,7 @@ public abstract class MetadataStoreTestBase extends Assert {
   /**
    * Convenience to create a fully qualified Path from string.
    */
-  private Path strToPath(String p) {
+  Path strToPath(String p) {
     final Path path = new Path(p);
     assert path.isAbsolute();
     return path.makeQualified(contract.getFileSystem().getUri(), null);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/48bda91e/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
index ca5f95c..a841e21 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestDynamoDBMetadataStore.java
@@ -32,6 +32,7 @@ import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
 import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
 import com.amazonaws.services.dynamodbv2.model.TableDescription;
 
+import com.google.common.collect.Lists;
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.hadoop.fs.s3a.Tristate;
 
@@ -419,6 +420,75 @@ public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
     }
   }
 
+  /**
+   * Test that when moving nested paths, all their ancestors up to the
+   * destination root will also be created.
+   * Here are the source (a/b/src) and destination (c/d/e/dest) trees:
+   * testMovePopulatesAncestors
+   * ├── a
+   * │   └── b
+   * │       └── src
+   * │           ├── dir1
+   * │           │   └── dir2
+   * │           └── file1.txt
+   * └── c
+   *     └── d
+   *         └── e
+   *             └── dest
+   *
+   * As part of rename(a/b/src, c/d/e/dest), S3A will enumerate the subtree at
+   * a/b/src.  This test verifies that after the move, the new subtree at
+   * 'dest' is reachable from the root (i.e. c/, c/d and c/d/e exist in the
+   * table).  DynamoDBMetadataStore depends on this property to do recursive
+   * delete without a full table scan.
+   */
+  @Test
+  public void testMovePopulatesAncestors() throws IOException {
+    final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
+    final String testRoot = "/testMovePopulatesAncestors";
+    final String srcRoot = testRoot + "/a/b/src";
+    final String destRoot = testRoot + "/c/d/e/dest";
+
+    final Path nestedPath1 = strToPath(srcRoot + "/file1.txt");
+    ddbms.put(new PathMetadata(basicFileStatus(nestedPath1, 1024, false)));
+    final Path nestedPath2 = strToPath(srcRoot + "/dir1/dir2");
+    ddbms.put(new PathMetadata(basicFileStatus(nestedPath2, 0, true)));
+
+    // We don't put the destRoot path here, since put() would create ancestor
+    // entries, and we want to ensure that move() does it, instead.
+
+    // Build enumeration of src / dest paths and do the move()
+    final Collection<Path> fullSourcePaths = Lists.newArrayList(
+        strToPath(srcRoot),
+        strToPath(srcRoot + "/dir1"),
+        strToPath(srcRoot + "/dir1/dir2"),
+        strToPath(srcRoot + "/file1.txt")
+    );
+    final Collection<PathMetadata> pathsToCreate = Lists.newArrayList(
+        new PathMetadata(basicFileStatus(strToPath(destRoot),
+            0, true)),
+        new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1"),
+            0, true)),
+        new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1/dir2"),
+            0, true)),
+        new PathMetadata(basicFileStatus(strToPath(destRoot + "/file1.txt"),
+            1024, false))
+    );
+
+    ddbms.move(fullSourcePaths, pathsToCreate);
+
+    // assert that all the destination ancestors were populated automatically
+    assertCached(testRoot + "/c");
+    assertCached(testRoot + "/c/d");
+    assertCached(testRoot + "/c/d/e");
+    assertCached(destRoot /* /c/d/e/dest */);
+
+    // Also check moved files while we're at it
+    assertCached(destRoot + "/dir1");
+    assertCached(destRoot + "/dir1/dir2");
+    assertCached(destRoot + "/file1.txt");
+  }
+
   @Test
   public void testProvisionTable() throws IOException {
     final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message