hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ste...@apache.org
Subject [1/2] hadoop git commit: HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh Balamohan.
Date Thu, 29 Sep 2016 16:01:20 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 c52ad9ee8 -> 896df3f55
  refs/heads/trunk a1b8251bf -> ee0c722dc


HADOOP-13164 Optimize S3AFileSystem::deleteUnnecessaryFakeDirectories. Contributed by Rajesh
Balamohan.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/896df3f5
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/896df3f5
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/896df3f5

Branch: refs/heads/branch-2
Commit: 896df3f55abf211768cefaa9be8bad2e0b26a2f3
Parents: c52ad9e
Author: Steve Loughran <stevel@apache.org>
Authored: Thu Sep 29 16:59:33 2016 +0100
Committer: Steve Loughran <stevel@apache.org>
Committed: Thu Sep 29 17:00:34 2016 +0100

----------------------------------------------------------------------
 .../fs/contract/AbstractFSContractTestBase.java |  2 +-
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 65 +++++++--------
 .../hadoop/fs/s3a/S3AInstrumentation.java       | 10 +++
 .../org/apache/hadoop/fs/s3a/Statistic.java     |  4 +
 .../fs/s3a/ITestS3AFileOperationCost.java       | 85 ++++++++++++++++++++
 .../org/apache/hadoop/fs/s3a/S3ATestUtils.java  | 13 ++-
 6 files changed, 144 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/896df3f5/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
index baea968..b2e68f5 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java
@@ -359,7 +359,7 @@ public abstract class AbstractFSContractTestBase extends Assert
     assertEquals(text + " wrong read result " + result, -1, result);
   }
 
-  boolean rename(Path src, Path dst) throws IOException {
+  protected boolean rename(Path src, Path dst) throws IOException {
     return getFileSystem().rename(src, dst);
   }
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/896df3f5/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 87bf774..b22f8c6 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -676,7 +676,7 @@ public class S3AFileSystem extends FileSystem {
           copyFile(summary.getKey(), newDstKey, summary.getSize());
 
           if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
-            removeKeys(keysToDelete, true);
+            removeKeys(keysToDelete, true, false);
           }
         }
 
@@ -684,7 +684,7 @@ public class S3AFileSystem extends FileSystem {
           objects = continueListObjects(objects);
         } else {
           if (!keysToDelete.isEmpty()) {
-            removeKeys(keysToDelete, false);
+            removeKeys(keysToDelete, false, false);
           }
           break;
         }
@@ -932,17 +932,25 @@ public class S3AFileSystem extends FileSystem {
    * @param keysToDelete collection of keys to delete on the s3-backend
    * @param clearKeys clears the keysToDelete-list after processing the list
    *            when set to true
+   * @param deleteFakeDir indicates whether this is for deleting fake dirs
    */
   private void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
-          boolean clearKeys) throws AmazonClientException {
+      boolean clearKeys, boolean deleteFakeDir) throws AmazonClientException {
+    if (keysToDelete.isEmpty()) {
+      // no keys
+      return;
+    }
     if (enableMultiObjectsDelete) {
       deleteObjects(new DeleteObjectsRequest(bucket).withKeys(keysToDelete));
-      instrumentation.fileDeleted(keysToDelete.size());
     } else {
       for (DeleteObjectsRequest.KeyVersion keyVersion : keysToDelete) {
         deleteObject(keyVersion.getKey());
       }
+    }
+    if (!deleteFakeDir) {
       instrumentation.fileDeleted(keysToDelete.size());
+    } else {
+      instrumentation.fakeDirsDeleted(keysToDelete.size());
     }
     if (clearKeys) {
       keysToDelete.clear();
@@ -1025,7 +1033,7 @@ public class S3AFileSystem extends FileSystem {
             LOG.debug("Got object to delete {}", summary.getKey());
 
             if (keys.size() == MAX_ENTRIES_TO_DELETE) {
-              removeKeys(keys, true);
+              removeKeys(keys, true, false);
             }
           }
 
@@ -1033,7 +1041,7 @@ public class S3AFileSystem extends FileSystem {
             objects = continueListObjects(objects);
           } else {
             if (!keys.isEmpty()) {
-              removeKeys(keys, false);
+              removeKeys(keys, false, false);
             }
             break;
           }
@@ -1512,37 +1520,30 @@ public class S3AFileSystem extends FileSystem {
   /**
    * Delete mock parent directories which are no longer needed.
    * This code swallows IO exceptions encountered
-   * @param f path
-   */
-  private void deleteUnnecessaryFakeDirectories(Path f) {
-    while (true) {
-      String key = "";
-      try {
-        key = pathToKey(f);
-        if (key.isEmpty()) {
-          break;
-        }
-
-        S3AFileStatus status = getFileStatus(f);
-
-        if (status.isDirectory() && status.isEmptyDirectory()) {
-          LOG.debug("Deleting fake directory {}/", key);
-          deleteObject(key + "/");
+   * @param path path
+   */
+  private void deleteUnnecessaryFakeDirectories(Path path) {
+    List<DeleteObjectsRequest.KeyVersion> keysToRemove = new ArrayList<>();
+    while (!path.isRoot()) {
+      String key = pathToKey(path);
+      key = (key.endsWith("/")) ? key : (key + "/");
+      keysToRemove.add(new DeleteObjectsRequest.KeyVersion(key));
+      path = path.getParent();
+    }
+    try {
+      removeKeys(keysToRemove, false, true);
+    } catch(AmazonClientException e) {
+      instrumentation.errorIgnored();
+      if (LOG.isDebugEnabled()) {
+        StringBuilder sb = new StringBuilder();
+        for(DeleteObjectsRequest.KeyVersion kv : keysToRemove) {
+          sb.append(kv.getKey()).append(",");
         }
-      } catch (IOException | AmazonClientException e) {
-        LOG.debug("While deleting key {} ", key, e);
-        instrumentation.errorIgnored();
+        LOG.debug("While deleting keys {} ", sb.toString(), e);
       }
-
-      if (f.isRoot()) {
-        break;
-      }
-
-      f = f.getParent();
     }
   }
 
-
   private void createFakeDirectory(final String objectName)
       throws AmazonClientException, AmazonServiceException,
       InterruptedIOException {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/896df3f5/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
index b4c4063..26b5b51 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java
@@ -75,6 +75,7 @@ public class S3AInstrumentation {
   private final MutableCounterLong numberOfFilesCopied;
   private final MutableCounterLong bytesOfFilesCopied;
   private final MutableCounterLong numberOfFilesDeleted;
+  private final MutableCounterLong numberOfFakeDirectoryDeletes;
   private final MutableCounterLong numberOfDirectoriesCreated;
   private final MutableCounterLong numberOfDirectoriesDeleted;
   private final Map<String, MutableCounterLong> streamMetrics =
@@ -135,6 +136,7 @@ public class S3AInstrumentation {
     numberOfFilesCopied = counter(FILES_COPIED);
     bytesOfFilesCopied = counter(FILES_COPIED_BYTES);
     numberOfFilesDeleted = counter(FILES_DELETED);
+    numberOfFakeDirectoryDeletes = counter(FAKE_DIRECTORIES_DELETED);
     numberOfDirectoriesCreated = counter(DIRECTORIES_CREATED);
     numberOfDirectoriesDeleted = counter(DIRECTORIES_DELETED);
     ignoredErrors = counter(IGNORED_ERRORS);
@@ -296,6 +298,14 @@ public class S3AInstrumentation {
   }
 
   /**
+   * Indicate that fake directory request was made.
+   * @param count number of directory entries included in the delete request.
+   */
+  public void fakeDirsDeleted(int count) {
+    numberOfFakeDirectoryDeletes.incr(count);
+  }
+
+  /**
    * Indicate that S3A created a directory.
    */
   public void directoryCreated() {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/896df3f5/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
index cbc34d6..d84a355 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -42,6 +42,10 @@ public enum Statistic {
       "Total number of files created through the object store."),
   FILES_DELETED("files_deleted",
       "Total number of files deleted from the object store."),
+  FAKE_DIRECTORIES_CREATED("fake_directories_created",
+      "Total number of fake directory entries created in the object store."),
+  FAKE_DIRECTORIES_DELETED("fake_directories_deleted",
+      "Total number of fake directory deletes submitted to object store."),
   IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
   INVOCATION_COPY_FROM_LOCAL_FILE(CommonStatisticNames.OP_COPY_FROM_LOCAL_FILE,
       "Calls of copyFromLocalFile()"),

http://git-wip-us.apache.org/repos/asf/hadoop/blob/896df3f5/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
index 2a6ba0c..f19ea95 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java
@@ -188,4 +188,89 @@ public class ITestS3AFileOperationCost extends AbstractFSContractTestBase
{
       tmpFile.delete();
     }
   }
+
+  private void reset(MetricDiff... diffs) {
+    for (MetricDiff diff : diffs) {
+      diff.reset();
+    }
+  }
+
+  @Test
+  public void testFakeDirectoryDeletion() throws Throwable {
+    describe("Verify whether create file works after renaming a file. "
+        + "In S3, rename deletes any fake directories as a part of "
+        + "clean up activity");
+    S3AFileSystem fs = getFileSystem();
+    Path srcBaseDir = path("src");
+    mkdirs(srcBaseDir);
+    MetricDiff deleteRequests =
+        new MetricDiff(fs, Statistic.OBJECT_DELETE_REQUESTS);
+    MetricDiff directoriesDeleted =
+        new MetricDiff(fs, Statistic.DIRECTORIES_DELETED);
+    MetricDiff fakeDirectoriesDeleted =
+        new MetricDiff(fs, Statistic.FAKE_DIRECTORIES_DELETED);
+    MetricDiff directoriesCreated =
+        new MetricDiff(fs, Statistic.DIRECTORIES_CREATED);
+
+    Path srcDir = new Path(srcBaseDir, "1/2/3/4/5/6");
+    Path srcFilePath = new Path(srcDir, "source.txt");
+    int srcDirDepth = directoriesInPath(srcDir);
+    // one dir created, one removed
+    mkdirs(srcDir);
+    String state = "after mkdir(srcDir)";
+    directoriesCreated.assertDiffEquals(state, 1);
+/*  TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+*/
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    // creating a file should trigger demise of the src dir
+    touch(fs, srcFilePath);
+    state = "after touch(fs, srcFilePath)";
+    deleteRequests.assertDiffEquals(state, 1);
+    directoriesCreated.assertDiffEquals(state, 0);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, srcDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    Path destBaseDir = path("dest");
+    Path destDir = new Path(destBaseDir, "1/2/3/4/5/6");
+    Path destFilePath = new Path(destDir, "dest.txt");
+    mkdirs(destDir);
+    state = "after mkdir(destDir)";
+
+    int destDirDepth = directoriesInPath(destDir);
+    directoriesCreated.assertDiffEquals(state, 1);
+/*  TODO: uncomment once HADOOP-13222 is in
+    deleteRequests.assertDiffEquals(state,1);
+    directoriesDeleted.assertDiffEquals(state,0);
+    fakeDirectoriesDeleted.assertDiffEquals(state,destDirDepth);
+*/
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    fs.rename(srcFilePath, destFilePath);
+    state = "after rename(srcFilePath, destFilePath)";
+    directoriesCreated.assertDiffEquals(state, 1);
+    // one for the renamed file, one for the parent
+    deleteRequests.assertDiffEquals(state, 2);
+    directoriesDeleted.assertDiffEquals(state, 0);
+    fakeDirectoriesDeleted.assertDiffEquals(state, destDirDepth);
+
+    reset(deleteRequests, directoriesCreated, directoriesDeleted,
+        fakeDirectoriesDeleted);
+
+    assertIsFile(destFilePath);
+    assertIsDirectory(srcDir);
+  }
+
+  private int directoriesInPath(Path path) {
+    return path.isRoot() ? 0 : 1 + directoriesInPath(path.getParent());
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/896df3f5/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
index e45db48..95f6d4b 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
@@ -298,14 +298,23 @@ public class S3ATestUtils {
 
     /**
      * Assert that the value of {@link #diff()} matches that expected.
+     * @param message message to print; metric name is appended
      * @param expected expected value.
      */
-    public void assertDiffEquals(long expected) {
-      Assert.assertEquals("Count of " + this,
+    public void assertDiffEquals(String message, long expected) {
+      Assert.assertEquals(message + ": " + statistic.getSymbol(),
           expected, diff());
     }
 
     /**
+     * Assert that the value of {@link #diff()} matches that expected.
+     * @param expected expected value.
+     */
+    public void assertDiffEquals(long expected) {
+      assertDiffEquals("Count of " + this, expected);
+    }
+
+    /**
      * Assert that the value of {@link #diff()} matches that of another
      * instance.
      * @param that the other metric diff instance.


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message