hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ste...@apache.org
Subject hadoop git commit: HADOOP-14633. S3Guard: optimize create codepath. Contributed by Aaron Fabbri.
Date Wed, 09 Aug 2017 13:33:49 GMT
Repository: hadoop
Updated Branches:
  refs/heads/HADOOP-13345 7bd0284b9 -> 884ae451c


HADOOP-14633. S3Guard: optimize create codepath.
Contributed by Aaron Fabbri.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/884ae451
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/884ae451
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/884ae451

Branch: refs/heads/HADOOP-13345
Commit: 884ae451c67a9342d9939db2901181979d3ad267
Parents: 7bd0284
Author: Steve Loughran <stevel@apache.org>
Authored: Wed Aug 9 14:21:20 2017 +0100
Committer: Steve Loughran <stevel@apache.org>
Committed: Wed Aug 9 14:21:20 2017 +0100

----------------------------------------------------------------------
 .../apache/hadoop/fs/s3a/s3guard/S3Guard.java   | 60 ++++++++------
 .../fs/s3a/scale/ITestS3ACreatePerformance.java | 86 ++++++++++++++++++++
 2 files changed, 123 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/884ae451/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
index 51c370c..d8ae3fb 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
@@ -274,29 +274,43 @@ public final class S3Guard {
      *    true
      */
     FileStatus prevStatus = null;
-    // Iterate from leaf to root
-    for (int i = 0; i < dirs.size(); i++) {
-      boolean isLeaf = (prevStatus == null);
-      Path f = dirs.get(i);
-      assertQualified(f);
-      FileStatus status = S3AUtils.createUploadFileStatus(f, true, 0, 0, owner);
-      Collection<PathMetadata> children;
-      if (isLeaf) {
-        children = DirListingMetadata.EMPTY_DIR;
-      } else {
-        children = new ArrayList<>(1);
-        children.add(new PathMetadata(prevStatus));
-      }
-      DirListingMetadata dirMeta =
-          new DirListingMetadata(f, children, authoritative);
-      try {
-        ms.put(dirMeta);
-        ms.put(new PathMetadata(status));
-      } catch (IOException ioe) {
-        LOG.error("MetadataStore#put() failure:", ioe);
-        return;
+
+    // Use new batched put to reduce round trips.
+    List<PathMetadata> pathMetas = new ArrayList<>(dirs.size());
+
+    try {
+      // Iterate from leaf to root
+      for (int i = 0; i < dirs.size(); i++) {
+        boolean isLeaf = (prevStatus == null);
+        Path f = dirs.get(i);
+        assertQualified(f);
+        FileStatus status =
+            S3AUtils.createUploadFileStatus(f, true, 0, 0, owner);
+
+        // We only need to put a DirListingMetadata if we are setting
+        // authoritative bit
+        DirListingMetadata dirMeta = null;
+        if (authoritative) {
+          Collection<PathMetadata> children;
+          if (isLeaf) {
+            children = DirListingMetadata.EMPTY_DIR;
+          } else {
+            children = new ArrayList<>(1);
+            children.add(new PathMetadata(prevStatus));
+          }
+          dirMeta = new DirListingMetadata(f, children, authoritative);
+          ms.put(dirMeta);
+        }
+
+        pathMetas.add(new PathMetadata(status));
+        prevStatus = status;
       }
-      prevStatus = status;
+
+      // Batched put
+      ms.put(pathMetas);
+    } catch (IOException ioe) {
+      LOG.error("MetadataStore#put() failure:", ioe);
+      return;
     }
   }
 
@@ -407,7 +421,7 @@ public final class S3Guard {
       if (directory == null || directory.isDeleted()) {
         FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username,
             null, parent);
-        PathMetadata meta = new PathMetadata(status, Tristate.UNKNOWN, false);
+        PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false);
         newDirs.add(meta);
       } else {
         break;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/884ae451/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ACreatePerformance.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ACreatePerformance.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ACreatePerformance.java
new file mode 100644
index 0000000..fd32ba5
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3ACreatePerformance.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.OutputStream;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+
+/**
+ * Tests for create(): performance and/or load testing.
+ */
+public class ITestS3ACreatePerformance extends S3AScaleTestBase {
+  private static final Logger LOG = LoggerFactory.getLogger(
+      ITestS3ADirectoryPerformance.class);
+
+  private Path basePath;
+  private int basePathDepth;
+  private static final int PATH_DEPTH = 10;
+
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    basePath = getTestPath();
+    basePathDepth = basePath.depth();
+  }
+
+  /**
+   * Test rate at which we can create deeply-nested files from a single thread.
+   * @throws Exception
+   */
+  @Test
+  public void testDeepSequentialCreate() throws Exception {
+    long numOperations = getOperationCount();
+    S3AFileSystem fs = getFileSystem();
+
+    NanoTimer timer = new NanoTimer();
+    for (int i = 0; i < numOperations; i++) {
+      Path p = getPathIteration(i, PATH_DEPTH);
+      OutputStream out = fs.create(p);
+      out.write(40);  // one byte file with some value 40
+      out.close();
+    }
+    timer.end("Time to create %d files of depth %d", getOperationCount(),
+        PATH_DEPTH);
+    LOG.info("Time per create: {} msec",
+        timer.nanosPerOperation(numOperations) / 1000);
+  }
+
+  /* Get a unique path of depth totalDepth for given test iteration. */
+  private Path getPathIteration(long iter, int totalDepth) throws Exception {
+    assertTrue("Test path too long, increase PATH_DEPTH in test.",
+        totalDepth > basePathDepth);
+
+    int neededDirs = totalDepth - basePathDepth - 1;
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < neededDirs; i++) {
+      sb.append("iter-").append(iter);
+      sb.append("-dir-").append(i);
+      sb.append("/");
+    }
+    sb.append("file").append(iter);
+    return new Path(basePath, sb.toString());
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message