hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cnaur...@apache.org
Subject [1/6] hadoop git commit: HADOOP-13171. Add StorageStatistics to S3A; instrument some more operations. Contributed by Steve Loughran.
Date Fri, 03 Jun 2016 16:33:29 GMT
Repository: hadoop
Updated Branches:
  refs/heads/branch-2 b6d5546e2 -> 043a0c2e6
  refs/heads/branch-2.8 d0dc5aaa2 -> b8216c10d
  refs/heads/trunk 97e244947 -> c58a59f70


http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
new file mode 100644
index 0000000..d29cb2f
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+/**
+ * Statistics which are collected in S3A.
+ * These statistics are available at a low level in {@link S3AStorageStatistics}
+ * and as metrics in {@link S3AInstrumentation}
+ */
+public enum Statistic {
+
+  DIRECTORIES_CREATED("directories_created",
+      "Total number of directories created through the object store."),
+  DIRECTORIES_DELETED("directories_deleted",
+      "Total number of directories deleted through the object store."),
+  FILES_COPIED("files_copied",
+      "Total number of files copied within the object store."),
+  FILES_COPIED_BYTES("files_copied_bytes",
+      "Total number of bytes copied within the object store."),
+  FILES_CREATED("files_created",
+      "Total number of files created through the object store."),
+  FILES_DELETED("files_deleted",
+      "Total number of files deleted from the object store."),
+  IGNORED_ERRORS("ignored_errors", "Errors caught and ignored"),
+  INVOCATION_COPY_FROM_LOCAL_FILE("invocations_copyfromlocalfile",
+      "Calls of copyFromLocalFile()"),
+  INVOCATION_EXISTS("invocations_exists",
+      "Calls of exists()"),
+  INVOCATION_GET_FILE_STATUS("invocations_getfilestatus",
+      "Calls of getFileStatus()"),
+  INVOCATION_GLOB_STATUS("invocations_globstatus",
+      "Calls of globStatus()"),
+  INVOCATION_IS_DIRECTORY("invocations_is_directory",
+      "Calls of isDirectory()"),
+  INVOCATION_IS_FILE("invocations_is_file",
+      "Calls of isFile()"),
+  INVOCATION_LIST_FILES("invocations_listfiles",
+      "Calls of listFiles()"),
+  INVOCATION_LIST_LOCATED_STATUS("invocations_listlocatedstatus",
+      "Calls of listLocatedStatus()"),
+  INVOCATION_LIST_STATUS("invocations_liststatus",
+      "Calls of listStatus()"),
+  INVOCATION_MKDIRS("invocations_mkdirs",
+      "Calls of mkdirs()"),
+  INVOCATION_RENAME("invocations_rename",
+      "Calls of rename()"),
+  OBJECT_COPY_REQUESTS("object_copy_requests", "Object copy requests"),
+  OBJECT_DELETE_REQUESTS("object_delete_requests", "Object delete requests"),
+  OBJECT_LIST_REQUESTS("object_list_requests",
+      "Number of object listings made"),
+  OBJECT_METADATA_REQUESTS("object_metadata_requests",
+      "Number of requests for object metadata"),
+  OBJECT_MULTIPART_UPLOAD_ABORTED("object_multipart_aborted",
+      "Object multipart upload aborted"),
+  OBJECT_PUT_REQUESTS("object_put_requests",
+      "Object put/multipart upload count"),
+  OBJECT_PUT_BYTES("object_put_bytes", "Number of bytes uploaded"),
+  STREAM_ABORTED("streamAborted",
+      "Count of times the TCP stream was aborted"),
+  STREAM_BACKWARD_SEEK_OPERATIONS("streamBackwardSeekOperations",
+      "Number of executed seek operations which went backwards in a stream"),
+  STREAM_CLOSED("streamClosed", "Count of times the TCP stream was closed"),
+  STREAM_CLOSE_OPERATIONS("streamCloseOperations",
+      "Total count of times an attempt to close a data stream was made"),
+  STREAM_FORWARD_SEEK_OPERATIONS("streamForwardSeekOperations",
+      "Number of executed seek operations which went forward in a stream"),
+  STREAM_OPENED("streamOpened",
+      "Total count of times an input stream to object store was opened"),
+  STREAM_READ_EXCEPTIONS("streamReadExceptions",
+      "Number of read exceptions caught and attempted to be recovered from"),
+  STREAM_READ_FULLY_OPERATIONS("streamReadFullyOperations",
+      "Count of readFully() operations in streams"),
+  STREAM_READ_OPERATIONS("streamReadOperations",
+      "Count of read() operations in streams"),
+  STREAM_READ_OPERATIONS_INCOMPLETE("streamReadOperationsIncomplete",
+      "Count of incomplete read() operations in streams"),
+  STREAM_SEEK_BYTES_BACKWARDS("streamBytesBackwardsOnSeek",
+      "Count of bytes moved backwards during seek operations"),
+  STREAM_SEEK_BYTES_READ("streamBytesRead",
+      "Count of bytes read during seek() in stream operations"),
+  STREAM_SEEK_BYTES_SKIPPED("streamBytesSkippedOnSeek",
+      "Count of bytes skipped during forward seek operation"),
+  STREAM_SEEK_OPERATIONS("streamSeekOperations",
+      "Number of seek operations invoked on input streams");
+
+  Statistic(String symbol, String description) {
+    this.symbol = symbol;
+    this.description = description;
+  }
+
+  private final String symbol;
+  private final String description;
+
+  public String getSymbol() {
+    return symbol;
+  }
+
+  /**
+   * Get a statistic from a symbol.
+   * @param symbol statistic to look up
+   * @return the value or null.
+   */
+  public static Statistic fromSymbol(String symbol) {
+    if (symbol != null) {
+      for (Statistic opType : values()) {
+        if (opType.getSymbol().equals(symbol)) {
+          return opType;
+        }
+      }
+    }
+    return null;
+  }
+
+  public String getDescription() {
+    return description;
+  }
+
+  /**
+   * The string value is simply the symbol.
+   * This makes this operation very low cost.
+   * @return the symbol of this statistic.
+   */
+  @Override
+  public String toString() {
+    return symbol;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 4f5a077..7a5e455 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -735,9 +735,19 @@ The exact number of operations to perform is configurable in the option
 Larger values generate more load, and are recommended when testing locally,
 or in batch runs.
 
-Smaller values should result in faster test runs, especially when the object
+Smaller values result in faster test runs, especially when the object
 store is a long way away.
 
+Operations which work on directories have a separate option: this controls
+the width and depth of tests creating recursive directories. Larger
+values create exponentially more directories, with consequent performance
+impact.
+
+      <property>
+        <name>scale.test.directory.count</name>
+        <value>2</value>
+      </property>
+
 DistCp tests targeting S3A support a configurable file size.  The default is
 10 MB, but the configuration value is expressed in KB so that it can be tuned
 smaller to achieve faster test runs.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
index a4f9b99..04010d6 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
@@ -21,7 +21,9 @@ package org.apache.hadoop.fs.s3a;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileContext;
+import org.junit.Assert;
 import org.junit.internal.AssumptionViolatedException;
+import org.slf4j.Logger;
 
 import java.io.IOException;
 import java.net.URI;
@@ -190,4 +192,155 @@ public class S3ATestUtils {
     }
   }
 
+  /**
+   * Reset all metrics in a list.
+   * @param metrics metrics to reset
+   */
+  public static void reset(S3ATestUtils.MetricDiff... metrics) {
+    for (S3ATestUtils.MetricDiff metric : metrics) {
+      metric.reset();
+    }
+  }
+
+  /**
+   * Print all metrics in a list.
+   * @param log log to print the metrics to.
+   * @param metrics metrics to process
+   */
+  public static void print(Logger log, S3ATestUtils.MetricDiff... metrics) {
+    for (S3ATestUtils.MetricDiff metric : metrics) {
+      log.info(metric.toString());
+    }
+  }
+
+  /**
+   * Print all metrics in a list, then reset them.
+   * @param log log to print the metrics to.
+   * @param metrics metrics to process
+   */
+  public static void printThenReset(Logger log,
+      S3ATestUtils.MetricDiff... metrics) {
+    print(log, metrics);
+    reset(metrics);
+  }
+
+  /**
+   * Helper class to do diffs of metrics.
+   */
+  public static final class MetricDiff {
+    private final S3AFileSystem fs;
+    private final Statistic statistic;
+    private long startingValue;
+
+    /**
+     * Constructor.
+     * Invokes {@link #reset()} so it is immediately capable of measuring the
+     * difference in metric values.
+     *
+     * @param fs the filesystem to monitor
+     * @param statistic the statistic to monitor.
+     */
+    public MetricDiff(S3AFileSystem fs, Statistic statistic) {
+      this.fs = fs;
+      this.statistic = statistic;
+      reset();
+    }
+
+    /**
+     * Reset the starting value to the current value.
+     * Diffs will be against this new value.
+     */
+    public void reset() {
+      startingValue = currentValue();
+    }
+
+    /**
+     * Get the current value of the metric.
+     * @return the latest value.
+     */
+    public long currentValue() {
+      return fs.getInstrumentation().getCounterValue(statistic);
+    }
+
+    /**
+     * Get the difference between the current value and
+     * {@link #startingValue}.
+     * @return the difference.
+     */
+    public long diff() {
+      return currentValue() - startingValue;
+    }
+
+    @Override
+    public String toString() {
+      long c = currentValue();
+      final StringBuilder sb = new StringBuilder(statistic.getSymbol());
+      sb.append(" starting=").append(startingValue);
+      sb.append(" current=").append(c);
+      sb.append(" diff=").append(c - startingValue);
+      return sb.toString();
+    }
+
+    /**
+     * Assert that the value of {@link #diff()} matches that expected.
+     * @param expected expected value.
+     */
+    public void assertDiffEquals(long expected) {
+      Assert.assertEquals("Count of " + this,
+          expected, diff());
+    }
+
+    /**
+     * Assert that the value of {@link #diff()} matches that of another
+     * instance.
+     * @param that the other metric diff instance.
+     */
+    public void assertDiffEquals(MetricDiff that) {
+      Assert.assertEquals(this.toString() + " != " + that,
+          this.diff(), that.diff());
+    }
+
+    /**
+     * Comparator for assertions.
+     * @param that other metric diff
+     * @return true if the value is {@code ==} the other's
+     */
+    public boolean diffEquals(MetricDiff that) {
+      return this.currentValue() == that.currentValue();
+    }
+
+    /**
+     * Comparator for assertions.
+     * @param that other metric diff
+     * @return true if the value is {@code <} the other's
+     */
+    public boolean diffLessThan(MetricDiff that) {
+      return this.currentValue() < that.currentValue();
+    }
+
+    /**
+     * Comparator for assertions.
+     * @param that other metric diff
+     * @return true if the value is {@code <=} the other's
+     */
+    public boolean diffLessThanOrEquals(MetricDiff that) {
+      return this.currentValue() <= that.currentValue();
+    }
+
+    /**
+     * Get the statistic
+     * @return the statistic
+     */
+    public Statistic getStatistic() {
+      return statistic;
+    }
+
+    /**
+     * Get the starting value; that set in the last {@link #reset()}.
+     * @return the starting value for diffs.
+     */
+    public long getStartingValue() {
+      return startingValue;
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
new file mode 100644
index 0000000..0a8dd2d
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileOperationCost.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.AbstractFSContract;
+import org.apache.hadoop.fs.contract.AbstractFSContractTestBase;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.contract.s3a.S3AContract;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.net.URI;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+import static org.apache.hadoop.fs.s3a.Statistic.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.MetricDiff;
+import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
+
+/**
+ * Use metrics to assert about the cost of file status queries.
+ * {@link S3AFileSystem#getFileStatus(Path)}.
+ */
+public class TestS3AFileOperationCost extends AbstractFSContractTestBase {
+
+  private MetricDiff metadataRequests;
+  private MetricDiff listRequests;
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestS3AFileOperationCost.class);
+
+  @Override
+  protected AbstractFSContract createContract(Configuration conf) {
+    return new S3AContract(conf);
+  }
+
+  @Override
+  public S3AFileSystem getFileSystem() {
+    return (S3AFileSystem) super.getFileSystem();
+  }
+
+  @Override
+  public void setup() throws Exception {
+    super.setup();
+    S3AFileSystem fs = getFileSystem();
+    metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
+    listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnFile() throws Throwable {
+    describe("performing getFileStatus on a file");
+    Path simpleFile = path("simple.txt");
+    S3AFileSystem fs = getFileSystem();
+    touch(fs, simpleFile);
+    resetMetricDiffs();
+    S3AFileStatus status = fs.getFileStatus(simpleFile);
+    assertTrue("not a file: " + status, status.isFile());
+    metadataRequests.assertDiffEquals(1);
+    listRequests.assertDiffEquals(0);
+  }
+
+  private void resetMetricDiffs() {
+    reset(metadataRequests, listRequests);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnEmptyDir() throws Throwable {
+    describe("performing getFileStatus on an empty directory");
+    S3AFileSystem fs = getFileSystem();
+    Path dir = path("empty");
+    fs.mkdirs(dir);
+    resetMetricDiffs();
+    S3AFileStatus status = fs.getFileStatus(dir);
+    assertTrue("not empty: " + status, status.isEmptyDirectory());
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(0);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnMissingFile() throws Throwable {
+    describe("performing getFileStatus on a missing file");
+    S3AFileSystem fs = getFileSystem();
+    Path path = path("missing");
+    resetMetricDiffs();
+    try {
+      S3AFileStatus status = fs.getFileStatus(path);
+      fail("Got a status back from a missing file path " + status);
+    } catch (FileNotFoundException expected) {
+      // expected
+    }
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(1);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnMissingSubPath() throws Throwable {
+    describe("performing getFileStatus on a missing file");
+    S3AFileSystem fs = getFileSystem();
+    Path path = path("missingdir/missingpath");
+    resetMetricDiffs();
+    try {
+      S3AFileStatus status = fs.getFileStatus(path);
+      fail("Got a status back from a missing file path " + status);
+    } catch (FileNotFoundException expected) {
+      // expected
+    }
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(1);
+  }
+
+  @Test
+  public void testCostOfGetFileStatusOnNonEmptyDir() throws Throwable {
+    describe("performing getFileStatus on a non-empty directory");
+    S3AFileSystem fs = getFileSystem();
+    Path dir = path("empty");
+    fs.mkdirs(dir);
+    Path simpleFile = new Path(dir, "simple.txt");
+    touch(fs, simpleFile);
+    resetMetricDiffs();
+    S3AFileStatus status = fs.getFileStatus(dir);
+    if (status.isEmptyDirectory()) {
+      // erroneous state
+      String fsState = fs.toString();
+      fail("FileStatus says directory is empty: " + status
+          + "\n" + ContractTestUtils.ls(fs, dir)
+          + "\n" + fsState);
+    }
+    metadataRequests.assertDiffEquals(2);
+    listRequests.assertDiffEquals(1);
+  }
+
+  @Test
+  public void testCostOfCopyFromLocalFile() throws Throwable {
+    describe("testCostOfCopyFromLocalFile");
+    File localTestDir = getTestDir("tmp");
+    localTestDir.mkdirs();
+    File tmpFile = File.createTempFile("tests3acost", ".txt",
+        localTestDir);
+    tmpFile.delete();
+    try {
+      URI localFileURI = tmpFile.toURI();
+      FileSystem localFS = FileSystem.get(localFileURI,
+          getFileSystem().getConf());
+      Path localPath = new Path(localFileURI);
+      int len = 10 * 1024;
+      byte[] data = dataset(len, 'A', 'Z');
+      writeDataset(localFS, localPath, data, len, 1024, true);
+      S3AFileSystem s3a = getFileSystem();
+      MetricDiff copyLocalOps = new MetricDiff(s3a,
+          INVOCATION_COPY_FROM_LOCAL_FILE);
+      MetricDiff putRequests = new MetricDiff(s3a,
+          OBJECT_PUT_REQUESTS);
+      MetricDiff putBytes = new MetricDiff(s3a,
+          OBJECT_PUT_BYTES);
+
+      Path remotePath = path("copied");
+      s3a.copyFromLocalFile(false, true, localPath, remotePath);
+      verifyFileContents(s3a, remotePath, data);
+      copyLocalOps.assertDiffEquals(1);
+      putRequests.assertDiffEquals(1);
+      putBytes.assertDiffEquals(len);
+      // print final stats
+      LOG.info("Filesystem {}", s3a);
+    } finally {
+      tmpFile.delete();
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
index d65f693..21639b1 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java
@@ -34,13 +34,11 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Rule;
 import org.junit.rules.TestName;
+import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.InputStream;
-import java.util.Locale;
-
-import static org.junit.Assume.assumeTrue;
 
 /**
  * Base class for scale tests; here is where the common scale configuration
@@ -51,11 +49,57 @@ public class S3AScaleTestBase extends Assert implements S3ATestConstants
{
   @Rule
   public TestName methodName = new TestName();
 
+  @Rule
+  public Timeout testTimeout = new Timeout(30 * 60 * 1000);
+
   @BeforeClass
   public static void nameThread() {
     Thread.currentThread().setName("JUnit");
   }
 
+  /**
+   * The number of operations to perform: {@value}.
+   */
+  public static final String KEY_OPERATION_COUNT =
+      SCALE_TEST + "operation.count";
+
+  /**
+   * The number of directory operations to perform: {@value}.
+   */
+  public static final String KEY_DIRECTORY_COUNT =
+      SCALE_TEST + "directory.count";
+
+  /**
+   * The readahead buffer: {@value}.
+   */
+  public static final String KEY_READ_BUFFER_SIZE =
+      S3A_SCALE_TEST + "read.buffer.size";
+
+  public static final int DEFAULT_READ_BUFFER_SIZE = 16384;
+
+  /**
+   * Key for a multi MB test file: {@value}.
+   */
+  public static final String KEY_CSVTEST_FILE =
+      S3A_SCALE_TEST + "csvfile";
+
+  /**
+   * Default path for the multi MB test file: {@value}.
+   */
+  public static final String DEFAULT_CSVTEST_FILE
+      = "s3a://landsat-pds/scene_list.gz";
+
+  /**
+   * The default number of operations to perform: {@value}.
+   */
+  public static final long DEFAULT_OPERATION_COUNT = 2005;
+
+  /**
+   * Default number of directories to create when performing
+   * directory performance/scale tests.
+   */
+  public static final int DEFAULT_DIRECTORY_COUNT = 2;
+
   protected S3AFileSystem fs;
 
   protected static final Logger LOG =
@@ -132,108 +176,4 @@ public class S3AScaleTestBase extends Assert implements S3ATestConstants
{
     }
   }
 
-  /**
-   * Make times more readable, by adding a "," every three digits.
-   * @param nanos nanos or other large number
-   * @return a string for logging
-   */
-  protected static String toHuman(long nanos) {
-    return String.format(Locale.ENGLISH, "%,d", nanos);
-  }
-
-  /**
-   * Log the bandwidth of a timer as inferred from the number of
-   * bytes processed.
-   * @param timer timer
-   * @param bytes bytes processed in the time period
-   */
-  protected void bandwidth(NanoTimer timer, long bytes) {
-    LOG.info("Bandwidth = {}  MB/S",
-        timer.bandwidthDescription(bytes));
-  }
-
-  /**
-   * Work out the bandwidth in MB/s
-   * @param bytes bytes
-   * @param durationNS duration in nanos
-   * @return the number of megabytes/second of the recorded operation
-   */
-  public static double bandwidthMBs(long bytes, long durationNS) {
-    return (bytes * 1000.0 ) / durationNS;
-  }
-
-  /**
-   * A simple class for timing operations in nanoseconds, and for
-   * printing some useful results in the process.
-   */
-  protected static class NanoTimer {
-    final long startTime;
-    long endTime;
-
-    public NanoTimer() {
-      startTime = now();
-    }
-
-    /**
-     * End the operation
-     * @return the duration of the operation
-     */
-    public long end() {
-      endTime = now();
-      return duration();
-    }
-
-    /**
-     * End the operation; log the duration
-     * @param format message
-     * @param args any arguments
-     * @return the duration of the operation
-     */
-    public long end(String format, Object... args) {
-      long d = end();
-      LOG.info("Duration of {}: {} nS",
-          String.format(format, args), toHuman(d));
-      return d;
-    }
-
-    long now() {
-      return System.nanoTime();
-    }
-
-    long duration() {
-      return endTime - startTime;
-    }
-
-    double bandwidth(long bytes) {
-      return S3AScaleTestBase.bandwidthMBs(bytes, duration());
-    }
-
-    /**
-     * Bandwidth as bytes per second
-     * @param bytes bytes in
-     * @return the number of bytes per second this operation timed.
-     */
-    double bandwidthBytes(long bytes) {
-      return (bytes * 1.0 ) / duration();
-    }
-
-    /**
-     * How many nanoseconds per byte
-     * @param bytes bytes processed in this time period
-     * @return the nanoseconds it took each byte to be processed
-     */
-    long nanosPerByte(long bytes) {
-      return duration() / bytes;
-    }
-
-    /**
-     * Get a description of the bandwidth, even down to fractions of
-     * a MB
-     * @param bytes bytes processed
-     * @return bandwidth
-     */
-    String bandwidthDescription(long bytes) {
-      return String.format("%,.6f", bandwidth(bytes));
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
index af1883e..5e07dcb 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java
@@ -20,9 +20,7 @@ package org.apache.hadoop.fs.s3a.scale;
 
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
-import org.junit.Rule;
 import org.junit.Test;
-import org.junit.rules.Timeout;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -34,15 +32,13 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
-import static org.junit.Assert.assertEquals;
-
+/**
+ * Test some scalable operations related to file renaming and deletion.
+ */
 public class TestS3ADeleteManyFiles extends S3AScaleTestBase {
   private static final Logger LOG =
       LoggerFactory.getLogger(TestS3ADeleteManyFiles.class);
 
-  @Rule
-  public Timeout testTimeout = new Timeout(30 * 60 * 1000);
-
   /**
    * CAUTION: If this test starts failing, please make sure that the
    * {@link org.apache.hadoop.fs.s3a.Constants#MAX_THREADS} configuration is not

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java
new file mode 100644
index 0000000..7ece394
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADirectoryPerformance.java
@@ -0,0 +1,189 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.scale;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Statistic;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+import static org.apache.hadoop.fs.s3a.Statistic.*;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+
+/**
+ * Test the performance of listing files/directories.
+ */
+public class TestS3ADirectoryPerformance extends S3AScaleTestBase {
+  private static final Logger LOG = LoggerFactory.getLogger(
+      TestS3ADirectoryPerformance.class);
+
+  @Test
+  public void testListOperations() throws Throwable {
+    describe("Test recursive list operations");
+    final Path scaleTestDir = getTestPath();
+    final Path listDir = new Path(scaleTestDir, "lists");
+
+    // scale factor.
+    int scale = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
+    int width = scale;
+    int depth = scale;
+    int files = scale;
+    MetricDiff metadataRequests = new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
+    MetricDiff listRequests = new MetricDiff(fs, OBJECT_LIST_REQUESTS);
+    MetricDiff listStatusCalls = new MetricDiff(fs, INVOCATION_LIST_FILES);
+    MetricDiff getFileStatusCalls =
+        new MetricDiff(fs, INVOCATION_GET_FILE_STATUS);
+    NanoTimer createTimer = new NanoTimer();
+    TreeScanResults created =
+        createSubdirs(fs, listDir, depth, width, files, 0);
+    // add some empty directories
+    int emptyDepth = 1 * scale;
+    int emptyWidth = 3 * scale;
+
+    created.add(createSubdirs(fs, listDir, emptyDepth, emptyWidth, 0,
+        0, "empty", "f-", ""));
+    createTimer.end("Time to create %s", created);
+    LOG.info("Time per operation: {}",
+        toHuman(createTimer.nanosPerOperation(created.totalCount())));
+    printThenReset(LOG,
+        metadataRequests,
+        listRequests,
+        listStatusCalls,
+        getFileStatusCalls);
+
+    try {
+      // Scan the directory via an explicit tree walk.
+      // This is the baseline for any listing speedups.
+      MetricDiff treewalkMetadataRequests =
+          new MetricDiff(fs, OBJECT_METADATA_REQUESTS);
+      MetricDiff treewalkListRequests = new MetricDiff(fs,
+          OBJECT_LIST_REQUESTS);
+      MetricDiff treewalkListStatusCalls = new MetricDiff(fs,
+          INVOCATION_LIST_FILES);
+      MetricDiff treewalkGetFileStatusCalls =
+          new MetricDiff(fs, INVOCATION_GET_FILE_STATUS);
+      NanoTimer treeWalkTimer = new NanoTimer();
+      TreeScanResults treewalkResults = treeWalk(fs, listDir);
+      treeWalkTimer.end("List status via treewalk");
+
+      print(LOG,
+          treewalkMetadataRequests,
+          treewalkListRequests,
+          treewalkListStatusCalls,
+          treewalkGetFileStatusCalls);
+      assertEquals("Files found in listFiles(recursive=true) " +
+              " created=" + created + " listed=" + treewalkResults,
+          created.getFileCount(), treewalkResults.getFileCount());
+
+
+      // listFiles() does the recursion internally
+      NanoTimer listFilesRecursiveTimer = new NanoTimer();
+
+      TreeScanResults listFilesResults = new TreeScanResults(
+          fs.listFiles(listDir, true));
+
+      listFilesRecursiveTimer.end("listFiles(recursive=true) of %s", created);
+      assertEquals("Files found in listFiles(recursive=true) " +
+          " created=" + created  + " listed=" + listFilesResults,
+          created.getFileCount(), listFilesResults.getFileCount());
+
+      treewalkListRequests.assertDiffEquals(listRequests);
+      printThenReset(LOG,
+          metadataRequests, listRequests,
+          listStatusCalls, getFileStatusCalls);
+
+      NanoTimer globStatusTimer = new NanoTimer();
+      FileStatus[] globStatusFiles = fs.globStatus(listDir);
+      globStatusTimer.end("Time to globStatus() %s", globStatusTimer);
+      LOG.info("Time for glob status {} entries: {}",
+          globStatusFiles.length,
+          toHuman(createTimer.duration()));
+      printThenReset(LOG,
+          metadataRequests,
+          listRequests,
+          listStatusCalls,
+          getFileStatusCalls);
+
+    } finally {
+      // deletion at the end of the run
+      NanoTimer deleteTimer = new NanoTimer();
+      fs.delete(listDir, true);
+      deleteTimer.end("Deleting directory tree");
+      printThenReset(LOG,
+          metadataRequests, listRequests,
+          listStatusCalls, getFileStatusCalls);
+    }
+  }
+
+  @Test
+  public void testTimeToStatEmptyDirectory() throws Throwable {
+    describe("Time to stat an empty directory");
+    Path path = new Path(getTestPath(), "empty");
+    fs.mkdirs(path);
+    timeToStatPath(path);
+  }
+
+  @Test
+  public void testTimeToStatNonEmptyDirectory() throws Throwable {
+    describe("Time to stat a non-empty directory");
+    Path path = new Path(getTestPath(), "dir");
+    fs.mkdirs(path);
+    touch(fs, new Path(path, "file"));
+    timeToStatPath(path);
+  }
+
+  @Test
+  public void testTimeToStatFile() throws Throwable {
+    describe("Time to stat a simple file");
+    Path path = new Path(getTestPath(), "file");
+    touch(fs, path);
+    timeToStatPath(path);
+  }
+
+  @Test
+  public void testTimeToStatRoot() throws Throwable {
+    describe("Time to stat the root path");
+    timeToStatPath(new Path("/"));
+  }
+
+  private void timeToStatPath(Path path) throws IOException {
+    describe("Timing getFileStatus(\"%s\")", path);
+    MetricDiff metadataRequests =
+        new MetricDiff(fs, Statistic.OBJECT_METADATA_REQUESTS);
+    MetricDiff listRequests =
+        new MetricDiff(fs, Statistic.OBJECT_LIST_REQUESTS);
+    long attempts = getOperationCount();
+    NanoTimer timer = new NanoTimer();
+    for (long l = 0; l < attempts; l++) {
+      fs.getFileStatus(path);
+    }
+    timer.end("Time to execute %d getFileStatusCalls", attempts);
+    LOG.info("Time per call: {}", toHuman(timer.nanosPerOperation(attempts)));
+    LOG.info("metadata: {}", metadataRequests);
+    LOG.info("metadata per operation {}", metadataRequests.diff() / attempts);
+    LOG.info("listObjects: {}", listRequests);
+    LOG.info("listObjects: per operation {}", listRequests.diff() / attempts);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
index 0c8b273..5222a4e 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3AInputStreamPerformance.java
@@ -36,8 +36,10 @@ import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 
+import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
+
 /**
- * Look at the performance of S3a operations
+ * Look at the performance of S3a operations.
  */
 public class TestS3AInputStreamPerformance extends S3AScaleTestBase {
   private static final Logger LOG = LoggerFactory.getLogger(
@@ -151,7 +153,7 @@ public class TestS3AInputStreamPerformance extends S3AScaleTestBase {
     readTimer.end("Time to read %d bytes", len);
     bandwidth(readTimer, count);
     assertEquals("Not enough bytes were read)", len, count);
-    long nanosPerByte = readTimer.nanosPerByte(count);
+    long nanosPerByte = readTimer.nanosPerOperation(count);
     LOG.info("An open() call has the equivalent duration of reading {} bytes",
         toHuman( timeOpen.duration() / nanosPerByte));
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/c58a59f7/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
index bc85425..1330ed1 100644
--- a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
+++ b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties
@@ -15,7 +15,9 @@ log4j.rootLogger=info,stdout
 log4j.threshold=ALL
 log4j.appender.stdout=org.apache.log4j.ConsoleAppender
 log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
-log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
+log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n
+
+log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
 
 # for debugging low level S3a operations, uncomment this line
 # log4j.logger.org.apache.hadoop.fs.s3a=DEBUG


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message