hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From inigo...@apache.org
Subject [13/48] hadoop git commit: HADOOP-13345 HS3Guard: Improved Consistency for S3A. Contributed by: Chris Nauroth, Aaron Fabbri, Mingliang Liu, Lei (Eddy) Xu, Sean Mackrory, Steve Loughran and others.
Date Sat, 02 Sep 2017 21:20:57 GMT
http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
new file mode 100644
index 0000000..8515bfb
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.Collection;
+
+import com.amazonaws.services.dynamodbv2.document.Item;
+import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
+import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
+import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
+import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
+import com.amazonaws.services.dynamodbv2.model.KeyType;
+import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.Tristate;
+
+/**
+ * Defines methods for translating between domain model objects and their
+ * representations in the DynamoDB schema.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+final class PathMetadataDynamoDBTranslation {
+
+  /** The HASH key name of each item. */
+  @VisibleForTesting
+  static final String PARENT = "parent";
+  /** The RANGE key name of each item. */
+  @VisibleForTesting
+  static final String CHILD = "child";
+  @VisibleForTesting
+  static final String IS_DIR = "is_dir";
+  @VisibleForTesting
+  static final String MOD_TIME = "mod_time";
+  @VisibleForTesting
+  static final String FILE_LENGTH = "file_length";
+  @VisibleForTesting
+  static final String BLOCK_SIZE = "block_size";
+  static final String IS_DELETED = "is_deleted";
+
+  /** Table version field {@value} in version marker item. */
+  @VisibleForTesting
+  static final String TABLE_VERSION = "table_version";
+
+  /** Table creation timestampfield {@value} in version marker item. */
+  @VisibleForTesting
+  static final String TABLE_CREATED = "table_created";
+
+  /** The version marker field is invalid. */
+  static final String E_NOT_VERSION_MARKER = "Not a version marker: ";
+
+  /**
+   * Returns the key schema for the DynamoDB table.
+   *
+   * @return DynamoDB key schema
+   */
+  static Collection<KeySchemaElement> keySchema() {
+    return Arrays.asList(
+        new KeySchemaElement(PARENT, KeyType.HASH),
+        new KeySchemaElement(CHILD, KeyType.RANGE));
+  }
+
+  /**
+   * Returns the attribute definitions for the DynamoDB table.
+   *
+   * @return DynamoDB attribute definitions
+   */
+  static Collection<AttributeDefinition> attributeDefinitions() {
+    return Arrays.asList(
+        new AttributeDefinition(PARENT, ScalarAttributeType.S),
+        new AttributeDefinition(CHILD, ScalarAttributeType.S));
+  }
+
+  /**
+   * Converts a DynamoDB item to a {@link PathMetadata}.
+   *
+   * @param item DynamoDB item to convert
+   * @return {@code item} converted to a {@link PathMetadata}
+   */
+  static PathMetadata itemToPathMetadata(Item item, String username)
+      throws IOException {
+    if (item == null) {
+      return null;
+    }
+
+    String parentStr = item.getString(PARENT);
+    Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item);
+    String childStr = item.getString(CHILD);
+    Preconditions.checkNotNull(childStr, "No child entry in item %s", item);
+
+    // Skip table version markers, which are only non-absolute paths stored.
+    Path rawPath = new Path(parentStr, childStr);
+    if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) {
+      return null;
+    }
+
+    Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/");
+    Path path = new Path(parent, childStr);
+
+    boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
+    final FileStatus fileStatus;
+    if (isDir) {
+      fileStatus = DynamoDBMetadataStore.makeDirStatus(path, username);
+    } else {
+      long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
+      long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
+      long block = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
+      fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null,
+          username, username, path);
+    }
+    boolean isDeleted =
+        item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED);
+
+    return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted);
+  }
+
+  /**
+   * Converts a {@link PathMetadata} to a DynamoDB item.
+   *
+   * @param meta {@link PathMetadata} to convert
+   * @return {@code meta} converted to DynamoDB item
+   */
+  static Item pathMetadataToItem(PathMetadata meta) {
+    Preconditions.checkNotNull(meta);
+    final FileStatus status = meta.getFileStatus();
+    final Item item = new Item().withPrimaryKey(pathToKey(status.getPath()));
+    if (status.isDirectory()) {
+      item.withBoolean(IS_DIR, true);
+    } else {
+      item.withLong(FILE_LENGTH, status.getLen())
+          .withLong(MOD_TIME, status.getModificationTime())
+          .withLong(BLOCK_SIZE, status.getBlockSize());
+    }
+    item.withBoolean(IS_DELETED, meta.isDeleted());
+    return item;
+  }
+
+  /**
+   * The version marker has a primary key whose PARENT is {@code name};
+   * this MUST NOT be a value which represents an absolute path.
+   * @param name name of the version marker
+   * @param version version number
+   * @param timestamp creation timestamp
+   * @return an item representing a version marker.
+   */
+  static Item createVersionMarker(String name, int version, long timestamp) {
+    return new Item().withPrimaryKey(createVersionMarkerPrimaryKey(name))
+        .withInt(TABLE_VERSION, version)
+        .withLong(TABLE_CREATED, timestamp);
+  }
+
+  /**
+   * Create the primary key of the version marker.
+   * @param name key name
+   * @return the key to use when registering or resolving version markers
+   */
+  static PrimaryKey createVersionMarkerPrimaryKey(String name) {
+    return new PrimaryKey(PARENT, name, CHILD, name);
+  }
+
+  /**
+   * Extract the version from a version marker item.
+   * @param marker version marker item
+   * @return the extracted version field
+   * @throws IOException if the item is not a version marker
+   */
+  static int extractVersionFromMarker(Item marker) throws IOException {
+    if (marker.hasAttribute(TABLE_VERSION)) {
+      return marker.getInt(TABLE_VERSION);
+    } else {
+      throw new IOException(E_NOT_VERSION_MARKER + marker);
+    }
+  }
+
+  /**
+   * Extract the creation time, if present.
+   * @param marker version marker item
+   * @return the creation time, or null
+   * @throws IOException if the item is not a version marker
+   */
+  static Long extractCreationTimeFromMarker(Item marker) throws IOException {
+    if (marker.hasAttribute(TABLE_CREATED)) {
+      return marker.getLong(TABLE_CREATED);
+    } else {
+      return null;
+    }
+  }
+
+  /**
+   * Converts a collection {@link PathMetadata} to a collection DynamoDB items.
+   *
+   * @see #pathMetadataToItem(PathMetadata)
+   */
+  static Item[] pathMetadataToItem(Collection<PathMetadata> metas) {
+    if (metas == null) {
+      return null;
+    }
+
+    final Item[] items = new Item[metas.size()];
+    int i = 0;
+    for (PathMetadata meta : metas) {
+      items[i++] = pathMetadataToItem(meta);
+    }
+    return items;
+  }
+
+  /**
+   * Converts a {@link Path} to a DynamoDB equality condition on that path as
+   * parent, suitable for querying all direct children of the path.
+   *
+   * @param path the path; can not be null
+   * @return DynamoDB equality condition on {@code path} as parent
+   */
+  static KeyAttribute pathToParentKeyAttribute(Path path) {
+    return new KeyAttribute(PARENT, pathToParentKey(path));
+  }
+
+  /**
+   * e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a}
+   * @param path path to convert
+   * @return string for parent key
+   */
+  static String pathToParentKey(Path path) {
+    Preconditions.checkNotNull(path);
+    Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute");
+    URI uri = path.toUri();
+    String bucket = uri.getHost();
+    Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
+        "Path missing bucket");
+    String pKey = "/" + bucket + uri.getPath();
+
+    // Strip trailing slash
+    if (pKey.endsWith("/")) {
+      pKey = pKey.substring(0, pKey.length() - 1);
+    }
+    return pKey;
+  }
+
+  /**
+   * Converts a {@link Path} to a DynamoDB key, suitable for getting the item
+   * matching the path.
+   *
+   * @param path the path; can not be null
+   * @return DynamoDB key for item matching {@code path}
+   */
+  static PrimaryKey pathToKey(Path path) {
+    Preconditions.checkArgument(!path.isRoot(),
+        "Root path is not mapped to any PrimaryKey");
+    return new PrimaryKey(PARENT, pathToParentKey(path.getParent()), CHILD,
+        path.getName());
+  }
+
+  /**
+   * Converts a collection of {@link Path} to a collection of DynamoDB keys.
+   *
+   * @see #pathToKey(Path)
+   */
+  static PrimaryKey[] pathToKey(Collection<Path> paths) {
+    if (paths == null) {
+      return null;
+    }
+
+    final PrimaryKey[] keys = new PrimaryKey[paths.size()];
+    int i = 0;
+    for (Path p : paths) {
+      keys[i++] = pathToKey(p);
+    }
+    return keys;
+  }
+
+  /**
+   * There is no need to instantiate this class.
+   */
+  private PathMetadataDynamoDBTranslation() {
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
new file mode 100644
index 0000000..7e4aec1
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java
@@ -0,0 +1,463 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Set;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AInstrumentation;
+import org.apache.hadoop.fs.s3a.Tristate;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
+import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
+import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
+import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
+
+/**
+ * Logic for integrating MetadataStore with S3A.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public final class S3Guard {
+  private static final Logger LOG = LoggerFactory.getLogger(S3Guard.class);
+
+  @InterfaceAudience.Private
+  @InterfaceStability.Unstable
+  @VisibleForTesting
+  public static final String S3GUARD_DDB_CLIENT_FACTORY_IMPL =
+      "fs.s3a.s3guard.ddb.client.factory.impl";
+
+  static final Class<? extends DynamoDBClientFactory>
+      S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT =
+      DynamoDBClientFactory.DefaultDynamoDBClientFactory.class;
+  private static final FileStatus[] EMPTY_LISTING = new FileStatus[0];
+
+  // Utility class.  All static functions.
+  private S3Guard() { }
+
+  /* Utility functions. */
+
+  /**
+   * Create a new instance of the configured MetadataStore.
+   * The returned MetadataStore will have been initialized via
+   * {@link MetadataStore#initialize(FileSystem)} by this function before
+   * returning it.  Callers must clean up by calling
+   * {@link MetadataStore#close()} when done using the MetadataStore.
+   *
+   * @param fs  FileSystem whose Configuration specifies which
+   *            implementation to use.
+   * @return Reference to new MetadataStore.
+   * @throws IOException if the metadata store cannot be instantiated
+   */
+  public static MetadataStore getMetadataStore(FileSystem fs)
+      throws IOException {
+    Preconditions.checkNotNull(fs);
+    Configuration conf = fs.getConf();
+    Preconditions.checkNotNull(conf);
+    MetadataStore msInstance;
+    try {
+      Class<? extends MetadataStore> msClass = getMetadataStoreClass(conf);
+      msInstance = ReflectionUtils.newInstance(msClass, conf);
+      LOG.debug("Using {} metadata store for {} filesystem",
+          msClass.getSimpleName(), fs.getScheme());
+      msInstance.initialize(fs);
+      return msInstance;
+    } catch (RuntimeException | IOException e) {
+      String message = "Failed to instantiate metadata store " +
+          conf.get(S3_METADATA_STORE_IMPL)
+          + " defined in " + S3_METADATA_STORE_IMPL
+          + ": " + e;
+      LOG.error(message, e);
+      if (e instanceof IOException) {
+        throw e;
+      } else {
+        throw new IOException(message, e);
+      }
+    }
+  }
+
+  private static Class<? extends MetadataStore> getMetadataStoreClass(
+      Configuration conf) {
+    if (conf == null) {
+      return NullMetadataStore.class;
+    }
+
+    return conf.getClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
+            MetadataStore.class);
+  }
+
+
+  /**
+   * Helper function which puts a given S3AFileStatus into the MetadataStore and
+   * returns the same S3AFileStatus. Instrumentation monitors the put operation.
+   * @param ms MetadataStore to {@code put()} into.
+   * @param status status to store
+   * @param instrumentation instrumentation of the s3a file system
+   * @return The same status as passed in
+   * @throws IOException if metadata store update failed
+   */
+  public static S3AFileStatus putAndReturn(MetadataStore ms,
+      S3AFileStatus status,
+      S3AInstrumentation instrumentation) throws IOException {
+    long startTimeNano = System.nanoTime();
+    ms.put(new PathMetadata(status));
+    instrumentation.addValueToQuantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
+        (System.nanoTime() - startTimeNano));
+    instrumentation.incrementCounter(S3GUARD_METADATASTORE_PUT_PATH_REQUEST, 1);
+    return status;
+  }
+
+  /**
+   * Convert the data of a directory listing to an array of {@link FileStatus}
+   * entries. Tombstones are filtered out at this point. If the listing is null
+   * an empty array is returned.
+   * @param dirMeta directory listing -may be null
+   * @return a possibly-empty array of file status entries
+   */
+  public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta)  {
+    if (dirMeta == null) {
+      return EMPTY_LISTING;
+    }
+
+    Collection<PathMetadata> listing = dirMeta.getListing();
+    List<FileStatus> statuses = new ArrayList<>();
+
+    for (PathMetadata pm : listing) {
+      if (!pm.isDeleted()) {
+        statuses.add(pm.getFileStatus());
+      }
+    }
+
+    return statuses.toArray(new FileStatus[0]);
+  }
+
+  /**
+   * Given directory listing metadata from both the backing store and the
+   * MetadataStore, merge the two sources of truth to create a consistent
+   * view of the current directory contents, which can be returned to clients.
+   *
+   * Also update the MetadataStore to reflect the resulting directory listing.
+   *
+   * @param ms MetadataStore to use.
+   * @param path path to directory
+   * @param backingStatuses Directory listing from the backing store.
+   * @param dirMeta  Directory listing from MetadataStore.  May be null.
+   * @param isAuthoritative State of authoritative mode
+   * @return Final result of directory listing.
+   * @throws IOException if metadata store update failed
+   */
+  public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
+      List<FileStatus> backingStatuses, DirListingMetadata dirMeta,
+      boolean isAuthoritative) throws IOException {
+
+    // Fast-path for NullMetadataStore
+    if (isNullMetadataStore(ms)) {
+      return backingStatuses.toArray(new FileStatus[backingStatuses.size()]);
+    }
+
+    assertQualified(path);
+
+    if (dirMeta == null) {
+      // The metadataStore had zero state for this directory
+      dirMeta = new DirListingMetadata(path, DirListingMetadata.EMPTY_DIR,
+          false);
+    }
+
+    Set<Path> deleted = dirMeta.listTombstones();
+
+    // Since we treat the MetadataStore as a "fresher" or "consistent" view
+    // of metadata, we always use its metadata first.
+
+    // Since the authoritative case is already handled outside this function,
+    // we will basically start with the set of directory entries in the
+    // DirListingMetadata, and add any that only exist in the backingStatuses.
+
+    boolean changed = false;
+    for (FileStatus s : backingStatuses) {
+      if (deleted.contains(s.getPath())) {
+        continue;
+      }
+
+      // Minor race condition here.  Multiple threads could add to this
+      // mutable DirListingMetadata.  Since it is backed by a
+      // ConcurrentHashMap, the last put() wins.
+      // More concerning is two threads racing on listStatus() and delete().
+      // Any FileSystem has similar race conditions, but we could persist
+      // a stale entry longer.  We could expose an atomic
+      // DirListingMetadata#putIfNotPresent()
+      boolean updated = dirMeta.put(s);
+      changed = changed || updated;
+    }
+
+    if (changed && isAuthoritative) {
+      dirMeta.setAuthoritative(true); // This is the full directory contents
+      ms.put(dirMeta);
+    }
+
+    return dirMetaToStatuses(dirMeta);
+  }
+
+  /**
+   * Although NullMetadataStore does nothing, callers may wish to avoid work
+   * (fast path) when the NullMetadataStore is in use.
+   * @param ms The MetadataStore to test
+   * @return true iff the MetadataStore is the null, or no-op, implementation.
+   */
+  public static boolean isNullMetadataStore(MetadataStore ms) {
+    return (ms instanceof NullMetadataStore);
+  }
+
+  /**
+   * Update MetadataStore to reflect creation of the given  directories.
+   *
+   * If an IOException is raised while trying to update the entry, this
+   * operation catches the exception and returns.
+   * @param ms    MetadataStore to update.
+   * @param dirs  null, or an ordered list of directories from leaf to root.
+   *              E.g. if /a/ exists, and  mkdirs(/a/b/c/d) is called, this
+   *              list will contain [/a/b/c/d, /a/b/c, /a/b].   /a/b/c/d is
+   *              an empty, dir, and the other dirs only contain their child
+   *              dir.
+   * @param owner Hadoop user name.
+   * @param authoritative Whether to mark new directories as authoritative.
+   */
+  public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
+      String owner, boolean authoritative) {
+    if (dirs == null) {
+      return;
+    }
+
+    /* We discussed atomicity of this implementation.
+     * The concern is that multiple clients could race to write different
+     * cached directories to the MetadataStore.  Two solutions are proposed:
+     * 1. Move mkdirs() into MetadataStore interface and let implementations
+     *    ensure they are atomic.
+     * 2. Specify that the semantics of MetadataStore#putListStatus() is
+     *    always additive,  That is, if MetadataStore has listStatus() state
+     *    for /a/b that contains [/a/b/file0, /a/b/file1], and we then call
+     *    putListStatus(/a/b -> [/a/b/file2, /a/b/file3], isAuthoritative=true),
+     *    then we will end up with final state of
+     *    [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative =
+     *    true
+     */
+    FileStatus prevStatus = null;
+
+    // Use new batched put to reduce round trips.
+    List<PathMetadata> pathMetas = new ArrayList<>(dirs.size());
+
+    try {
+      // Iterate from leaf to root
+      for (int i = 0; i < dirs.size(); i++) {
+        boolean isLeaf = (prevStatus == null);
+        Path f = dirs.get(i);
+        assertQualified(f);
+        FileStatus status =
+            createUploadFileStatus(f, true, 0, 0, owner);
+
+        // We only need to put a DirListingMetadata if we are setting
+        // authoritative bit
+        DirListingMetadata dirMeta = null;
+        if (authoritative) {
+          Collection<PathMetadata> children;
+          if (isLeaf) {
+            children = DirListingMetadata.EMPTY_DIR;
+          } else {
+            children = new ArrayList<>(1);
+            children.add(new PathMetadata(prevStatus));
+          }
+          dirMeta = new DirListingMetadata(f, children, authoritative);
+          ms.put(dirMeta);
+        }
+
+        pathMetas.add(new PathMetadata(status));
+        prevStatus = status;
+      }
+
+      // Batched put
+      ms.put(pathMetas);
+    } catch (IOException ioe) {
+      LOG.error("MetadataStore#put() failure:", ioe);
+    }
+  }
+
+  /**
+   * Helper function that records the move of directory paths, adding
+   * resulting metadata to the supplied lists.
+   * Does not store in MetadataStore.
+   * @param ms  MetadataStore, used to make this a no-op, when it is
+   *            NullMetadataStore.
+   * @param srcPaths stores the source path here
+   * @param dstMetas stores destination metadata here
+   * @param srcPath  source path to store
+   * @param dstPath  destination path to store
+   * @param owner file owner to use in created records
+   */
+  public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
+      Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
+      String owner) {
+    if (isNullMetadataStore(ms)) {
+      return;
+    }
+    assertQualified(srcPath, dstPath);
+
+    FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner);
+    addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
+  }
+
+  /**
+   * Like {@link #addMoveDir(MetadataStore, Collection, Collection, Path,
+   * Path, String)} (), but for files.
+   * @param ms  MetadataStore, used to make this a no-op, when it is
+   *            NullMetadataStore.
+   * @param srcPaths stores the source path here
+   * @param dstMetas stores destination metadata here
+   * @param srcPath  source path to store
+   * @param dstPath  destination path to store
+   * @param size length of file moved
+   * @param blockSize  blocksize to associate with destination file
+   * @param owner file owner to use in created records
+   */
+  public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths,
+      Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
+      long size, long blockSize, String owner) {
+    if (isNullMetadataStore(ms)) {
+      return;
+    }
+    assertQualified(srcPath, dstPath);
+    FileStatus dstStatus = createUploadFileStatus(dstPath, false,
+        size, blockSize, owner);
+    addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
+  }
+
+  /**
+   * Helper method that records the move of all ancestors of a path.
+   *
+   * In S3A, an optimization is to delete unnecessary fake directory objects if
+   * the directory is non-empty. In that case, for a nested child to move, S3A
+   * is not listing and thus moving all its ancestors (up to source root). So we
+   * take care of those inferred directories of this path explicitly.
+   *
+   * As {@link #addMoveFile} and {@link #addMoveDir}, this method adds resulting
+   * metadata to the supplied lists. It does not store in MetadataStore.
+   *
+   * @param ms MetadataStore, no-op if it is NullMetadataStore
+   * @param srcPaths stores the source path here
+   * @param dstMetas stores destination metadata here
+   * @param srcRoot source root up to which (exclusive) should we add ancestors
+   * @param srcPath source path of the child to add ancestors
+   * @param dstPath destination path of the child to add ancestors
+   * @param owner Hadoop user name
+   */
+  public static void addMoveAncestors(MetadataStore ms,
+      Collection<Path> srcPaths, Collection<PathMetadata> dstMetas,
+      Path srcRoot, Path srcPath, Path dstPath, String owner) {
+    if (isNullMetadataStore(ms)) {
+      return;
+    }
+
+    assertQualified(srcRoot, srcPath, dstPath);
+
+    if (srcPath.equals(srcRoot)) {
+      LOG.debug("Skip moving ancestors of source root directory {}", srcRoot);
+      return;
+    }
+
+    Path parentSrc = srcPath.getParent();
+    Path parentDst = dstPath.getParent();
+    while (parentSrc != null
+        && !parentSrc.isRoot()
+        && !parentSrc.equals(srcRoot)
+        && !srcPaths.contains(parentSrc)) {
+      LOG.debug("Renaming non-listed parent {} to {}", parentSrc, parentDst);
+      S3Guard.addMoveDir(ms, srcPaths, dstMetas, parentSrc, parentDst, owner);
+      parentSrc = parentSrc.getParent();
+      parentDst = parentDst.getParent();
+    }
+  }
+
+  public static void addAncestors(MetadataStore metadataStore,
+      Path qualifiedPath, String username) throws IOException {
+    Collection<PathMetadata> newDirs = new ArrayList<>();
+    Path parent = qualifiedPath.getParent();
+    while (!parent.isRoot()) {
+      PathMetadata directory = metadataStore.get(parent);
+      if (directory == null || directory.isDeleted()) {
+        FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username,
+            null, parent);
+        PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false);
+        newDirs.add(meta);
+      } else {
+        break;
+      }
+      parent = parent.getParent();
+    }
+    metadataStore.put(newDirs);
+  }
+
+  private static void addMoveStatus(Collection<Path> srcPaths,
+      Collection<PathMetadata> dstMetas,
+      Path srcPath,
+      FileStatus dstStatus) {
+    srcPaths.add(srcPath);
+    dstMetas.add(new PathMetadata(dstStatus));
+  }
+
+  /**
+   * Assert that the path is qualified with a host and scheme.
+   * @param p path to check
+   * @throws NullPointerException if either argument does not hold
+   */
+  public static void assertQualified(Path p) {
+    URI uri = p.toUri();
+    // Paths must include bucket in case MetadataStore is shared between
+    // multiple S3AFileSystem instances
+    Preconditions.checkNotNull(uri.getHost(), "Null host in " + uri);
+
+    // This should never fail, but is retained for completeness.
+    Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri);
+  }
+
+  /**
+   * Assert that all paths are valid.
+   * @param paths path to check
+   * @throws NullPointerException if either argument does not hold
+   */
+  public static void assertQualified(Path...paths) {
+    for (Path path : paths) {
+      assertQualified(path);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
new file mode 100644
index 0000000..be271ae
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -0,0 +1,924 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.Constants;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.shell.CommandFormat;
+import org.apache.hadoop.util.GenericOptionsParser;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import static org.apache.hadoop.fs.s3a.Constants.*;
+
+/**
+ * CLI to manage S3Guard Metadata Store.
+ */
+public abstract class S3GuardTool extends Configured implements Tool {
+  private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
+
+  private static final String NAME = "s3guard";
+  private static final String COMMON_USAGE =
+      "When possible and not overridden by more specific options, metadata\n" +
+      "repository information will be inferred from the S3A URL (if provided)" +
+      "\n\n" +
+      "Generic options supported are:\n" +
+      "  -conf <config file> - specify an application configuration file\n" +
+      "  -D <property=value> - define a value for a given property\n";
+
+  private static final String USAGE = NAME +
+      " [command] [OPTIONS] [s3a://BUCKET]\n\n" +
+      "Commands: \n" +
+      "\t" + Init.NAME + " - " + Init.PURPOSE + "\n" +
+      "\t" + Destroy.NAME + " - " + Destroy.PURPOSE + "\n" +
+      "\t" + Import.NAME + " - " + Import.PURPOSE + "\n" +
+      "\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" +
+      "\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n";
+  private static final String DATA_IN_S3_IS_PRESERVED
+      = "(all data in S3 is preserved";
+
+  abstract public String getUsage();
+
+  // Exit codes
+  static final int SUCCESS = 0;
+  static final int INVALID_ARGUMENT = 1;
+  static final int ERROR = 99;
+
+  private S3AFileSystem filesystem;
+  private MetadataStore store;
+  private final CommandFormat commandFormat;
+
+  private static final String META_FLAG = "meta";
+  private static final String DAYS_FLAG = "days";
+  private static final String HOURS_FLAG = "hours";
+  private static final String MINUTES_FLAG = "minutes";
+  private static final String SECONDS_FLAG = "seconds";
+
+  private static final String REGION_FLAG = "region";
+  private static final String READ_FLAG = "read";
+  private static final String WRITE_FLAG = "write";
+
+  /**
+   * Constructor a S3Guard tool with HDFS configuration.
+   * @param conf Configuration.
+   */
+  protected S3GuardTool(Configuration conf) {
+    super(conf);
+
+    commandFormat = new CommandFormat(0, Integer.MAX_VALUE);
+    // For metadata store URI
+    commandFormat.addOptionWithValue(META_FLAG);
+    // DDB region.
+    commandFormat.addOptionWithValue(REGION_FLAG);
+  }
+
+  /**
+   * Return sub-command name.
+   */
+  abstract String getName();
+
+  /**
+   * Parse DynamoDB region from either -m option or a S3 path.
+   *
+   * This function should only be called from {@link Init} or
+   * {@link Destroy}.
+   *
+   * @param paths remaining parameters from CLI.
+   * @return false for invalid parameters.
+   * @throws IOException on I/O errors.
+   */
+  boolean parseDynamoDBRegion(List<String> paths) throws IOException {
+    Configuration conf = getConf();
+    String fromCli = getCommandFormat().getOptValue(REGION_FLAG);
+    String fromConf = conf.get(S3GUARD_DDB_REGION_KEY);
+    boolean hasS3Path = !paths.isEmpty();
+
+    if (fromCli != null) {
+      if (fromCli.isEmpty()) {
+        System.err.println("No region provided with -" + REGION_FLAG + " flag");
+        return false;
+      }
+      if (hasS3Path) {
+        System.err.println("Providing both an S3 path and the -" + REGION_FLAG
+            + " flag is not supported. If you need to specify a different "
+            + "region than the S3 bucket, configure " + S3GUARD_DDB_REGION_KEY);
+        return false;
+      }
+      conf.set(S3GUARD_DDB_REGION_KEY, fromCli);
+      return true;
+    }
+
+    if (fromConf != null) {
+      if (fromConf.isEmpty()) {
+        System.err.printf("No region provided with config %s, %n",
+            S3GUARD_DDB_REGION_KEY);
+        return false;
+      }
+      return true;
+    }
+
+    if (hasS3Path) {
+      String s3Path = paths.get(0);
+      initS3AFileSystem(s3Path);
+      return true;
+    }
+
+    System.err.println("No region found from -" + REGION_FLAG + " flag, " +
+        "config, or S3 bucket");
+    return false;
+  }
+
+  /**
+   * Parse metadata store from command line option or HDFS configuration.
+   *
+   * @param forceCreate override the auto-creation setting to true.
+   * @return a initialized metadata store.
+   */
+  MetadataStore initMetadataStore(boolean forceCreate) throws IOException {
+    if (getStore() != null) {
+      return getStore();
+    }
+    Configuration conf;
+    if (filesystem == null) {
+      conf = getConf();
+    } else {
+      conf = filesystem.getConf();
+    }
+    String metaURI = getCommandFormat().getOptValue(META_FLAG);
+    if (metaURI != null && !metaURI.isEmpty()) {
+      URI uri = URI.create(metaURI);
+      LOG.info("create metadata store: {}", uri + " scheme: "
+          + uri.getScheme());
+      switch (uri.getScheme().toLowerCase(Locale.ENGLISH)) {
+      case "local":
+        setStore(new LocalMetadataStore());
+        break;
+      case "dynamodb":
+        setStore(new DynamoDBMetadataStore());
+        conf.set(S3GUARD_DDB_TABLE_NAME_KEY, uri.getAuthority());
+        if (forceCreate) {
+          conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+        }
+        break;
+      default:
+        throw new IOException(
+            String.format("Metadata store %s is not supported", uri));
+      }
+    } else {
+      // CLI does not specify metadata store URI, it uses default metadata store
+      // DynamoDB instead.
+      setStore(new DynamoDBMetadataStore());
+      if (forceCreate) {
+        conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
+      }
+    }
+
+    if (filesystem == null) {
+      getStore().initialize(conf);
+    } else {
+      getStore().initialize(filesystem);
+    }
+    LOG.info("Metadata store {} is initialized.", getStore());
+    return getStore();
+  }
+
+  /**
+   * Initialize S3A FileSystem instance.
+   *
+   * @param path s3a URI
+   * @throws IOException
+   */
+  void initS3AFileSystem(String path) throws IOException {
+    URI uri;
+    try {
+      uri = new URI(path);
+    } catch (URISyntaxException e) {
+      throw new IOException(e);
+    }
+    // Make sure that S3AFileSystem does not hold an actual MetadataStore
+    // implementation.
+    Configuration conf = getConf();
+    conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
+        MetadataStore.class);
+    FileSystem fs = FileSystem.get(uri, getConf());
+    if (!(fs instanceof S3AFileSystem)) {
+      throw new IOException(
+          String.format("URI %s is not a S3A file system: %s", uri,
+              fs.getClass().getName()));
+    }
+    filesystem = (S3AFileSystem) fs;
+  }
+
+  /**
+   * Parse CLI arguments and returns the position arguments.
+   * The options are stored in {@link #commandFormat}
+   *
+   * @param args command line arguments.
+   * @return the position arguments from CLI.
+   */
+  List<String> parseArgs(String[] args) {
+    return getCommandFormat().parse(args, 1);
+  }
+
+  protected S3AFileSystem getFilesystem() {
+    return filesystem;
+  }
+
+  protected void setFilesystem(S3AFileSystem filesystem) {
+    this.filesystem = filesystem;
+  }
+
+  @VisibleForTesting
+  public MetadataStore getStore() {
+    return store;
+  }
+
+  @VisibleForTesting
+  protected void setStore(MetadataStore store) {
+    Preconditions.checkNotNull(store);
+    this.store = store;
+  }
+
+  protected CommandFormat getCommandFormat() {
+    return commandFormat;
+  }
+
+  /**
+   * Create the metadata store.
+   */
+  static class Init extends S3GuardTool {
+    private static final String NAME = "init";
+    public static final String PURPOSE = "initialize metadata repository";
+    private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+        "\t" + PURPOSE + "\n\n" +
+        "Common options:\n" +
+        "  -" + META_FLAG + " URL - Metadata repository details " +
+          "(implementation-specific)\n" +
+        "\n" +
+        "Amazon DynamoDB-specific options:\n" +
+        "  -" + REGION_FLAG + " REGION - Service region for connections\n" +
+        "  -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" +
+        "  -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" +
+        "\n" +
+        "  URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+        "  Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+        "  is not supported.";
+
+    Init(Configuration conf) {
+      super(conf);
+      // read capacity.
+      getCommandFormat().addOptionWithValue(READ_FLAG);
+      // write capacity.
+      getCommandFormat().addOptionWithValue(WRITE_FLAG);
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    @Override
+    public String getUsage() {
+      return USAGE;
+    }
+
+    @Override
+    public int run(String[] args) throws IOException {
+      List<String> paths = parseArgs(args);
+
+      String readCap = getCommandFormat().getOptValue(READ_FLAG);
+      if (readCap != null && !readCap.isEmpty()) {
+        int readCapacity = Integer.parseInt(readCap);
+        getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity);
+      }
+      String writeCap = getCommandFormat().getOptValue(WRITE_FLAG);
+      if (writeCap != null && !writeCap.isEmpty()) {
+        int writeCapacity = Integer.parseInt(writeCap);
+        getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity);
+      }
+
+      // Validate parameters.
+      if (!parseDynamoDBRegion(paths)) {
+        System.err.println(USAGE);
+        return INVALID_ARGUMENT;
+      }
+      initMetadataStore(true);
+      return SUCCESS;
+    }
+  }
+
+  /**
+   * Destroy a metadata store.
+   */
+  static class Destroy extends S3GuardTool {
+    private static final String NAME = "destroy";
+    public static final String PURPOSE = "destroy Metadata Store data "
+        + DATA_IN_S3_IS_PRESERVED;
+    private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+        "\t" + PURPOSE + "\n\n" +
+        "Common options:\n" +
+        "  -" + META_FLAG + " URL - Metadata repository details " +
+          "(implementation-specific)\n" +
+        "\n" +
+        "Amazon DynamoDB-specific options:\n" +
+        "  -" + REGION_FLAG + " REGION - Service region for connections\n" +
+        "\n" +
+        "  URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+        "  Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+        "  is not supported.";
+
+    Destroy(Configuration conf) {
+      super(conf);
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    @Override
+    public String getUsage() {
+      return USAGE;
+    }
+
+    public int run(String[] args) throws IOException {
+      List<String> paths = parseArgs(args);
+      if (!parseDynamoDBRegion(paths)) {
+        System.err.println(USAGE);
+        return INVALID_ARGUMENT;
+      }
+
+      try {
+        initMetadataStore(false);
+      } catch (FileNotFoundException e) {
+        // indication that the table was not found
+        LOG.debug("Failed to bind to store to be destroyed", e);
+        LOG.info("Metadata Store does not exist.");
+        return SUCCESS;
+      }
+
+      Preconditions.checkState(getStore() != null,
+          "Metadata Store is not initialized");
+
+      getStore().destroy();
+      LOG.info("Metadata store is deleted.");
+      return SUCCESS;
+    }
+  }
+
+  /**
+   * Import s3 metadata to the metadata store.
+   */
+  static class Import extends S3GuardTool {
+    private static final String NAME = "import";
+    public static final String PURPOSE = "import metadata from existing S3 " +
+        "data";
+    private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+        "\t" + PURPOSE + "\n\n" +
+        "Common options:\n" +
+        "  -" + META_FLAG + " URL - Metadata repository details " +
+        "(implementation-specific)\n" +
+        "\n" +
+        "Amazon DynamoDB-specific options:\n" +
+        "  -" + REGION_FLAG + " REGION - Service region for connections\n" +
+        "\n" +
+        "  URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+        "  Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+        "  is not supported.";
+
+    private final Set<Path> dirCache = new HashSet<>();
+
+    Import(Configuration conf) {
+      super(conf);
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    @Override
+    public String getUsage() {
+      return USAGE;
+    }
+
+    /**
+     * Put parents into MS and cache if the parents are not presented.
+     *
+     * @param f the file or an empty directory.
+     * @throws IOException on I/O errors.
+     */
+    private void putParentsIfNotPresent(FileStatus f) throws IOException {
+      Preconditions.checkNotNull(f);
+      Path parent = f.getPath().getParent();
+      while (parent != null) {
+        if (dirCache.contains(parent)) {
+          return;
+        }
+        FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
+            f.getOwner());
+        getStore().put(new PathMetadata(dir));
+        dirCache.add(parent);
+        parent = parent.getParent();
+      }
+    }
+
+    /**
+     * Recursively import every path under path.
+     * @return number of items inserted into MetadataStore
+     * @throws IOException on I/O errors.
+     */
+    private long importDir(FileStatus status) throws IOException {
+      Preconditions.checkArgument(status.isDirectory());
+      RemoteIterator<LocatedFileStatus> it = getFilesystem()
+          .listFilesAndEmptyDirectories(status.getPath(), true);
+      long items = 0;
+
+      while (it.hasNext()) {
+        LocatedFileStatus located = it.next();
+        FileStatus child;
+        if (located.isDirectory()) {
+          child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
+              located.getOwner());
+          dirCache.add(child.getPath());
+        } else {
+          child = new S3AFileStatus(located.getLen(),
+              located.getModificationTime(),
+              located.getPath(),
+              located.getBlockSize(),
+              located.getOwner());
+        }
+        putParentsIfNotPresent(child);
+        getStore().put(new PathMetadata(child));
+        items++;
+      }
+      return items;
+    }
+
+    @Override
+    public int run(String[] args) throws IOException {
+      List<String> paths = parseArgs(args);
+      if (paths.isEmpty()) {
+        System.err.println(getUsage());
+        return INVALID_ARGUMENT;
+      }
+      String s3Path = paths.get(0);
+      initS3AFileSystem(s3Path);
+
+      URI uri;
+      try {
+        uri = new URI(s3Path);
+      } catch (URISyntaxException e) {
+        throw new IOException(e);
+      }
+      String filePath = uri.getPath();
+      if (filePath.isEmpty()) {
+        // If they specify a naked S3 URI (e.g. s3a://bucket), we'll consider
+        // root to be the path
+        filePath = "/";
+      }
+      Path path = new Path(filePath);
+      FileStatus status = getFilesystem().getFileStatus(path);
+
+      initMetadataStore(false);
+
+      long items = 1;
+      if (status.isFile()) {
+        PathMetadata meta = new PathMetadata(status);
+        getStore().put(meta);
+      } else {
+        items = importDir(status);
+      }
+
+      System.out.printf("Inserted %d items into Metadata Store%n", items);
+
+      return SUCCESS;
+    }
+  }
+
+  /**
+   * Show diffs between the s3 and metadata store.
+   */
+  static class Diff extends S3GuardTool {
+    private static final String NAME = "diff";
+    public static final String PURPOSE = "report on delta between S3 and " +
+        "repository";
+    private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" +
+        "\t" + PURPOSE + "\n\n" +
+        "Common options:\n" +
+        "  -" + META_FLAG + " URL - Metadata repository details " +
+        "(implementation-specific)\n" +
+        "\n" +
+        "Amazon DynamoDB-specific options:\n" +
+        "  -" + REGION_FLAG + " REGION - Service region for connections\n" +
+        "\n" +
+        "  URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+        "  Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+        "  is not supported.";
+
+    private static final String SEP = "\t";
+    static final String S3_PREFIX = "S3";
+    static final String MS_PREFIX = "MS";
+
+    Diff(Configuration conf) {
+      super(conf);
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    @Override
+    public String getUsage() {
+      return USAGE;
+    }
+
+    /**
+     * Formats the output of printing a FileStatus in S3guard diff tool.
+     * @param status the status to print.
+     * @return the string of output.
+     */
+    private static String formatFileStatus(FileStatus status) {
+      return String.format("%s%s%d%s%s",
+          status.isDirectory() ? "D" : "F",
+          SEP,
+          status.getLen(),
+          SEP,
+          status.getPath().toString());
+    }
+
+    /**
+     * Compares metadata from 2 S3 FileStatus's to see if they differ.
+     * @param thisOne
+     * @param thatOne
+     * @return true if the metadata is not identical
+     */
+    private static boolean differ(FileStatus thisOne, FileStatus thatOne) {
+      Preconditions.checkArgument(!(thisOne == null && thatOne == null));
+      return (thisOne == null || thatOne == null) ||
+          (thisOne.getLen() != thatOne.getLen()) ||
+          (thisOne.isDirectory() != thatOne.isDirectory()) ||
+          (!thisOne.isDirectory() &&
+              thisOne.getModificationTime() != thatOne.getModificationTime());
+    }
+
+    /**
+     * Print difference, if any, between two file statuses to the output stream.
+     *
+     * @param msStatus file status from metadata store.
+     * @param s3Status file status from S3.
+     * @param out output stream.
+     */
+    private static void printDiff(FileStatus msStatus,
+                                  FileStatus s3Status,
+                                  PrintStream out) {
+      Preconditions.checkArgument(!(msStatus == null && s3Status == null));
+      if (msStatus != null && s3Status != null) {
+        Preconditions.checkArgument(
+            msStatus.getPath().equals(s3Status.getPath()),
+            String.format("The path from metadata store and s3 are different:" +
+            " ms=%s s3=%s", msStatus.getPath(), s3Status.getPath()));
+      }
+
+      if (differ(msStatus, s3Status)) {
+        if (s3Status != null) {
+          out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(s3Status));
+        }
+        if (msStatus != null) {
+          out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(msStatus));
+        }
+      }
+    }
+
+    /**
+     * Compare the metadata of the directory with the same path, on S3 and
+     * the metadata store, respectively. If one of them is null, consider the
+     * metadata of the directory and all its subdirectories are missing from
+     * the source.
+     *
+     * Pass the FileStatus obtained from s3 and metadata store to avoid one
+     * round trip to fetch the same metadata twice, because the FileStatus
+     * hve already been obtained from listStatus() / listChildren operations.
+     *
+     * @param msDir the directory FileStatus obtained from the metadata store.
+     * @param s3Dir the directory FileStatus obtained from S3.
+     * @param out the output stream to generate diff results.
+     * @throws IOException on I/O errors.
+     */
+    private void compareDir(FileStatus msDir, FileStatus s3Dir,
+                            PrintStream out) throws IOException {
+      Preconditions.checkArgument(!(msDir == null && s3Dir == null));
+      if (msDir != null && s3Dir != null) {
+        Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()),
+            String.format("The path from metadata store and s3 are different:" +
+             " ms=%s s3=%s", msDir.getPath(), s3Dir.getPath()));
+      }
+
+      Map<Path, FileStatus> s3Children = new HashMap<>();
+      if (s3Dir != null && s3Dir.isDirectory()) {
+        for (FileStatus status : getFilesystem().listStatus(s3Dir.getPath())) {
+          s3Children.put(status.getPath(), status);
+        }
+      }
+
+      Map<Path, FileStatus> msChildren = new HashMap<>();
+      if (msDir != null && msDir.isDirectory()) {
+        DirListingMetadata dirMeta =
+            getStore().listChildren(msDir.getPath());
+
+        if (dirMeta != null) {
+          for (PathMetadata meta : dirMeta.getListing()) {
+            FileStatus status = meta.getFileStatus();
+            msChildren.put(status.getPath(), status);
+          }
+        }
+      }
+
+      Set<Path> allPaths = new HashSet<>(s3Children.keySet());
+      allPaths.addAll(msChildren.keySet());
+
+      for (Path path : allPaths) {
+        FileStatus s3Status = s3Children.get(path);
+        FileStatus msStatus = msChildren.get(path);
+        printDiff(msStatus, s3Status, out);
+        if ((s3Status != null && s3Status.isDirectory()) ||
+            (msStatus != null && msStatus.isDirectory())) {
+          compareDir(msStatus, s3Status, out);
+        }
+      }
+      out.flush();
+    }
+
+    /**
+     * Compare both metadata store and S3 on the same path.
+     *
+     * @param path the path to be compared.
+     * @param out  the output stream to display results.
+     * @throws IOException on I/O errors.
+     */
+    private void compareRoot(Path path, PrintStream out) throws IOException {
+      Path qualified = getFilesystem().qualify(path);
+      FileStatus s3Status = null;
+      try {
+        s3Status = getFilesystem().getFileStatus(qualified);
+      } catch (FileNotFoundException e) {
+      }
+      PathMetadata meta = getStore().get(qualified);
+      FileStatus msStatus = (meta != null && !meta.isDeleted()) ?
+          meta.getFileStatus() : null;
+      compareDir(msStatus, s3Status, out);
+    }
+
+    @VisibleForTesting
+    public int run(String[] args, PrintStream out) throws IOException {
+      List<String> paths = parseArgs(args);
+      if (paths.isEmpty()) {
+        out.println(USAGE);
+        return INVALID_ARGUMENT;
+      }
+      String s3Path = paths.get(0);
+      initS3AFileSystem(s3Path);
+      initMetadataStore(true);
+
+      URI uri;
+      try {
+        uri = new URI(s3Path);
+      } catch (URISyntaxException e) {
+        throw new IOException(e);
+      }
+      Path root;
+      if (uri.getPath().isEmpty()) {
+        root = new Path("/");
+      } else {
+        root = new Path(uri.getPath());
+      }
+      root = getFilesystem().qualify(root);
+      compareRoot(root, out);
+      out.flush();
+      return SUCCESS;
+    }
+
+    @Override
+    public int run(String[] args) throws IOException {
+      return run(args, System.out);
+    }
+  }
+
+  /**
+   * Prune metadata that has not been modified recently.
+   */
+  static class Prune extends S3GuardTool {
+    private static final String NAME = "prune";
+    public static final String PURPOSE = "truncate older metadata from " +
+        "repository "
+        + DATA_IN_S3_IS_PRESERVED;;
+    private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
+        "\t" + PURPOSE + "\n\n" +
+        "Common options:\n" +
+        "  -" + META_FLAG + " URL - Metadata repository details " +
+        "(implementation-specific)\n" +
+        "\n" +
+        "Amazon DynamoDB-specific options:\n" +
+        "  -" + REGION_FLAG + " REGION - Service region for connections\n" +
+        "\n" +
+        "  URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
+        "  Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
+        "  is not supported.";
+
+    Prune(Configuration conf) {
+      super(conf);
+
+      CommandFormat format = getCommandFormat();
+      format.addOptionWithValue(DAYS_FLAG);
+      format.addOptionWithValue(HOURS_FLAG);
+      format.addOptionWithValue(MINUTES_FLAG);
+      format.addOptionWithValue(SECONDS_FLAG);
+    }
+
+    @VisibleForTesting
+    void setMetadataStore(MetadataStore ms) {
+      Preconditions.checkNotNull(ms);
+      this.setStore(ms);
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    @Override
+    public String getUsage() {
+      return USAGE;
+    }
+
+    private long getDeltaComponent(TimeUnit unit, String arg) {
+      String raw = getCommandFormat().getOptValue(arg);
+      if (raw == null || raw.isEmpty()) {
+        return 0;
+      }
+      Long parsed = Long.parseLong(raw);
+      return unit.toMillis(parsed);
+    }
+
+    @VisibleForTesting
+    public int run(String[] args, PrintStream out) throws
+        InterruptedException, IOException {
+      List<String> paths = parseArgs(args);
+      if (!parseDynamoDBRegion(paths)) {
+        System.err.println(USAGE);
+        return INVALID_ARGUMENT;
+      }
+      initMetadataStore(false);
+
+      Configuration conf = getConf();
+      long confDelta = conf.getLong(Constants.S3GUARD_CLI_PRUNE_AGE, 0);
+
+      long cliDelta = 0;
+      cliDelta += getDeltaComponent(TimeUnit.DAYS, "days");
+      cliDelta += getDeltaComponent(TimeUnit.HOURS, "hours");
+      cliDelta += getDeltaComponent(TimeUnit.MINUTES, "minutes");
+      cliDelta += getDeltaComponent(TimeUnit.SECONDS, "seconds");
+
+      if (confDelta <= 0 && cliDelta <= 0) {
+        System.err.println(
+            "You must specify a positive age for metadata to prune.");
+      }
+
+      // A delta provided on the CLI overrides if one is configured
+      long delta = confDelta;
+      if (cliDelta > 0) {
+        delta = cliDelta;
+      }
+
+      long now = System.currentTimeMillis();
+      long divide = now - delta;
+
+      getStore().prune(divide);
+
+      out.flush();
+      return SUCCESS;
+    }
+
+    @Override
+    public int run(String[] args) throws InterruptedException, IOException {
+      return run(args, System.out);
+    }
+  }
+
+  private static S3GuardTool command;
+
+  private static void printHelp() {
+    if (command == null) {
+      System.err.println("Usage: hadoop " + USAGE);
+      System.err.println("\tperform S3Guard metadata store " +
+          "administrative commands.");
+    } else {
+      System.err.println("Usage: hadoop " + command.getUsage());
+    }
+    System.err.println();
+    System.err.println(COMMON_USAGE);
+  }
+
+  /**
+   * Execute the command with the given arguments.
+   *
+   * @param args command specific arguments.
+   * @param conf Hadoop configuration.
+   * @return exit code.
+   * @throws Exception on I/O errors.
+   */
+  public static int run(String[] args, Configuration conf) throws
+      Exception {
+    /* ToolRunner.run does this too, but we must do it before looking at
+    subCommand or instantiating the cmd object below */
+    String[] otherArgs = new GenericOptionsParser(conf, args)
+        .getRemainingArgs();
+    if (otherArgs.length == 0) {
+      printHelp();
+      return INVALID_ARGUMENT;
+    }
+    final String subCommand = otherArgs[0];
+    switch (subCommand) {
+    case Init.NAME:
+      command = new Init(conf);
+      break;
+    case Destroy.NAME:
+      command = new Destroy(conf);
+      break;
+    case Import.NAME:
+      command = new Import(conf);
+      break;
+    case Diff.NAME:
+      command = new Diff(conf);
+      break;
+    case Prune.NAME:
+      command = new Prune(conf);
+      break;
+    default:
+      printHelp();
+      return INVALID_ARGUMENT;
+    }
+    return ToolRunner.run(conf, command, otherArgs);
+  }
+
+  /**
+   * Main entry point. Calls {@code System.exit()} on all execution paths.
+   * @param args argument list
+   */
+  public static void main(String[] args) {
+    try {
+      int ret = run(args, new Configuration());
+      System.exit(ret);
+    } catch (CommandFormat.UnknownOptionException e) {
+      System.err.println(e.getMessage());
+      printHelp();
+      System.exit(INVALID_ARGUMENT);
+    } catch (Throwable e) {
+      e.printStackTrace(System.err);
+      System.exit(ERROR);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
new file mode 100644
index 0000000..d430315
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This package contains classes related to S3Guard: a feature of S3A to mask
+ * the eventual consistency behavior of S3 and optimize access patterns by
+ * coordinating with a strongly consistent external store for file system
+ * metadata.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;

http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
index 862ce6b..ce79284 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java
@@ -105,6 +105,10 @@ public final class S3xLoginHelper {
    * @return a login tuple, possibly empty.
    */
   public static Login extractLoginDetails(URI name) {
+    if (name == null) {
+      return Login.EMPTY;
+    }
+
     try {
       String authority = name.getAuthority();
       if (authority == null) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh
new file mode 100644
index 0000000..039b077
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if ! declare -f hadoop_subcommand_s3guard >/dev/null 2>/dev/null; then
+
+  if [[ "${HADOOP_SHELL_EXECNAME}" = hadoop ]]; then
+    hadoop_add_subcommand "s3guard" client "manage metadata on S3"
+  fi
+
+  # this can't be indented otherwise shelldocs won't get it
+
+## @description  s3guard command for hadoop
+## @audience     public
+## @stability    stable
+## @replaceable  yes
+function hadoop_subcommand_s3guard
+{
+  # shellcheck disable=SC2034
+  HADOOP_CLASSNAME=org.apache.hadoop.fs.s3a.s3guard.S3GuardTool
+  hadoop_add_to_classpath_tools hadoop-aws
+}
+
+fi

http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index 182f060..b8d37c6 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -46,6 +46,7 @@ See also:
 
 * [Testing](testing.html)
 * [Troubleshooting S3a](troubleshooting_s3a.html)
+* [S3Guard](s3guard.html)
 
 ### Warning #1: Object Stores are not filesystems
 
@@ -1552,7 +1553,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`.
 
 SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to
 encrypt the objects. The user may specify a specific CMK or leave the
-`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key
+`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key
 in AWS IAM.  Each CMK configured in AWS IAM is region specific, and cannot be
 used in a in a S3 bucket in a different region.  There is can also be policies
 assigned to the CMK that prohibit or restrict its use for users causing S3A

http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
new file mode 100644
index 0000000..fe67d69
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md
@@ -0,0 +1,610 @@
+<!---
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License. See accompanying LICENSE file.
+-->
+
+# S3Guard: Consistency and Metadata Caching for S3A
+
+**Experimental Feature**
+
+<!-- MACRO{toc|fromDepth=0|toDepth=5} -->
+
+## Overview
+
+*S3Guard* is an experimental feature for the S3A client of the S3 object store,
+which can use a (consistent) database as the store of metadata about objects
+in an S3 bucket.
+
+S3Guard
+
+1. May improve performance on directory listing/scanning operations,
+including those which take place during the partitioning period of query
+execution, the process where files are listed and the work divided up amongst
+processes.
+
+1. Permits a consistent view of the object store. Without this, changes in
+objects may not be immediately visible, especially in listing operations.
+
+1. Offers a platform for future performance improvements for running Hadoop
+workloads on top of object stores
+
+The basic idea is that, for each operation in the Hadoop S3 client (s3a) that
+reads or modifies metadata, a shadow copy of that metadata is stored in a
+separate MetadataStore implementation.  Each MetadataStore implementation
+offers HDFS-like consistency for the metadata, and may also provide faster
+lookups for things like file status or directory listings.
+
+For links to early design documents and related patches, see
+[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345).
+
+*Important*
+
+* S3Guard is experimental and should be considered unstable.
+
+* While all underlying data is persisted in S3, if, for some reason,
+the S3Guard-cached metadata becomes inconsistent with that in S3,
+queries on the data may become incorrect.
+For example, new datasets may be omitted, objects may be overwritten,
+or clients may not be aware that some data has been deleted.
+It is essential for all clients writing to an S3Guard-enabled
+S3 Repository to use the feature. Clients reading the data may work directly
+with the S3A data, in which case the normal S3 consistency guarantees apply.
+
+
+## Setting up S3Guard
+
+The latest configuration parameters are defined in `core-default.xml`.  You
+should consult that file for full information, but a summary is provided here.
+
+
+### 1. Choose the Database
+
+A core concept of S3Guard is that the directory listing data of the object
+store, *the metadata* is replicated in a higher-performance, consistent,
+database. In S3Guard, this database is called *The Metadata Store*
+
+By default, S3Guard is not enabled.
+
+The Metadata Store to use in production is bonded to Amazon's DynamoDB
+database service.  The following setting will enable this Metadata Store:
+
+```xml
+<property>
+    <name>fs.s3a.metadatastore.impl</name>
+    <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Note that the `NullMetadataStore` store can be explicitly requested if desired.
+This offers no metadata storage, and effectively disables S3Guard.
+
+```xml
+<property>
+    <name>fs.s3a.metadatastore.impl</name>
+    <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+```
+
+### 2. Configure S3Guard Settings
+
+More settings will may be added in the future.
+Currently the only Metadata Store-independent setting, besides the
+implementation class above, is the *allow authoritative* flag.
+
+It is recommended that you leave the default setting here:
+
+```xml
+<property>
+    <name>fs.s3a.metadatastore.authoritative</name>
+    <value>false</value>
+</property>
+
+```
+
+Setting this to `true` is currently an experimental feature.  When true, the
+S3A client will avoid round-trips to S3 when getting directory listings, if
+there is a fully-cached version of the directory stored in the Metadata Store.
+
+Note that if this is set to true, it may exacerbate or persist existing race
+conditions around multiple concurrent modifications and listings of a given
+directory tree.
+
+In particular: **If the Metadata Store is declared as authoritative,
+all interactions with the S3 bucket(s) must be through S3A clients sharing
+the same Metadata Store**
+
+
+### 3. Configure the Metadata Store.
+
+Here are the `DynamoDBMetadataStore` settings.  Other Metadata Store
+implementations will have their own configuration parameters.
+
+
+### 4. Name Your Table
+
+First, choose the name of the table you wish to use for the S3Guard metadata
+storage in your DynamoDB instance.  If you leave it unset/empty, a
+separate table will be created for each S3 bucket you access, and that
+bucket's name will be used for the name of the DynamoDB table.  For example,
+this sets the table name to `my-ddb-table-name`
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table</name>
+  <value>my-ddb-table-name</value>
+  <description>
+    The DynamoDB table name to operate. Without this property, the respective
+    S3 bucket names will be used.
+  </description>
+</property>
+```
+
+It is good to share a table across multiple buckets for multiple reasons.
+
+1. You are billed for the I/O capacity allocated to the table,
+*even when the table is not used*. Sharing capacity can reduce costs.
+
+1. You can share the "provision burden" across the buckets. That is, rather
+than allocating for the peak load on a single bucket, you can allocate for
+the peak load *across all the buckets*, which is likely to be significantly
+lower.
+
+1. It's easier to measure and tune the load requirements and cost of
+S3Guard, because there is only one table to review and configure in the
+AWS management console.
+
+When wouldn't you want to share a table?
+
+1. When you do explicitly want to provision I/O capacity to a specific bucket
+and table, isolated from others.
+
+1. When you are using separate billing for specific buckets allocated
+to specific projects.
+
+1. When different users/roles have different access rights to different buckets.
+As S3Guard requires all users to have R/W access to the table, all users will
+be able to list the metadata in all buckets, even those to which they lack
+read access.
+
+### 5. Locate your Table
+
+You may also wish to specify the region to use for DynamoDB.  If a region
+is not configured, S3A will assume that it is in the same region as the S3
+bucket. A list of regions for the DynamoDB service can be found in
+[Amazon's documentation](http://docs.aws.amazon.com/general/latest/gr/rande.html#ddb_region).
+In this example, to use the US West 2 region:
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.region</name>
+  <value>us-west-2</value>
+</property>
+```
+
+When working with S3Guard-managed buckets from EC2 VMs running in AWS
+infrastructure, using a local DynamoDB region ensures the lowest latency
+and highest reliability, as well as avoiding all long-haul network charges.
+The S3Guard tables, and indeed, the S3 buckets, should all be in the same
+region as the VMs.
+
+### 6. Optional: Create your Table
+
+Next, you can choose whether or not the table will be automatically created
+(if it doesn't already exist).  If you want this feature, set the
+`fs.s3a.s3guard.ddb.table.create` option to `true`.
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table.create</name>
+  <value>true</value>
+  <description>
+    If true, the S3A client will create the table if it does not already exist.
+  </description>
+</property>
+```
+
+### 7. If creating a table: Set your DynamoDB IO Capacity
+
+Next, you need to set the DynamoDB read and write throughput requirements you
+expect to need for your cluster.  Setting higher values will cost you more
+money.  *Note* that these settings only affect table creation when
+`fs.s3a.s3guard.ddb.table.create` is enabled.  To change the throughput for
+an existing table, use the AWS console or CLI tool.
+
+For more details on DynamoDB capacity units, see the AWS page on [Capacity
+Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
+
+The charges are incurred per hour for the life of the table, *even when the
+table and the underlying S3 buckets are not being used*.
+
+There are also charges incurred for data storage and for data IO outside of the
+region of the DynamoDB instance. S3Guard only stores metadata in DynamoDB: path names
+and summary details of objects —the actual data is stored in S3, so billed at S3
+rates.
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
+  <value>500</value>
+  <description>
+    Provisioned throughput requirements for read operations in terms of capacity
+    units for the DynamoDB table.  This config value will only be used when
+    creating a new DynamoDB table, though later you can manually provision by
+    increasing or decreasing read capacity as needed for existing tables.
+    See DynamoDB documents for more information.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
+  <value>100</value>
+  <description>
+    Provisioned throughput requirements for write operations in terms of
+    capacity units for the DynamoDB table.  Refer to related config
+    fs.s3a.s3guard.ddb.table.capacity.read before usage.
+  </description>
+</property>
+```
+
+Attempting to perform more IO than the capacity requested simply throttles the
+IO; small capacity numbers are recommended when initially experimenting
+with S3Guard.
+
+## Authenticating with S3Guard
+
+The DynamoDB metadata store takes advantage of the fact that the DynamoDB
+service uses the same authentication mechanisms as S3. S3Guard
+gets all its credentials from the S3A client that is using it.
+
+All existing S3 authentication mechanisms can be used, except for one
+exception. Credentials placed in URIs are not supported for S3Guard, for security
+reasons.
+
+## Per-bucket S3Guard configuration
+
+In production, it is likely only some buckets will have S3Guard enabled;
+those which are read-only may have disabled, for example. Equally importantly,
+buckets in different regions should have different tables, each
+in the relevant region.
+
+These options can be managed through S3A's [per-bucket configuration
+mechanism](./index.html#Configuring_different_S3_buckets).
+All options with the under `fs.s3a.bucket.BUCKETNAME.KEY` are propagated
+to the options `fs.s3a.KEY` *for that bucket only*.
+
+As an example, here is a configuration to use different metadata stores
+and tables for different buckets
+
+First, we define shortcuts for the metadata store classnames
+
+
+```xml
+<property>
+  <name>s3guard.null</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+
+<property>
+  <name>s3guard.dynamo</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Next, Amazon's public landsat database is configured with no
+metadata store
+
+```xml
+<property>
+  <name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
+  <value>${s3guard.null}</value>
+  <description>The read-only landsat-pds repository isn't
+  managed by S3Guard</description>
+</property>
+```
+
+Next the `ireland-2` and `ireland-offline` buckets are configured with
+DynamoDB as the store, and a shared table `production-table`
+
+
+```xml
+<property>
+  <name>fs.s3a.bucket.ireland-2.metadatastore.impl</name>
+  <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.ireland-offline.metadatastore.impl</name>
+  <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.ireland-2.s3guard.ddb.table</name>
+  <value>production-table</value>
+</property>
+```
+
+The region of this table is automatically set to be that of the buckets,
+here `eu-west-1`; the same table name may actually be used in different
+regions.
+
+Together then, this configuration enables the DynamoDB Metadata Store
+for two buckets with a shared table, while disabling it for the public
+bucket.
+
+
+## S3Guard Command Line Interface (CLI)
+
+Note that in some cases an AWS region or `s3a://` URI can be provided.
+
+Metadata store URIs include a scheme that designates the backing store. For
+example (e.g. `dynamodb://table_name`;). As documented above, the
+AWS region can be inferred if the URI to an existing bucket is provided.
+
+
+The S3A URI must also be provided for per-bucket configuration options
+to be picked up. That is: when an s3a URL is provided on the command line,
+all its "resolved" per-bucket settings are used to connect to, authenticate
+with and configure the S3Guard table. If no such URL is provided, then
+the base settings are picked up.
+
+
+### Create a table: `s3guard init`
+
+```bash
+hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )
+```
+
+Creates and initializes an empty metadata store.
+
+A DynamoDB metadata store can be initialized with additional parameters
+pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
+
+```bash
+[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
+```
+
+Example 1
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
+```
+
+Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
+in the same location as the bucket "ireland-1".
+
+
+Example 2
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Creates a table "ireland-team" in the same region "s3-eu-west-1.amazonaws.com"
+
+
+### Import a bucket: `s3guard import`
+
+```bash
+hadoop s3guard import [-meta URI] s3a://BUCKET
+```
+
+Pre-populates a metadata store according to the current contents of an S3
+bucket. If the `-meta` option is omitted, the binding information is taken
+from the `core-site.xml` configuration.
+
+Example
+
+```bash
+hadoop s3guard import s3a://ireland-1
+```
+
+### Audit a table: `s3guard diff`
+
+```bash
+hadoop s3guard diff [-meta URI] s3a://BUCKET
+```
+
+Lists discrepancies between a metadata store and bucket. Note that depending on
+how S3Guard is used, certain discrepancies are to be expected.
+
+Example
+
+```bash
+hadoop s3guard diff s3a://ireland-1
+```
+
+### Delete a table: `s3guard destroy`
+
+
+Deletes a metadata store. With DynamoDB as the store, this means
+the specific DynamoDB table use to store the metadata.
+
+```bash
+hadoop s3guard destroy [-meta URI] ( -region REGION | s3a://BUCKET )
+```
+
+This *does not* delete the bucket, only the S3Guard table which it is bound
+to.
+
+
+Examples
+
+```bash
+hadoop s3guard destroy s3a://ireland-1
+```
+
+Deletes the table which the bucket ireland-1 is configured to use
+as its MetadataStore.
+
+```bash
+hadoop s3guard destroy -meta dynamodb://ireland-team -region eu-west-1
+```
+
+
+
+### Clean up a table, `s3guard prune`
+
+Delete all file entries in the MetadataStore table whose object "modification
+time" is older than the specified age.
+
+```bash
+hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
+    [-seconds SECONDS] [-m URI] ( -region REGION | s3a://BUCKET )
+```
+
+A time value must be supplied.
+
+1. This does not delete the entries in the bucket itself.
+1. The modification time is effectively the creation time of the objects
+in the S3 Bucket.
+1. Even when an S3A URI is supplied, all entries in the table older than
+a specific age are deleted &mdash; even those from other buckets.
+
+Example
+
+```bash
+hadoop s3guard prune -days 7 s3a://ireland-1
+```
+
+Deletes all entries in the S3Guard table for files older than seven days from
+the table associated with `s3a://ireland-1`.
+
+```bash
+hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Delete all entries more than 90 minutes old from the table "ireland-team" in
+the region "eu-west-1".
+
+
+
+## Debugging and Error Handling
+
+If you run into network connectivity issues, or have a machine failure in the
+middle of an operation, you may end up with your metadata store having state
+that differs from S3.  The S3Guard CLI commands, covered in the CLI section
+above, can be used to diagnose and repair these issues.
+
+There are some logs whose log level can be increased to provide more
+information.
+
+```properties
+# Log S3Guard classes
+log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
+
+# Log all S3A classes
+log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
+
+# Enable debug logging of AWS DynamoDB client
+log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB
+
+# Log all HTTP requests made; includes S3 interaction. This may
+# include sensitive information such as account IDs in HTTP headers.
+log4j.logger.com.amazonaws.request=DEBUG
+
+```
+
+If all else fails, S3Guard is designed to allow for easy recovery by deleting
+the metadata store data. In DynamoDB, this can be accomplished by simply
+deleting the table, and allowing S3Guard to recreate it from scratch.  Note
+that S3Guard tracks recent changes to file metadata to implement consistency.
+Deleting the metadata store table will simply result in a period of eventual
+consistency for any file modifications that were made right before the table
+was deleted.
+
+### Failure Semantics
+
+Operations which modify metadata will make changes to S3 first. If, and only
+if, those operations succeed, the equivalent changes will be made to the
+Metadata Store.
+
+These changes to S3 and Metadata Store are not fully-transactional:  If the S3
+operations succeed, and the subsequent Metadata Store updates fail, the S3
+changes will *not* be rolled back.  In this case, an error message will be
+logged.
+
+### Versioning
+
+S3Guard tables are created with a version marker, an entry with the primary
+key and child entry of `../VERSION`; the use of a relative path guarantees
+that it will not be resolved.
+
+#### Versioning policy.
+
+1. The version number of an S3Guard table will only be incremented when
+an incompatible change is made to the table structure —that is, the structure
+has changed so that it is no longer readable by older versions, or because
+it has added new mandatory fields which older versions do not create.
+1. The version number of S3Guard tables will only be changed by incrementing
+the value.
+1. Updated versions of S3Guard MAY continue to support older version tables.
+1. If an incompatible change is made such that existing tables are not compatible,
+then a means shall be provided to update existing tables. For example:
+an option in the Command Line Interface, or an option to upgrade tables
+during S3Guard initialization.
+
+*Note*: this policy does not indicate any intent to upgrade table structures
+in an incompatible manner. The version marker in tables exists to support
+such an option if it ever becomes necessary, by ensuring that all S3Guard
+client can recognise any version mismatch.
+
+### Security
+
+All users of the DynamoDB table must have write access to it. This
+effectively means they must have write access to the entire object store.
+
+There's not been much testing of using a S3Guard Metadata Store
+with a read-only S3 Bucket. It *should* work, provided all users
+have write access to the DynamoDB table. And, as updates to the Metadata Store
+are only made after successful file creation, deletion and rename, the
+store is *unlikely* to get out of sync, it is still something which
+merits more testing before it could be considered reliable.
+
+### Troubleshooting
+
+#### Error: `S3Guard table lacks version marker.`
+
+The table which was intended to be used as a S3guard metadata store
+does not have any version marker indicating that it is a S3Guard table.
+
+It may be that this is not a S3Guard table.
+
+* Make sure that this is the correct table name.
+* Delete the table, so it can be rebuilt.
+
+#### Error: `Database table is from an incompatible S3Guard version`
+
+This indicates that the version of S3Guard which created (or possibly updated)
+the database table is from a different version that that expected by the S3A
+client.
+
+This error will also include the expected and actual version numbers.
+
+If the expected version is lower than the actual version, then the version
+of the S3A client library is too old to interact with this S3Guard-managed
+bucket. Upgrade the application/library.
+
+If the expected version is higher than the actual version, then the table
+itself will need upgrading.
+
+#### Error `"DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off"`
+
+S3Guard could not find the DynamoDB table for the Metadata Store,
+and it was not configured to create it. Either the table was missing,
+or the configuration is preventing S3Guard from finding the table.
+
+1. Verify that the value of `fs.s3a.s3guard.ddb.table` is correct.
+1. If the region for an existing table has been set in
+`fs.s3a.s3guard.ddb.region`, verify that the value is correct.
+1. If the region is not set, verify that the table exists in the same
+region as the bucket being used.
+1. Create the table if necessary.


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org


Mime
View raw message