hadoop-common-commits mailing list archives

From l..@apache.org
Subject hadoop git commit: HADOOP-13650. S3Guard: Provide command line tools to manipulate metadata store. (Lei Xu)
Date Sat, 14 Jan 2017 09:24:15 GMT
Repository: hadoop
Updated Branches:
  refs/heads/HADOOP-13345 2220b787c -> f10114c14


HADOOP-13650. S3Guard: Provide command line tools to manipulate metadata store. (Lei Xu)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f10114c1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f10114c1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f10114c1

Branch: refs/heads/HADOOP-13345
Commit: f10114c144f17c142b597b6a71fa856474ba21af
Parents: 2220b78
Author: Lei Xu <lei@apache.org>
Authored: Sat Jan 14 17:23:45 2017 +0800
Committer: Lei Xu <lei@apache.org>
Committed: Sat Jan 14 17:23:45 2017 +0800

----------------------------------------------------------------------
 .../main/resources/assemblies/hadoop-tools.xml  |  13 +
 hadoop-tools/hadoop-aws/pom.xml                 |  25 +
 .../org/apache/hadoop/fs/s3a/S3AFileSystem.java |   5 +-
 .../fs/s3a/s3guard/DynamoDBMetadataStore.java   |  44 +-
 .../fs/s3a/s3guard/LocalMetadataStore.java      |   6 +-
 .../hadoop/fs/s3a/s3guard/MetadataStore.java    |  10 +
 .../fs/s3a/s3guard/NullMetadataStore.java       |   5 +
 .../hadoop/fs/s3a/s3guard/S3GuardTool.java      | 636 +++++++++++++++++++
 .../src/main/shellprofile.d/hadoop-s3a.sh       |  37 ++
 .../org/apache/hadoop/fs/s3a/S3ATestUtils.java  |   9 +
 .../hadoop/fs/s3a/s3guard/TestS3GuardTool.java  | 300 +++++++++
 11 files changed, 1085 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
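
The patch wires four sub-commands (init, destroy, import, diff) into a single
entry point, S3GuardTool.run(String[], Configuration). A minimal sketch of
driving that entry point programmatically, mirroring main() and the tests at
the bottom of this patch; the table and bucket names are placeholders, not
part of this change:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;

    public class S3GuardToolDemo {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Create the DynamoDB table backing the metadata store.
        int ret = S3GuardTool.run(
            new String[] {"init", "-m", "dynamodb://example-table"}, conf);
        // Import existing bucket metadata into the store.
        ret = S3GuardTool.run(
            new String[] {"import", "s3a://example-bucket/path/"}, conf);
        // Print the differences between S3 and the metadata store.
        ret = S3GuardTool.run(
            new String[] {"diff", "-m", "dynamodb://example-table",
                "s3a://example-bucket/path/"}, conf);
        System.exit(ret);
      }
    }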


http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml
----------------------------------------------------------------------
diff --git a/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml b/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml
index bc9548b..0a4367d 100644
--- a/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml
+++ b/hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml
@@ -174,6 +174,19 @@
       <directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
       <outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
     </fileSet>
+    <fileSet>
+      <directory>../hadoop-aws/src/main/bin</directory>
+      <outputDirectory>/bin</outputDirectory>
+      <fileMode>0755</fileMode>
+    </fileSet>
+    <fileSet>
+      <directory>../hadoop-aws/src/main/shellprofile.d</directory>
+      <includes>
+        <include>*</include>
+      </includes>
+      <outputDirectory>/libexec/shellprofile.d</outputDirectory>
+      <fileMode>0755</fileMode>
+    </fileSet>
   </fileSets>
   <dependencySets>
     <dependencySet>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 35163fa..fa86b4c 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -324,6 +324,31 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <artifactId>maven-dependency-plugin</artifactId>
+        <executions>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/lib</outputDirectory>
+            </configuration>
+          </execution>
+          <execution>
+            <id>deplist</id>
+            <phase>compile</phase>
+            <goals>
+              <goal>list</goal>
+            </goals>
+            <configuration>
+              <!-- referenced by a built-in command -->
+              <outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt</outputFile>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 345deca..2154ea6 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -435,7 +435,8 @@ public class S3AFileSystem extends FileSystem {
    * @param path input path, may be relative to the working dir
    * @return a key excluding the leading "/", or, if it is the root path, ""
    */
-  private String pathToKey(Path path) {
+  @VisibleForTesting
+  public String pathToKey(Path path) {
     if (!path.isAbsolute()) {
       path = new Path(workingDir, path);
     }
@@ -486,7 +487,7 @@ public class S3AFileSystem extends FileSystem {
    * @param path path to qualify
    * @return a qualified path.
    */
-  Path qualify(Path path) {
+  public Path qualify(Path path) {
     return path.makeQualified(uri, workingDir);
   }
 

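pathToKey() and qualify() are widened above so that the new tool and its
tests can map between s3a paths and object keys. A rough illustration of the
two contracts, following the javadocs; the bucket and working directory are
placeholders:

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.s3a.S3AFileSystem;

    class PathMappingDemo {
      static void demo(S3AFileSystem fs) {
        // Assume fs is bound to s3a://example-bucket with working directory
        // s3a://example-bucket/user/alice (both placeholders).
        Path relative = new Path("data/part-0000");
        // qualify() resolves against the FS URI and working directory:
        //   s3a://example-bucket/user/alice/data/part-0000
        System.out.println(fs.qualify(relative));
        // pathToKey() returns the object key without a leading "/":
        //   "user/alice/data/part-0000"; the root path maps to "".
        System.out.println(fs.pathToKey(relative));
      }
    }
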
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
index 6ff0ee1..f5da22b 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java
@@ -163,6 +163,45 @@ public class DynamoDBMetadataStore implements MetadataStore {
   private URI s3Uri;
   private String username;
 
+  /**
+   * A utility function to create a DynamoDB instance.
+   * @param fs S3A file system.
+   * @return DynamoDB instance.
+   */
+  @VisibleForTesting
+  static DynamoDB createDynamoDB(S3AFileSystem fs) throws IOException {
+    Preconditions.checkNotNull(fs);
+    String region;
+    try {
+      region = fs.getAmazonS3Client().getBucketLocation(fs.getBucket());
+    } catch (AmazonClientException e) {
+      throw translateException("Determining bucket location",
+          fs.getUri().toString(), e);
+    }
+    return createDynamoDB(fs, region);
+  }
+
+  /**
+   * A utility function to create a DynamoDB instance.
+   * @param fs S3A file system.
+   * @param region region of the S3A file system.
+   * @return DynamoDB instance.
+   */
+  private static DynamoDB createDynamoDB(S3AFileSystem fs, String region)
+      throws IOException {
+    Preconditions.checkNotNull(fs);
+    Preconditions.checkNotNull(region);
+    final Configuration conf = fs.getConf();
+    Class<? extends DynamoDBClientFactory> cls = conf.getClass(
+        S3GUARD_DDB_CLIENT_FACTORY_IMPL,
+        S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT,
+        DynamoDBClientFactory.class);
+    LOG.debug("Creating dynamo DB client {}", cls);
+    AmazonDynamoDBClient dynamoDBClient = ReflectionUtils.newInstance(cls, conf)
+        .createDynamoDBClient(fs.getUri(), region);
+    return new DynamoDB(dynamoDBClient);
+  }
+
   @Override
   public void initialize(FileSystem fs) throws IOException {
     Preconditions.checkArgument(fs instanceof S3AFileSystem,
@@ -208,7 +247,8 @@ public class DynamoDBMetadataStore implements MetadataStore {
    * @see #initialize(FileSystem)
    * @throws IOException if there is an error
    */
-  void initialize(Configuration config) throws IOException {
+  @Override
+  public void initialize(Configuration config) throws IOException {
     conf = config;
     // use the bucket as the DynamoDB table name if not specified in config
     tableName = conf.getTrimmed(S3GUARD_DDB_TABLE_NAME_KEY);
@@ -472,7 +512,7 @@ public class DynamoDBMetadataStore implements MetadataStore {
   @Override
   public void destroy() throws IOException {
     if (table == null) {
-      LOG.debug("In destroy(): no table to delete");
+      LOG.info("In destroy(): no table to delete");
       return;
     }
     LOG.info("Deleting DynamoDB table {} in region {}", tableName, region);

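createDynamoDB() gives callers in the same package a raw DynamoDB handle
whose region is derived from the bucket location of the file system. A
minimal sketch of probing for a table with it, as the tests at the bottom of
this patch do; the class and table name here are illustrative only:

    package org.apache.hadoop.fs.s3a.s3guard;  // the factory is package-private

    import java.io.IOException;

    import com.amazonaws.services.dynamodbv2.document.DynamoDB;
    import com.amazonaws.services.dynamodbv2.document.Table;
    import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
    import org.apache.hadoop.fs.s3a.S3AFileSystem;

    class DynamoDBProbe {
      static boolean tableExists(S3AFileSystem fs, String tableName)
          throws IOException {
        // The region comes from fs's bucket location inside the factory.
        DynamoDB db = DynamoDBMetadataStore.createDynamoDB(fs);
        try {
          Table table = db.getTable(tableName);
          table.describe();  // throws ResourceNotFoundException if absent
          return true;
        } catch (ResourceNotFoundException e) {
          return false;
        }
      }
    }
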
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
index bf1fdd7..3f108de 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/LocalMetadataStore.java
@@ -85,7 +85,11 @@ public class LocalMetadataStore implements MetadataStore {
       uriHost = null;
     }
 
-    Configuration conf = fs.getConf();
+    initialize(fs.getConf());
+  }
+
+  @Override
+  public void initialize(Configuration conf) throws IOException {
     Preconditions.checkNotNull(conf);
     int maxRecords = conf.getInt(CONF_MAX_RECORDS, DEFAULT_MAX_RECORDS);
     if (maxRecords < 4) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
index 5c611c2..59b39a5 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/MetadataStore.java
@@ -24,6 +24,7 @@ import java.util.Collection;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
@@ -45,6 +46,15 @@ public interface MetadataStore extends Closeable {
   void initialize(FileSystem fs) throws IOException;
 
   /**
+   * Performs one-time initialization of the metadata store via configuration.
+   *
+   * @see #initialize(FileSystem)
+   * @param conf Configuration.
+   * @throws IOException if there is an error
+   */
+  void initialize(Configuration conf) throws IOException;
+
+  /**
    * Deletes exactly one path.
    *
    * @param path the path to delete

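The new overload lets the CLI bring up a store from nothing but a
Configuration, while callers that already hold a live S3AFileSystem keep
using the FileSystem variant. A condensed sketch of the dispatch performed by
S3GuardTool#initMetadataStore, with placeholder names and a
LocalMetadataStore standing in for any implementation:

    package org.apache.hadoop.fs.s3a.s3guard;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.s3a.S3AFileSystem;

    class StoreBootstrap {
      static MetadataStore createStore(Configuration conf, S3AFileSystem s3a)
          throws IOException {
        MetadataStore store = new LocalMetadataStore();
        if (s3a == null) {
          // Configuration-only path: the store reads every setting from conf.
          store.initialize(conf);
        } else {
          // FileSystem path: the store can also pick up bucket and owner
          // details from the live S3A instance.
          store.initialize(s3a);
        }
        return store;
      }
    }
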
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
index 8041870..7f55707 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/NullMetadataStore.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.fs.s3a.s3guard;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
@@ -37,6 +38,10 @@ public class NullMetadataStore implements MetadataStore {
   }
 
   @Override
+  public void initialize(Configuration conf) throws IOException {
+  }
+
+  @Override
   public void close() throws IOException {
     return;
   }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
new file mode 100644
index 0000000..77a1a23
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java
@@ -0,0 +1,636 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.shell.CommandFormat;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.apache.hadoop.fs.s3a.Constants.*;
+
+/**
+ * Manage S3Guard Metadata Store.
+ */
+public abstract class S3GuardTool extends Configured implements Tool {
+  private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
+
+  private static final String NAME = "s3a";
+
+  // Exit codes
+  static final int SUCCESS = 0;
+  static final int INVALID_ARGUMENT = -1;
+  static final int ERROR = -99;
+
+  protected S3AFileSystem s3a;
+  protected MetadataStore ms;
+  protected CommandFormat commandFormat;
+
+  /**
+   * Construct an S3Guard tool with the given Hadoop configuration.
+   * @param conf Configuration.
+   */
+  public S3GuardTool(Configuration conf) {
+    super(conf);
+
+    commandFormat = new CommandFormat(0, Integer.MAX_VALUE, "h");
+    // For metadata store URI
+    commandFormat.addOptionWithValue("m");
+    // DDB endpoint.
+    commandFormat.addOptionWithValue("e");
+  }
+
+  /**
+   * Return sub-command name.
+   */
+  abstract String getName();
+
+  @VisibleForTesting
+  public MetadataStore getMetadataStore() {
+    return ms;
+  }
+
+  /**
+   * Parse the DynamoDB endpoint from either the -m option or an S3 path.
+   *
+   * This function should only be called from {@link InitMetadata} or
+   * {@link DestroyMetadata}.
+   *
+   * @param paths remaining parameters from CLI.
+   * @return false for invalid parameters.
+   * @throws IOException on I/O errors.
+   */
+  boolean parseDynamoDBEndPoint(List<String> paths) throws IOException {
+    // Validate parameters.
+    boolean hasMsURI = commandFormat.getOptValue("m") != null &&
+        !commandFormat.getOptValue("m").isEmpty();
+    boolean hasS3Path = !paths.isEmpty();
+    if (hasMsURI && hasS3Path) {
+      System.out.println("Must specify either -m URI or s3a://bucket, not both");
+      return false;
+    } else if (hasMsURI) {
+      Configuration conf = getConf();
+      String endpoint = conf.get(S3GUARD_DDB_ENDPOINT_KEY);
+      String param = commandFormat.getOptValue("e");  // endpoint param
+      if ((endpoint == null || endpoint.isEmpty())
+          && (param == null || param.isEmpty())) {
+        System.out.printf("Must specify -e ENDPOINT or %s in conf.%n",
+            S3GUARD_DDB_ENDPOINT_KEY);
+        return false;
+      }
+
+      if (param != null && !param.isEmpty()) {
+        conf.set(S3GUARD_DDB_ENDPOINT_KEY, param);
+      }
+    } else if (hasS3Path) {
+      // The CLI received a valid S3 path, so an S3AFileSystem instance
+      // is used to configure the metadata store later.
+      String s3Path = paths.get(0);
+      initS3AFileSystem(s3Path);
+    } else {
+      return false;
+    }
+    return true;
+  }
+
+  /**
+   * Create the metadata store from the command line option or configuration.
+   *
+   * @param create create the metadata store if it does not exist.
+   * @return an initialized metadata store.
+   */
+  MetadataStore initMetadataStore(boolean create) throws IOException {
+    if (ms != null) {
+      return ms;
+    }
+    String metaURI = commandFormat.getOptValue("m");
+    if (metaURI != null && !metaURI.isEmpty()) {
+      URI uri = URI.create(metaURI);
+      LOG.info("Create metadata store: {} scheme: {}",
+          uri, uri.getScheme());
+      switch (uri.getScheme().toLowerCase()) {
+      case "local":
+        ms = new LocalMetadataStore();
+        break;
+      case "dynamodb":
+        ms = new DynamoDBMetadataStore();
+        getConf().set(S3GUARD_DDB_TABLE_NAME_KEY, uri.getAuthority());
+        getConf().setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, create);
+        break;
+      default:
+        throw new IOException(
+            String.format("Metadata store %s is not supported", uri));
+      }
+    } else {
+      // The CLI did not specify a metadata store URI; fall back to the
+      // default metadata store, DynamoDB.
+      ms = new DynamoDBMetadataStore();
+      getConf().setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, create);
+    }
+
+    if (s3a == null) {
+      ms.initialize(getConf());
+    } else {
+      ms.initialize(s3a);
+    }
+    LOG.info("Metadata store {} is initialized.", ms);
+    return ms;
+  }
+
+  /**
+   * Initialize S3A FileSystem instance.
+   *
+   * @param path s3a URI
+   * @throws IOException on failure to instantiate the file system.
+   */
+  void initS3AFileSystem(String path) throws IOException {
+    URI uri;
+    try {
+      uri = new URI(path);
+    } catch (URISyntaxException e) {
+      throw new IOException(e);
+    }
+    // Make sure that the S3AFileSystem is not bound to a real MetadataStore
+    // implementation; the tool needs the raw view of S3.
+    Configuration conf = getConf();
+    conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
+        MetadataStore.class);
+    FileSystem fs = FileSystem.get(uri, getConf());
+    if (!(fs instanceof S3AFileSystem)) {
+      throw new IOException(
+          String.format("URI %s is not an S3A file system: %s", uri,
+              fs.getClass().getName()));
+    }
+    s3a = (S3AFileSystem) fs;
+  }
+
+  /**
+   * Parse CLI arguments and return the positional arguments. The options
+   * are stored in {@link #commandFormat}.
+   *
+   * @param args command line arguments.
+   * @return the positional arguments from the CLI.
+   */
+  List<String> parseArgs(String[] args) {
+    return commandFormat.parse(args, 1);
+  }
+
+  /**
+   * Create the metadata store.
+   */
+  static class InitMetadata extends S3GuardTool {
+    private static final String NAME = "init";
+    private static final String USAGE = NAME +
+        " [-r UNIT] [-w UNIT] [-e ENDPOINT -m URI|s3a://bucket]";
+
+    InitMetadata(Configuration conf) {
+      super(conf);
+      // read capacity.
+      commandFormat.addOptionWithValue("r");
+      // write capacity.
+      commandFormat.addOptionWithValue("w");
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    @Override
+    public int run(String[] args) throws IOException {
+      List<String> paths = parseArgs(args);
+
+      String readCap = commandFormat.getOptValue("r");
+      if (readCap != null && !readCap.isEmpty()) {
+        int readCapacity = Integer.parseInt(readCap);
+        getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity);
+      }
+      String writeCap = commandFormat.getOptValue("w");
+      if (writeCap != null && !writeCap.isEmpty()) {
+        int writeCapacity = Integer.parseInt(writeCap);
+        getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity);
+      }
+
+      // Validate parameters.
+      if (!parseDynamoDBEndPoint(paths)) {
+        return INVALID_ARGUMENT;
+      }
+      initMetadataStore(true);
+      return SUCCESS;
+    }
+  }
+
+  /**
+   * Destroy a metadata store.
+   */
+  static class DestroyMetadata extends S3GuardTool {
+    private static final String NAME = "destroy";
+    private static final String USAGE =
+        NAME + " [-e ENDPOINT -m URI|s3a://bucket]";
+
+    DestroyMetadata(Configuration conf) {
+      super(conf);
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+    @Override
+    public int run(String[] args) throws IOException {
+      List<String> paths = parseArgs(args);
+      if (!parseDynamoDBEndPoint(paths)) {
+        System.out.println(USAGE);
+        return INVALID_ARGUMENT;
+      }
+
+      initMetadataStore(false);
+      Preconditions.checkState(ms != null, "Metadata store is not initialized");
+
+      ms.destroy();
+      LOG.info("Metadata store is deleted.");
+      return SUCCESS;
+    }
+  }
+
+  /**
+   * Import S3 metadata into the metadata store.
+   */
+  static class Import extends S3GuardTool {
+    private static final String NAME = "import";
+    private static final String USAGE = NAME +
+        " [-m URI] s3a://bucket/path/";
+
+    private final Set<Path> dirCache = new HashSet<>();
+
+    Import(Configuration conf) {
+      super(conf);
+    }
+
+    // temporary: for metadata store.
+    @VisibleForTesting
+    void setMetadataStore(MetadataStore ms) {
+      this.ms = ms;
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    private String getUsage() {
+      return USAGE;
+    }
+
+    /**
+     * Put the parents into the metadata store and cache if not present.
+     *
+     * @param f the file or an empty directory.
+     * @throws IOException on I/O errors.
+     */
+    private void putParentsIfNotPresent(S3AFileStatus f) throws IOException {
+      Preconditions.checkNotNull(f);
+      Path parent = f.getPath().getParent();
+      while (parent != null) {
+        if (dirCache.contains(parent)) {
+          return;
+        }
+        S3AFileStatus dir = new S3AFileStatus(false, parent, f.getOwner());
+        ms.put(new PathMetadata(dir));
+        dirCache.add(parent);
+        parent = parent.getParent();
+      }
+    }
+
+    /**
+     * Recursively import every path under the given directory.
+     */
+    private void importDir(S3AFileStatus status) throws IOException {
+      Preconditions.checkArgument(status.isDirectory());
+      RemoteIterator<LocatedFileStatus> it =
+          s3a.listFiles(status.getPath(), true);
+
+      while (it.hasNext()) {
+        LocatedFileStatus located = it.next();
+        S3AFileStatus child;
+        if (located.isDirectory()) {
+          // Note that {@link S3AFileStatus#isEmptyDirectory} is erased in
+          // {@link LocatedFileStatus}; the metadata store implementation
+          // can choose how to set isEmptyDir when the subfiles are imported
+          // after the directory is created in the metadata store.
+          final boolean isEmptyDir = true;
+          child = new S3AFileStatus(isEmptyDir, located.getPath(),
+              located.getOwner());
+          dirCache.add(child.getPath());
+        } else {
+          child = new S3AFileStatus(located.getLen(),
+              located.getModificationTime(),
+              located.getPath(),
+              located.getBlockSize(),
+              located.getOwner());
+        }
+        putParentsIfNotPresent(child);
+        ms.put(new PathMetadata(child));
+      }
+    }
+
+    @Override
+    public int run(String[] args) throws IOException {
+      List<String> paths = parseArgs(args);
+      if (paths.isEmpty()) {
+        System.out.println(getUsage());
+        return INVALID_ARGUMENT;
+      }
+      String s3Path = paths.get(0);
+      initS3AFileSystem(s3Path);
+      initMetadataStore(true);
+      URI uri;
+      try {
+        uri = new URI(s3Path);
+      } catch (URISyntaxException e) {
+        throw new IOException(e);
+      }
+      String filePath = uri.getPath();
+      Path path = new Path(filePath);
+      S3AFileStatus status = s3a.getFileStatus(path);
+      if (status.isFile()) {
+        PathMetadata meta = new PathMetadata(status);
+        ms.put(meta);
+      } else {
+        importDir(status);
+      }
+
+      return SUCCESS;
+    }
+  }
+
+  /**
+   * Show the diff between S3 and the metadata store.
+   */
+  static class Diff extends S3GuardTool {
+    private static final String NAME = "diff";
+    private static final String USAGE = NAME +
+        " [-m URI] s3a://bucket/path/";
+    private static final String SEP = "\t";
+    static final String S3_PREFIX = "S3";
+    static final String MS_PREFIX = "MS";
+
+    Diff(Configuration conf) {
+      super(conf);
+    }
+
+    @VisibleForTesting
+    void setMetadataStore(MetadataStore ms) {
+      Preconditions.checkNotNull(ms);
+      this.ms = ms;
+    }
+
+    @Override
+    String getName() {
+      return NAME;
+    }
+
+    /**
+     * Format a FileStatus for printing in the S3Guard diff tool.
+     * @param status the status to print.
+     * @return the string of output.
+     */
+    private static String formatFileStatus(S3AFileStatus status) {
+      return String.format("%s%s%s",
+          status.isDirectory() ? "D" : "F",
+          SEP,
+          status.getPath().toString());
+    }
+
+    /**
+     * Print difference, if any, between two file statuses to the output stream.
+     *
+     * @param statusFromMS file status from metadata store.
+     * @param statusFromS3 file status from S3.
+     * @param out output stream.
+     */
+    private static void printDiff(S3AFileStatus statusFromMS,
+                                  S3AFileStatus statusFromS3,
+                                  PrintStream out) {
+      Preconditions.checkArgument(
+          !(statusFromMS == null && statusFromS3 == null));
+      if (statusFromMS == null) {
+        out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(statusFromS3));
+      } else if (statusFromS3 == null) {
+        out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(statusFromMS));
+      }
+      // TODO: Do we need to compare the internal fields of two FileStatuses?
+    }
+
+    /**
+     * Compare the metadata of the directory with the same path on S3 and
+     * in the metadata store, respectively. If one of them is null, the
+     * metadata of the directory and all of its subdirectories are treated
+     * as missing from that source.
+     *
+     * The FileStatus objects obtained from S3 and the metadata store are
+     * passed in to avoid fetching the same metadata twice, since they
+     * have already been obtained from listStatus() / listChildren calls.
+     *
+     * @param msDir the directory FileStatus obtained from the metadata store.
+     * @param s3Dir the directory FileStatus obtained from S3.
+     * @param out the output stream to generate diff results.
+     * @throws IOException
+     */
+    private void compareDir(S3AFileStatus msDir, S3AFileStatus s3Dir,
+                            PrintStream out) throws IOException {
+      if (msDir == null && s3Dir == null) {
+        return;
+      }
+      if (msDir != null && s3Dir != null) {
+        Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()),
+            String.format("The paths from the metadata store and S3 differ:" +
+                " ms=%s s3=%s", msDir.getPath(), s3Dir.getPath()));
+      }
+
+      printDiff(msDir, s3Dir, out);
+      Map<Path, S3AFileStatus> s3Children = new HashMap<>();
+      if (s3Dir != null && s3Dir.isDirectory()) {
+        for (FileStatus status : s3a.listStatus(s3Dir.getPath())) {
+          Preconditions.checkState(status instanceof S3AFileStatus);
+          s3Children.put(status.getPath(), (S3AFileStatus) status);
+        }
+      }
+
+      Map<Path, S3AFileStatus> msChildren = new HashMap<>();
+      if (msDir != null && msDir.isDirectory()) {
+        DirListingMetadata dirMeta =
+            ms.listChildren(msDir.getPath());
+
+        if (dirMeta != null) {
+          for (PathMetadata meta : dirMeta.getListing()) {
+            S3AFileStatus status = (S3AFileStatus) meta.getFileStatus();
+            msChildren.put(status.getPath(), status);
+          }
+        }
+      }
+
+      Set<Path> allPaths = new HashSet<>(s3Children.keySet());
+      allPaths.addAll(msChildren.keySet());
+
+      for (Path path : allPaths) {
+        S3AFileStatus s3status = s3Children.get(path);
+        S3AFileStatus msStatus = msChildren.get(path);
+        printDiff(msStatus, s3status, out);
+        if ((s3status != null && s3status.isDirectory()) ||
+            (msStatus != null && msStatus.isDirectory())) {
+          compareDir(msStatus, s3status, out);
+        }
+      }
+      out.flush();
+    }
+
+    /**
+     * Compare both metadata store and S3 on the same path.
+     *
+     * @param path the path to be compared.
+     * @param out  the output stream to display results.
+     * @throws IOException
+     */
+    private void compare(Path path, PrintStream out) throws IOException {
+      Path qualified = s3a.qualify(path);
+      S3AFileStatus s3Status = null;
+      try {
+        s3Status = s3a.getFileStatus(qualified);
+      } catch (FileNotFoundException e) { // not on S3; s3Status stays null
+      }
+      PathMetadata meta = ms.get(qualified);
+      S3AFileStatus msStatus = meta != null ?
+          (S3AFileStatus) meta.getFileStatus() : null;
+      compareDir(msStatus, s3Status, out);
+    }
+
+    @VisibleForTesting
+    public int run(String[] args, PrintStream out) throws IOException {
+      List<String> paths = parseArgs(args);
+      if (paths.isEmpty()) {
+        out.println(USAGE);
+        return INVALID_ARGUMENT;
+      }
+      String s3Path = paths.get(0);
+      initS3AFileSystem(s3Path);
+      initMetadataStore(true);
+
+      URI uri;
+      try {
+        uri = new URI(s3Path);
+      } catch (URISyntaxException e) {
+        throw new IOException(e);
+      }
+      Path root;
+      if (uri.getPath().isEmpty()) {
+        root = new Path("/");
+      } else {
+        root = new Path(uri.getPath());
+      }
+      root = s3a.qualify(root);
+      compare(root, out);
+      out.flush();
+      return SUCCESS;
+    }
+
+    @Override
+    public int run(String[] args) throws IOException {
+      return run(args, System.out);
+    }
+  }
+
+  private static void printHelp() {
+    System.out.println("Usage: hadoop " + NAME + " [" +
+        InitMetadata.NAME + "|" + DestroyMetadata.NAME +
+        "|" + Import.NAME + "|" + Diff.NAME +
+        "] [OPTIONS] [ARGUMENTS]");
+
+    System.out.println("\tperform metadata store " +
+        "administrative commands for the s3a filesystem.");
+  }
+
+  /**
+   * Execute the command with the given arguments.
+   *
+   * @param args command specific arguments.
+   * @param conf Hadoop configuration.
+   * @return exit code.
+   * @throws Exception on failure.
+   */
+  public static int run(String[] args, Configuration conf) throws Exception {
+    if (args.length == 0) {
+      printHelp();
+      return INVALID_ARGUMENT;
+    }
+    final String subCommand = args[0];
+    S3GuardTool cmd;
+    switch (subCommand) {
+    case InitMetadata.NAME:
+      cmd = new InitMetadata(conf);
+      break;
+    case DestroyMetadata.NAME:
+      cmd = new DestroyMetadata(conf);
+      break;
+    case Import.NAME:
+      cmd = new Import(conf);
+      break;
+    case Diff.NAME:
+      cmd = new Diff(conf);
+      break;
+    default:
+      printHelp();
+      return INVALID_ARGUMENT;
+    }
+    return ToolRunner.run(conf, cmd, args);
+  }
+
+  public static void main(String[] args) throws Exception {
+    try {
+      int ret = run(args, new Configuration());
+      System.exit(ret);
+    } catch (Exception e) {
+      System.err.println(e.getMessage());
+      System.exit(ERROR);
+    }
+  }
+}

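From printDiff() and formatFileStatus() above, every line emitted by the diff
sub-command is a tab-separated triple: the source holding the entry (S3 or
MS), the type (D for directory, F for file), and the qualified path. For a
hypothetical bucket the output looks like:

    S3      F       s3a://example-bucket/s3_only/file-0
    MS      D       s3a://example-bucket/ms_only
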
http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3a.sh
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3a.sh b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3a.sh
new file mode 100644
index 0000000..008652a
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3a.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if ! declare -f hadoop_subcommand_s3a >/dev/null 2>/dev/null; then
+
+  if [[ "${HADOOP_SHELL_EXECNAME}" = hadoop ]]; then
+    hadoop_add_subcommand "s3a" "manage data on S3"
+  fi
+
+  # this can't be indented otherwise shelldocs won't get it
+
+## @description  s3a command for hadoop
+## @audience     public
+## @stability    stable
+## @replaceable  yes
+function hadoop_subcommand_s3a
+{
+  # shellcheck disable=SC2034
+  HADOOP_CLASSNAME=org.apache.hadoop.fs.s3a.s3guard.S3GuardTool
+  hadoop_add_to_classpath_tools hadoop-aws
+}
+
+fi
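
Once the assembly change at the top of this patch ships this profile into
libexec/shellprofile.d, the hadoop launcher gains the s3a sub-command: it
sets HADOOP_CLASSNAME to org.apache.hadoop.fs.s3a.s3guard.S3GuardTool and
adds the hadoop-aws tools jars to the classpath, so "hadoop s3a init ..."
and friends land in the entry point sketched earlier.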

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
index 845f2e9..b2ffedb 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java
@@ -53,6 +53,15 @@ public final class S3ATestUtils {
   public static final String UNSET_PROPERTY = "unset";
 
   /**
+   * Get S3A FS name.
+   * @param conf configuration.
+   * @return S3A fs name.
+   */
+  public static String getFsName(Configuration conf) {
+    return conf.getTrimmed(TEST_FS_S3A_NAME, "");
+  }
+
+  /**
    * Create the test filesystem.
    *
    * If the test.fs.s3a.name property is not set, this will

http://git-wip-us.apache.org/repos/asf/hadoop/blob/f10114c1/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
----------------------------------------------------------------------
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
new file mode 100644
index 0000000..a300205
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java
@@ -0,0 +1,300 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import com.amazonaws.services.dynamodbv2.document.DynamoDB;
+import com.amazonaws.services.dynamodbv2.document.Table;
+import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
+import com.amazonaws.services.dynamodbv2.model.CreateTableRequest;
+import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
+import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
+import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.S3AFileStatus;
+import org.apache.hadoop.fs.s3a.S3AFileSystem;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.DestroyMetadata;
+import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Diff;
+import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.InitMetadata;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Test S3Guard related CLI commands.
+ */
+public class TestS3GuardTool {
+
+  private static final String OWNER = "hdfs";
+
+  private Configuration conf;
+  private MetadataStore ms;
+  private S3AFileSystem fs;
+
+  /** Get a qualified test path on S3. */
+  private String getTestPath(String path) {
+    return fs.qualify(new Path(path)).toString();
+  }
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new Configuration();
+    fs = S3ATestUtils.createTestFileSystem(conf);
+
+    ms = new LocalMetadataStore();
+    ms.initialize(fs);
+  }
+
+  @After
+  public void tearDown() {
+  }
+
+  // Check the existence of a given DynamoDB table.
+  private static boolean exist(DynamoDB dynamoDB, String tableName) {
+    assertNotNull(dynamoDB);
+    assertNotNull(tableName);
+    assertFalse("empty table name", tableName.isEmpty());
+    try {
+      Table table = dynamoDB.getTable(tableName);
+      table.describe();
+    } catch (ResourceNotFoundException e) {
+      return false;
+    }
+    return true;
+  }
+
+  @Test
+  public void testInitDynamoDBMetadataStore() throws IOException {
+    final String testTableName = "s3guard_test_init_ddb_table";
+    InitMetadata cmd = new InitMetadata(fs.getConf());
+    Table table = null;
+
+    try {
+      String[] args = new String[]{
+          "init", "-m", "dynamodb://" + testTableName,
+      };
+      assertEquals(SUCCESS, cmd.run(args));
+      // Verify the existence of the dynamodb table.
+      try {
+        assertTrue("metadata store should be DynamoDBMetadataStore",
+            ms instanceof DynamoDBMetadataStore);
+        DynamoDBMetadataStore dynamoMs = (DynamoDBMetadataStore) ms;
+        DynamoDB db = dynamoMs.getDynamoDB();
+        table = db.getTable(testTableName);
+        table.describe();
+      } catch (ResourceNotFoundException e) {
+        fail(String.format("DynamoDB table %s does not exist",
+            testTableName));
+      }
+    } finally {
+      // Clean the table.
+      try {
+        if (table != null) {
+          table.delete();
+        }
+      } catch (ResourceNotFoundException e) {
+        // Ignore
+      }
+    }
+  }
+
+  @Test
+  public void testDestroyDynamoDBMetadataStore()
+      throws IOException, InterruptedException {
+    final String testTableName = "s3guard_test_destroy_ddb_table";
+    DestroyMetadata cmd = new DestroyMetadata(fs.getConf());
+
+    // Pre-alloc DynamoDB table.
+    DynamoDB db = DynamoDBMetadataStore.createDynamoDB(fs);
+    try {
+      Table table = db.getTable(testTableName);
+      table.delete();
+      table.waitForDelete();
+    } catch (ResourceNotFoundException e) {
+      // Ignore.
+    }
+    Collection<KeySchemaElement> elements =
+        PathMetadataDynamoDBTranslation.keySchema();
+    Collection<AttributeDefinition> attrs =
+        PathMetadataDynamoDBTranslation.attributeDefinitions();
+    ProvisionedThroughput pt = new ProvisionedThroughput(100L, 200L);
+    Table table = db.createTable(new CreateTableRequest()
+        .withAttributeDefinitions(attrs)
+        .withKeySchema(elements)
+        .withTableName(testTableName)
+        .withProvisionedThroughput(pt));
+    table.waitForActive();
+    assertTrue(exist(db, testTableName));
+
+    String[] args = new String[]{
+        "destroy", "-m", "dynamodb://" + testTableName,
+    };
+    assertEquals(SUCCESS, cmd.run(args));
+    assertFalse(String.format("%s still exists", testTableName),
+        exist(db, testTableName));
+  }
+
+  @Test
+  public void testImportCommand() throws IOException {
+    fs.mkdirs(new Path("/test"));
+    Path dir = new Path("/test/a");
+    fs.mkdirs(dir);
+    for (int i = 0; i < 10; i++) {
+      String child = String.format("file-%d", i);
+      try (FSDataOutputStream out = fs.create(new Path(dir, child))) {
+        out.write(1);
+      }
+    }
+
+    S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
+    cmd.setMetadataStore(ms);
+
+    assertEquals(0, cmd.run(new String[]{"import", getTestPath("/test/a")}));
+
+    DirListingMetadata children =
+        ms.listChildren(new Path(getTestPath("/test/a")));
+    assertEquals(10, children.getListing().size());
+    // assertTrue(children.isAuthoritative());
+  }
+
+  private void mkdirs(Path path, boolean onS3, boolean onMetadataStore)
+      throws IOException {
+    if (onS3) {
+      fs.mkdirs(path);
+    }
+    if (onMetadataStore) {
+      S3AFileStatus status = new S3AFileStatus(true, path, OWNER);
+      ms.put(new PathMetadata(status));
+    }
+  }
+
+  private static void putFile(MetadataStore ms, S3AFileStatus f)
+      throws IOException {
+    assertNotNull(f);
+    ms.put(new PathMetadata(f));
+    Path parent = f.getPath().getParent();
+    while (parent != null) {
+      S3AFileStatus dir = new S3AFileStatus(false, parent, f.getOwner());
+      ms.put(new PathMetadata(dir));
+      parent = parent.getParent();
+    }
+  }
+
+  /**
+   * Create file either on S3 or in metadata store.
+   * @param path the file path.
+   * @param onS3 set to true to create the file on S3.
+   * @param onMetadataStore set to true to create the file on the
+   *                        metadata store.
+   * @throws IOException
+   */
+  private void createFile(Path path, boolean onS3, boolean onMetadataStore)
+      throws IOException {
+    if (onS3) {
+      ContractTestUtils.touch(fs, path);
+    }
+
+    if (onMetadataStore) {
+      S3AFileStatus status = new S3AFileStatus(100L, 10000L,
+          fs.qualify(path), 512L, "hdfs");
+      putFile(ms, status);
+    }
+  }
+
+  @Test
+  public void testDiffCommand() throws IOException {
+    Set<Path> filesOnS3 = new HashSet<>();  // files on S3.
+    Set<Path> filesOnMS = new HashSet<>();  // files on metadata store.
+
+    String testPath = getTestPath("/test-diff");
+    mkdirs(new Path(testPath), true, true);
+
+    Path msOnlyPath = new Path(testPath, "ms_only");
+    mkdirs(msOnlyPath, false, true);
+    filesOnMS.add(msOnlyPath);
+    for (int i = 0; i < 5; i++) {
+      Path file = new Path(msOnlyPath, String.format("file-%d", i));
+      createFile(file, false, true);
+      filesOnMS.add(file);
+    }
+
+    Path s3OnlyPath = new Path(testPath, "s3_only");
+    mkdirs(s3OnlyPath, true, false);
+    filesOnS3.add(s3OnlyPath);
+    for (int i = 0; i < 5; i++) {
+      Path file = new Path(s3OnlyPath, String.format("file-%d", i));
+      createFile(file, true, false);
+      filesOnS3.add(file);
+    }
+
+    ByteArrayOutputStream buf = new ByteArrayOutputStream();
+    PrintStream out = new PrintStream(buf);
+    Diff cmd = new Diff(fs.getConf());
+    cmd.setMetadataStore(ms);
+    assertEquals(SUCCESS,
+        cmd.run(new String[]{"diff", "-m", "local://metadata", testPath}, out));
+
+    Set<Path> actualOnS3 = new HashSet<>();
+    Set<Path> actualOnMS = new HashSet<>();
+    try (ByteArrayInputStream in =
+             new ByteArrayInputStream(buf.toByteArray())) {
+      try (BufferedReader reader =
+               new BufferedReader(new InputStreamReader(in))) {
+        String line;
+        while ((line = reader.readLine()) != null) {
+          String[] fields = line.split("\\s");
+          assertEquals("[" + line + "] does not have enough fields",
+              3, fields.length);
+          String where = fields[0];
+          if (Diff.S3_PREFIX.equals(where)) {
+            actualOnS3.add(new Path(fields[2]));
+          } else if (Diff.MS_PREFIX.equals(where)) {
+            actualOnMS.add(new Path(fields[2]));
+          } else {
+            fail("Unknown prefix: " + where);
+          }
+        }
+      }
+    }
+    String actualOut = buf.toString();
+    assertEquals("Mismatched metadata store outputs: " + actualOut,
+        filesOnMS, actualOnMS);
+    assertEquals("Mismatched s3 outputs: " + actualOut, filesOnS3, actualOnS3);
+  }
+}


