hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From z..@apache.org
Subject [22/34] hadoop git commit: HDFS-6673. Add delimited format support to PB OIV tool. Contributed by Eddy Xu.
Date Thu, 29 Jan 2015 18:06:05 GMT
HDFS-6673. Add delimited format support to PB OIV tool. Contributed by Eddy Xu.


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5227f6f8
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5227f6f8
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5227f6f8

Branch: refs/heads/HDFS-EC
Commit: 5227f6f84a5843e6851b96361f32037db508bd01
Parents: 3a5a90d
Author: Andrew Wang <wang@apache.org>
Authored: Wed Jan 28 12:36:29 2015 -0800
Committer: Zhe Zhang <zhz@apache.org>
Committed: Thu Jan 29 10:05:26 2015 -0800

----------------------------------------------------------------------
 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt     |   2 +
 hadoop-hdfs-project/hadoop-hdfs/pom.xml         |   5 +
 .../tools/offlineImageViewer/FSImageLoader.java |   4 +-
 .../OfflineImageViewerPB.java                   |  21 +-
 .../PBImageDelimitedTextWriter.java             | 132 +++++
 .../offlineImageViewer/PBImageTextWriter.java   | 586 +++++++++++++++++++
 .../TestOfflineImageViewer.java                 |  54 +-
 7 files changed, 799 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5227f6f8/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 973f2f0..4bd2c55 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -309,6 +309,8 @@ Release 2.7.0 - UNRELEASED
 
     HDFS-7056. Snapshot support for truncate. (Plamen Jeliazkov and shv)
 
+    HDFS-6673. Add delimited format support to PB OIV tool. (Eddy Xu via wang)
+
   IMPROVEMENTS
 
     HDFS-7055. Add tracing to DFSInputStream (cmccabe)

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5227f6f8/hadoop-hdfs-project/hadoop-hdfs/pom.xml
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
index bad1792..d5c1f35 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
@@ -198,6 +198,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.fusesource.leveldbjni</groupId>
+      <artifactId>leveldbjni-all</artifactId>
+      <version>1.8</version>
+    </dependency>
     <!-- 'mvn dependency:analyze' fails to detect use of this dependency -->
     <dependency>
       <groupId>org.bouncycastle</groupId>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5227f6f8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
index 2f2fa5f..fd29106 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java
@@ -237,7 +237,7 @@ class FSImageLoader {
     return inodes;
   }
 
-  private static String[] loadStringTable(InputStream in) throws
+  static String[] loadStringTable(InputStream in) throws
   IOException {
     FsImageProto.StringTableSection s = FsImageProto.StringTableSection
         .parseDelimitedFrom(in);
@@ -479,7 +479,7 @@ class FSImageLoader {
     }
   }
 
-  private long getFileSize(FsImageProto.INodeSection.INodeFile f) {
+  static long getFileSize(FsImageProto.INodeSection.INodeFile f) {
     long size = 0;
     for (HdfsProtos.BlockProto p : f.getBlocksList()) {
       size += p.getNumBytes();

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5227f6f8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
index 4fce6a3..6590366 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java
@@ -65,6 +65,10 @@ public class OfflineImageViewerPB {
       + "    -step defines the granularity of the distribution. (2MB by default)\n"
       + "  * Web: Run a viewer to expose read-only WebHDFS API.\n"
       + "    -addr specifies the address to listen. (localhost:5978 by default)\n"
+      + "  * Delimited: Generate a text file with all of the elements common\n"
+      + "    to both inodes and inodes-under-construction, separated by a\n"
+      + "    delimiter. The default delimiter is \\t, though this may be\n"
+      + "    changed via the -delimiter argument.\n"
       + "\n"
       + "Required command line arguments:\n"
       + "-i,--inputFile <arg>   FSImage file to process.\n"
@@ -74,8 +78,12 @@ public class OfflineImageViewerPB {
       + "                       file exists, it will be overwritten.\n"
       + "                       (output to stdout by default)\n"
       + "-p,--processor <arg>   Select which type of processor to apply\n"
-      + "                       against image file. (XML|FileDistribution|Web)\n"
+      + "                       against image file. (XML|FileDistribution|Web|Delimited)\n"
       + "                       (Web by default)\n"
+      + "-delimiter <arg>       Delimiting string to use with Delimited processor\n"
+      + "-t,--temp <arg>        Use temporary dir to cache intermediate result to generate\n"
+      + "                       Delimited outputs. If not set, Delimited processor constructs\n"
+      + "                       the namespace in memory before outputting text.\n"
       + "-h,--help              Display usage information and exit\n";
 
   /**
@@ -97,6 +105,8 @@ public class OfflineImageViewerPB {
     options.addOption("maxSize", true, "");
     options.addOption("step", true, "");
     options.addOption("addr", true, "");
+    options.addOption("delimiter", true, "");
+    options.addOption("t", "temp", true, "");
 
     return options;
   }
@@ -141,6 +151,9 @@ public class OfflineImageViewerPB {
     String inputFile = cmd.getOptionValue("i");
     String processor = cmd.getOptionValue("p", "Web");
     String outputFile = cmd.getOptionValue("o", "-");
+    String delimiter = cmd.getOptionValue("delimiter",
+        PBImageDelimitedTextWriter.DEFAULT_DELIMITER);
+    String tempPath = cmd.getOptionValue("t", "");
 
     Configuration conf = new Configuration();
     try (PrintStream out = outputFile.equals("-") ?
@@ -163,6 +176,15 @@ public class OfflineImageViewerPB {
             viewer.start(inputFile);
           }
           break;
+        case "Delimited":
+          // Open the image in the same try-with-resources as the writer so
+          // the file is closed even when visit() fails before reading it.
+          try (RandomAccessFile r = new RandomAccessFile(inputFile, "r");
+              PBImageDelimitedTextWriter writer =
+                  new PBImageDelimitedTextWriter(out, delimiter, tempPath)) {
+            writer.visit(r);
+          }
+          break;
       }
       return 0;
     } catch (EOFException e) {

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5227f6f8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
new file mode 100644
index 0000000..350967d
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+/**
+ * A PBImageDelimitedTextWriter generates a text representation of the PB
+ * fsimage, one line per inode, with each element separated by a delimiter
+ * string.
+ *
+ * Every line has the same columns, in this order: path, replication,
+ * modification time, access time, preferred block size, number of blocks,
+ * file size, namespace quota, diskspace quota, permission, user name and
+ * group name. Columns that do not apply to an inode type (for example the
+ * block columns of a directory) are still emitted and hold 0, or the
+ * formatted time for timestamp 0. Times are formatted with the pattern
+ * "yyyy-MM-dd HH:mm".
+ *
+ * Individual block information for each file is not currently included.
+ *
+ * The default delimiter is tab, as this is an unlikely value to be included in
+ * an inode path or other text metadata. The delimiter value can be set via
+ * the constructor.
+ */
+public class PBImageDelimitedTextWriter extends PBImageTextWriter {
+  static final String DEFAULT_DELIMITER = "\t";
+  private static final String DATE_FORMAT = "yyyy-MM-dd HH:mm";
+  // SimpleDateFormat is not thread-safe, so every writer owns its formatter.
+  private final SimpleDateFormat dateFormatter =
+      new SimpleDateFormat(DATE_FORMAT);
+
+  private final String delimiter;
+
+  /**
+   * @param out the stream the text lines are written to.
+   * @param delimiter the string placed between two columns.
+   * @param tempPath passed on to {@link PBImageTextWriter}; selects where
+   *                 the intermediate metadata is kept.
+   */
+  PBImageDelimitedTextWriter(PrintStream out, String delimiter, String tempPath)
+      throws IOException {
+    super(out, tempPath);
+    this.delimiter = delimiter;
+  }
+
+  /** Format a timestamp given in milliseconds since the epoch. */
+  private String formatDate(long date) {
+    return dateFormatter.format(new Date(date));
+  }
+
+  /** Append the delimiter followed by an int column. */
+  private void append(StringBuilder buffer, int field) {
+    buffer.append(delimiter);
+    buffer.append(field);
+  }
+
+  /** Append the delimiter followed by a long column. */
+  private void append(StringBuilder buffer, long field) {
+    buffer.append(delimiter);
+    buffer.append(field);
+  }
+
+  /** Append the delimiter followed by a text column. */
+  private void append(StringBuilder buffer, String field) {
+    buffer.append(delimiter);
+    buffer.append(field);
+  }
+
+  /**
+   * Build the delimited line for one inode.
+   *
+   * @param parent the path of the parent directory.
+   * @param inode the inode to describe.
+   * @return the line, without a trailing line separator.
+   * @throws IllegalStateException if the inode is neither a file, a
+   *         directory nor a symlink.
+   */
+  @Override
+  public String getEntry(String parent, INode inode) {
+    // The buffer never leaves this method, so the unsynchronized
+    // StringBuilder is sufficient.
+    StringBuilder buffer = new StringBuilder();
+    String path = new File(parent, inode.getName().toStringUtf8()).toString();
+    buffer.append(path);
+    PermissionStatus p;
+
+    switch (inode.getType()) {
+    case FILE:
+      INodeFile file = inode.getFile();
+      p = getPermission(file.getPermission());
+      append(buffer, file.getReplication());
+      append(buffer, formatDate(file.getModificationTime()));
+      append(buffer, formatDate(file.getAccessTime()));
+      append(buffer, file.getPreferredBlockSize());
+      append(buffer, file.getBlocksCount());
+      append(buffer, FSImageLoader.getFileSize(file));
+      append(buffer, 0);  // NS_QUOTA
+      append(buffer, 0);  // DS_QUOTA
+      break;
+    case DIRECTORY:
+      INodeDirectory dir = inode.getDirectory();
+      p = getPermission(dir.getPermission());
+      append(buffer, 0);  // Replication
+      append(buffer, formatDate(dir.getModificationTime()));
+      append(buffer, formatDate(0));  // Access time.
+      append(buffer, 0);  // Block size.
+      append(buffer, 0);  // Num blocks.
+      append(buffer, 0);  // Num bytes.
+      append(buffer, dir.getNsQuota());
+      append(buffer, dir.getDsQuota());
+      break;
+    case SYMLINK:
+      INodeSymlink s = inode.getSymlink();
+      p = getPermission(s.getPermission());
+      append(buffer, 0);  // Replication
+      append(buffer, formatDate(s.getModificationTime()));
+      append(buffer, formatDate(s.getAccessTime()));
+      append(buffer, 0);  // Block size.
+      append(buffer, 0);  // Num blocks.
+      append(buffer, 0);  // Num bytes.
+      append(buffer, 0);  // NS_QUOTA
+      append(buffer, 0);  // DS_QUOTA
+      break;
+    default:
+      // Fail with a clear message instead of relying on an assert that is
+      // disabled in production and would turn into a NullPointerException.
+      throw new IllegalStateException(
+          "Unsupported inode type: " + inode.getType());
+    }
+    append(buffer, p.getPermission().toString());
+    append(buffer, p.getUserName());
+    append(buffer, p.getGroupName());
+    return buffer.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5227f6f8/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
new file mode 100644
index 0000000..0da263d
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java
@@ -0,0 +1,586 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.io.LimitInputStream;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.permission.PermissionStatus;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf;
+import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName;
+import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
+import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode;
+import org.apache.hadoop.hdfs.server.namenode.INodeId;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.util.Time;
+import org.fusesource.leveldbjni.JniDBFactory;
+import org.iq80.leveldb.DB;
+import org.iq80.leveldb.Options;
+import org.iq80.leveldb.WriteBatch;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedInputStream;
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.io.RandomAccessFile;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This class reads the protobuf-based fsimage and generates text output
+ * for each inode to {@link PBImageTextWriter#out}. Subclasses implement
+ * {@link #getEntry} to generate the formatted string for each inode.
+ *
+ * Since the protobuf-based fsimage does not guarantee the order of inodes and
+ * directories, PBImageTextWriter runs a two-phase scan:
+ *
+ * <ol>
+ *   <li>In the first phase, PBImageTextWriter scans the INode sections to read
+ *   the filename of each directory. It also scans the INode_Dir sections to
+ *   load the relationships between a directory and its children. It uses this
+ *   metadata to build the FS namespace, which is stored in a
+ *   {@link MetadataMap}.</li>
+ *   <li>In the second phase, PBImageTextWriter re-scans the INode sections.
+ *   For each inode, it looks up the path of the parent directory in the
+ *   {@link MetadataMap} and generates the output.</li>
+ * </ol>
+ *
+ * Two variants of {@link MetadataMap} are provided. {@link InMemoryMetadataDB}
+ * stores all metadata in memory (O(n) memory) while
+ * {@link LevelDBMetadataMap} stores metadata in LevelDB on disk (O(1) memory).
+ * Users can choose between them based on the time/space tradeoffs.
+ */
+abstract class PBImageTextWriter implements Closeable {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(PBImageTextWriter.class);
+
+  /**
+   * Store for the namespace information that is collected before any text
+   * is generated.
+   *
+   * Two kinds of records are kept:
+   * <ul>
+   *   <li>for each inode Id, the inode Id of its parent directory;</li>
+   *   <li>for each directory inode Id, the directory itself.</li>
+   * </ul>
+   */
+  private interface MetadataMap extends Closeable {
+    /**
+     * Record that {@code childId} is a child of directory {@code parentId}.
+     */
+    void putDirChild(long parentId, long childId) throws IOException;
+
+    /**
+     * Record a directory under its inode Id.
+     */
+    void putDir(INode dir) throws IOException;
+
+    /** Return the full path of the parent directory of the given inode. */
+    String getParentPath(long inode) throws IOException;
+
+    /** Flush metadata to persistent storage, if the implementation has any. */
+    void sync() throws IOException;
+  }
+
+  /**
+   * Maintain all the metadata in memory.
+   */
+  private static class InMemoryMetadataDB implements MetadataMap {
+    /**
+     * Represent a directory in memory.
+     */
+    private static class Dir {
+      private final long inode;
+      private Dir parent = null;
+      private String name;
+      private String path = null;  // cached full path of the directory.
+
+      Dir(long inode, String name) {
+        this.inode = inode;
+        this.name = name;
+      }
+
+      /** Set the parent; fails the state check if one is already set. */
+      private void setParent(Dir parent) {
+        Preconditions.checkState(this.parent == null);
+        this.parent = parent;
+      }
+
+      /**
+       * Returns the full path of this directory. A directory without a
+       * parent is reported as "/".
+       */
+      private String getPath() {
+        if (this.parent == null) {
+          return "/";
+        }
+        if (this.path == null) {
+          this.path = new File(parent.getPath(), name).toString();
+          // The name is only needed to build the path; drop the reference
+          // once the path is cached.
+          this.name = null;
+        }
+        return this.path;
+      }
+
+      @Override
+      public boolean equals(Object o) {
+        return o instanceof Dir && inode == ((Dir) o).inode;
+      }
+
+      @Override
+      public int hashCode() {
+        // Same value as Long#hashCode(), computed without boxing.
+        return (int) (inode ^ (inode >>> 32));
+      }
+    }
+
+    /** INode Id to Dir object mapping */
+    private final Map<Long, Dir> dirMap = new HashMap<>();
+
+    /** Children to parent directory INode ID mapping. */
+    private final Map<Long, Dir> dirChildMap = new HashMap<>();
+
+    InMemoryMetadataDB() {
+    }
+
+    /** Nothing to release: all state lives on the heap. */
+    @Override
+    public void close() throws IOException {
+    }
+
+    /**
+     * Link a child inode to its parent directory. When the child is itself a
+     * known directory, its Dir object is linked to the parent as well. Each
+     * child may be registered only once.
+     */
+    @Override
+    public void putDirChild(long parentId, long childId) {
+      Dir parent = dirMap.get(parentId);
+      Dir child = dirMap.get(childId);
+      if (child != null) {
+        child.setParent(parent);
+      }
+      Preconditions.checkState(!dirChildMap.containsKey(childId));
+      dirChildMap.put(childId, parent);
+    }
+
+    /** Register a directory; each inode Id may be registered only once. */
+    @Override
+    public void putDir(INode p) {
+      Preconditions.checkState(!dirMap.containsKey(p.getId()));
+      Dir dir = new Dir(p.getId(), p.getName().toStringUtf8());
+      dirMap.put(p.getId(), dir);
+    }
+
+    /**
+     * Returns the path of the parent directory of the given inode, or the
+     * empty string for the root inode.
+     *
+     * @throws IllegalStateException if no parent was registered for the inode.
+     */
+    @Override
+    public String getParentPath(long inode) throws IOException {
+      if (inode == INodeId.ROOT_INODE_ID) {
+        return "";
+      }
+      Dir parent = dirChildMap.get(inode);
+      Preconditions.checkState(parent != null,
+          "Can not find parent directory for INode: %s", inode);
+      return parent.getPath();
+    }
+
+    /** No-op: there is no persistent storage to synchronize. */
+    @Override
+    public void sync() {
+    }
+  }
+
+  /**
+   * A MetadataMap that stores metadata in LevelDB.
+   */
+  private static class LevelDBMetadataMap implements MetadataMap {
+    /**
+     * Store metadata in LevelDB. Puts are collected in a WriteBatch that is
+     * written to the database every BATCH_SIZE puts and on {@link #sync()}.
+     */
+    private static class LevelDBStore implements Closeable {
+      private DB db = null;
+      private WriteBatch batch = null;
+      private int writeCount = 0;
+      private static final int BATCH_SIZE = 1024;
+
+      /**
+       * Create a new database at the given path; opening fails if a
+       * database already exists there.
+       */
+      LevelDBStore(final File dbPath) throws IOException {
+        Options options = new Options();
+        options.createIfMissing(true);
+        options.errorIfExists(true);
+        db = JniDBFactory.factory.open(dbPath, options);
+        batch = db.createWriteBatch();
+      }
+
+      /**
+       * Release the batch and the database. Puts that were collected since
+       * the last {@link #sync()} are not written by this method.
+       */
+      @Override
+      public void close() throws IOException {
+        if (batch != null) {
+          IOUtils.cleanup(null, batch);
+          batch = null;
+        }
+        IOUtils.cleanup(null, db);
+        db = null;
+      }
+
+      /** Queue a put and flush the batch once it holds BATCH_SIZE puts. */
+      public void put(byte[] key, byte[] value) throws IOException {
+        batch.put(key, value);
+        writeCount++;
+        if (writeCount >= BATCH_SIZE) {
+          sync();
+        }
+      }
+
+      /** Read a value directly from the database, bypassing the batch. */
+      public byte[] get(byte[] key) throws IOException {
+        return db.get(key);
+      }
+
+      /** Write the queued puts to the database and start a new batch. */
+      public void sync() throws IOException {
+        try {
+          db.write(batch);
+        } finally {
+          batch.close();
+          batch = null;
+        }
+        batch = db.createWriteBatch();
+        writeCount = 0;
+      }
+    }
+
+    /**
+     * A LRU cache for directory path strings.
+     *
+     * The key of this LRU cache is the inode of a directory.
+     */
+    private static class DirPathCache extends LinkedHashMap<Long, String> {
+      private final static int CAPACITY = 16 * 1024;
+
+      DirPathCache() {
+        // accessOrder = true makes the eldest entry the least recently used
+        // one; the default insertion order would evict in FIFO order.
+        super(CAPACITY, 0.75f, true);
+      }
+
+      @Override
+      protected boolean removeEldestEntry(Map.Entry<Long, String> entry) {
+        return super.size() > CAPACITY;
+      }
+    }
+
+    /** Map the child inode to the parent directory inode. */
+    private LevelDBStore dirChildMap = null;
+    /** Directory entry map */
+    private LevelDBStore dirMap = null;
+    private DirPathCache dirPathCache = new DirPathCache();
+
+    /**
+     * Create the two LevelDB stores under {@code baseDir}. An existing
+     * directory at that path is deleted first.
+     *
+     * @throws IOException if the directory cannot be created or one of the
+     *         databases cannot be opened.
+     */
+    LevelDBMetadataMap(String baseDir) throws IOException {
+      File dbDir = new File(baseDir);
+      if (dbDir.exists()) {
+        FileUtils.deleteDirectory(dbDir);
+      }
+      if (!dbDir.mkdirs()) {
+        throw new IOException("Failed to mkdir on " + dbDir);
+      }
+      try {
+        dirChildMap = new LevelDBStore(new File(dbDir, "dirChildMap"));
+        dirMap = new LevelDBStore(new File(dbDir, "dirMap"));
+      } catch (IOException e) {
+        LOG.error("Failed to open LevelDBs", e);
+        IOUtils.cleanup(null, this);
+        // Propagate the failure: returning normally would hand out an
+        // instance whose stores are null and fail later with an NPE.
+        throw e;
+      }
+    }
+
+    @Override
+    public void close() throws IOException {
+      IOUtils.cleanup(null, dirChildMap, dirMap);
+      dirChildMap = null;
+      dirMap = null;
+    }
+
+    /** Encode a long as its 8-byte big-endian representation. */
+    private static byte[] toBytes(long value) {
+      return ByteBuffer.allocate(8).putLong(value).array();
+    }
+
+    private static byte[] toBytes(String value)
+        throws UnsupportedEncodingException {
+      return value.getBytes("UTF-8");
+    }
+
+    /** Decode an 8-byte array written by {@link #toBytes(long)}. */
+    private static long toLong(byte[] bytes) {
+      Preconditions.checkArgument(bytes.length == 8);
+      return ByteBuffer.wrap(bytes).getLong();
+    }
+
+    private static String toString(byte[] bytes) throws IOException {
+      try {
+        return new String(bytes, "UTF-8");
+      } catch (UnsupportedEncodingException e) {
+        throw new IOException(e);
+      }
+    }
+
+    @Override
+    public void putDirChild(long parentId, long childId) throws IOException {
+      dirChildMap.put(toBytes(childId), toBytes(parentId));
+    }
+
+    @Override
+    public void putDir(INode dir) throws IOException {
+      Preconditions.checkArgument(dir.hasDirectory(),
+          "INode %s (%s) is not a directory.", dir.getId(), dir.getName());
+      dirMap.put(toBytes(dir.getId()), toBytes(dir.getName().toStringUtf8()));
+    }
+
+    /**
+     * Returns the path of the parent directory of the given inode, or "/"
+     * for the root inode. Paths are resolved recursively through the
+     * ancestors and cached per directory.
+     *
+     * @throws IllegalStateException if the parent of the inode, or the
+     *         directory entry of a non-root ancestor, is missing.
+     */
+    @Override
+    public String getParentPath(long inode) throws IOException {
+      if (inode == INodeId.ROOT_INODE_ID) {
+        return "/";
+      }
+      byte[] bytes = dirChildMap.get(toBytes(inode));
+      Preconditions.checkState(bytes != null && bytes.length == 8,
+          "Can not find parent directory for inode %s, "
+              + "fsimage might be corrupted", inode);
+      long parent = toLong(bytes);
+      String parentPath = dirPathCache.get(parent);
+      if (parentPath == null) {
+        bytes = dirMap.get(toBytes(parent));
+        if (parent != INodeId.ROOT_INODE_ID) {
+          Preconditions.checkState(bytes != null,
+              "Can not find parent directory for inode %s, "
+                  + "the fsimage might be corrupted.", parent);
+        }
+        // Only the root may lack a stored name (see the check above);
+        // treat that as the empty name instead of failing on null.
+        String parentName = (bytes == null) ? "" : toString(bytes);
+        parentPath = new File(getParentPath(parent), parentName).toString();
+        dirPathCache.put(parent, parentPath);
+      }
+      return parentPath;
+    }
+
+    @Override
+    public void sync() throws IOException {
+      dirChildMap.sync();
+      dirMap.sync();
+    }
+  }
+
+  private String[] stringTable;
+  private PrintStream out;
+  private MetadataMap metadataMap = null;
+
+  /**
+   * Create a writer that turns a PB FsImage into text.
+   * @param out the stream that receives the text lines.
+   * @param tempPath directory used to keep the metadata in LevelDB; pass the
+   *                 empty string to keep the metadata in memory instead.
+   */
+  PBImageTextWriter(PrintStream out, String tempPath) throws IOException {
+    this.out = out;
+    this.metadataMap = tempPath.isEmpty()
+        ? new InMemoryMetadataDB()
+        : new LevelDBMetadataMap(tempPath);
+  }
+
+  /**
+   * Close the metadata map. The call is delegated to
+   * {@link IOUtils#cleanup}, which is handed a null logger here.
+   */
+  @Override
+  public void close() throws IOException {
+    IOUtils.cleanup(null, metadataMap);
+  }
+
+  /**
+   * Get text output for the given inode.
+   * @param parent the path of parent directory
+   * @param inode the INode object to output.
+   * @return the text for this inode; {@link #outputINodes} prints it as one
+   *         line of output.
+   */
+  abstract protected String getEntry(String parent, INode inode);
+
+  /**
+   * Read the given fsimage and write one text entry per inode to the output
+   * stream.
+   *
+   * The sections listed in the file summary are sorted by their
+   * {@link SectionName}; sections with an unrecognized name sort first and
+   * are otherwise ignored. The string table is loaded first, then the
+   * directories and the parent/child relations, and finally the inodes are
+   * printed.
+   *
+   * @param file the fsimage to read.
+   * @throws IOException if the file is not a recognized fsimage or cannot
+   *         be read.
+   */
+  public void visit(RandomAccessFile file) throws IOException {
+    Configuration conf = new Configuration();
+    if (!FSImageUtil.checkFileFormat(file)) {
+      throw new IOException("Unrecognized FSImage");
+    }
+
+    FileSummary summary = FSImageUtil.loadSummary(file);
+
+    try (FileInputStream fin = new FileInputStream(file.getFD())) {
+      ArrayList<FileSummary.Section> sections =
+          Lists.newArrayList(summary.getSectionsList());
+      Collections.sort(sections,
+          new Comparator<FileSummary.Section>() {
+            @Override
+            public int compare(FileSummary.Section s1,
+                FileSummary.Section s2) {
+              SectionName n1 = SectionName.fromString(s1.getName());
+              SectionName n2 = SectionName.fromString(s2.getName());
+              if (n1 == null) {
+                return n2 == null ? 0 : -1;
+              } else if (n2 == null) {
+                // Must be the opposite sign of the branch above, otherwise
+                // compare(a, b) and compare(b, a) contradict each other.
+                return 1;
+              } else {
+                return Integer.compare(n1.ordinal(), n2.ordinal());
+              }
+            }
+          });
+
+      for (FileSummary.Section section : sections) {
+        // Compare with == rather than switch: fromString() yields null for
+        // an unrecognized section name and a switch on null throws.
+        if (SectionName.fromString(section.getName())
+            != SectionName.STRING_TABLE) {
+          continue;
+        }
+        fin.getChannel().position(section.getOffset());
+        InputStream is = FSImageUtil.wrapInputStreamForCompression(conf,
+            summary.getCodec(), new BufferedInputStream(new LimitInputStream(
+                fin, section.getLength())));
+        stringTable = FSImageLoader.loadStringTable(is);
+      }
+
+      loadDirectories(fin, sections, summary, conf);
+      loadINodeDirSection(fin, sections, summary, conf);
+      // Make all collected metadata readable before the lookup phase.
+      metadataMap.sync();
+      output(conf, summary, fin, sections);
+    }
+  }
+
+  /**
+   * Re-read every INODE section of the image and print its inodes.
+   */
+  private void output(Configuration conf, FileSummary summary,
+      FileInputStream fin, ArrayList<FileSummary.Section> sections)
+      throws IOException {
+    final long begin = Time.monotonicNow();
+    for (FileSummary.Section sec : sections) {
+      if (SectionName.fromString(sec.getName()) != SectionName.INODE) {
+        continue;
+      }
+      // Each section is read through a stream limited to its own length.
+      fin.getChannel().position(sec.getOffset());
+      InputStream bounded = new BufferedInputStream(
+          new LimitInputStream(fin, sec.getLength()));
+      outputINodes(FSImageUtil.wrapInputStreamForCompression(
+          conf, summary.getCodec(), bounded));
+    }
+    LOG.debug("Time to output inodes: {}ms", Time.monotonicNow() - begin);
+  }
+
+  /**
+   * Resolve the permission value stored in an inode into a
+   * {@link PermissionStatus}. Delegates to
+   * FSImageFormatPBINode.Loader.loadPermission together with the string
+   * table that {@link #visit} loaded from the image.
+   *
+   * @param perm the permission value read from an inode.
+   */
+  protected PermissionStatus getPermission(long perm) {
+    return FSImageFormatPBINode.Loader.loadPermission(perm, stringTable);
+  }
+
+  /** Scan every INODE section and register the directories found in it. */
+  private void loadDirectories(
+      FileInputStream fin, List<FileSummary.Section> sections,
+      FileSummary summary, Configuration conf)
+      throws IOException {
+    LOG.info("Loading directories");
+    final long begin = Time.monotonicNow();
+    for (FileSummary.Section sec : sections) {
+      if (SectionName.fromString(sec.getName()) != SectionName.INODE) {
+        continue;
+      }
+      fin.getChannel().position(sec.getOffset());
+      InputStream bounded = new BufferedInputStream(
+          new LimitInputStream(fin, sec.getLength()));
+      loadDirectoriesInINodeSection(
+          FSImageUtil.wrapInputStreamForCompression(
+              conf, summary.getCodec(), bounded));
+    }
+    LOG.info("Finished loading directories in {}ms",
+        Time.monotonicNow() - begin);
+  }
+
+  /**
+   * Scan every INODE_DIR section and record the parent/child relations.
+   */
+  private void loadINodeDirSection(
+      FileInputStream fin, List<FileSummary.Section> sections,
+      FileSummary summary, Configuration conf)
+      throws IOException {
+    LOG.info("Loading INode directory section.");
+    final long begin = Time.monotonicNow();
+    for (FileSummary.Section sec : sections) {
+      if (SectionName.fromString(sec.getName()) != SectionName.INODE_DIR) {
+        continue;
+      }
+      fin.getChannel().position(sec.getOffset());
+      InputStream bounded = new BufferedInputStream(
+          new LimitInputStream(fin, sec.getLength()));
+      buildNamespace(FSImageUtil.wrapInputStreamForCompression(
+          conf, summary.getCodec(), bounded));
+    }
+    LOG.info("Finished loading INode directory section in {}ms",
+        Time.monotonicNow() - begin);
+  }
+
+  /**
+   * Read one INode section and register every directory inode in the
+   * metadata map; other inodes are skipped.
+   */
+  private void loadDirectoriesInINodeSection(InputStream in) throws IOException {
+    INodeSection header = INodeSection.parseDelimitedFrom(in);
+    LOG.info("Loading directories in INode section.");
+    int dirCount = 0;
+    int scanned = 0;
+    while (scanned < header.getNumInodes()) {
+      INode inode = INode.parseDelimitedFrom(in);
+      if (scanned % 10000 == 0 && LOG.isDebugEnabled()) {
+        LOG.debug("Scanned {} inodes.", scanned);
+      }
+      if (inode.hasDirectory()) {
+        metadataMap.putDir(inode);
+        dirCount++;
+      }
+      scanned++;
+    }
+    LOG.info("Found {} directories in INode section.", dirCount);
+  }
+
+  /**
+   * Read directory entries from an INodeDirectory section until the stream
+   * is exhausted and record each child under its parent directory.
+   */
+  private void buildNamespace(InputStream in) throws IOException {
+    int scanned = 0;
+    FsImageProto.INodeDirectorySection.DirEntry entry;
+    while ((entry = FsImageProto.INodeDirectorySection.DirEntry
+        .parseDelimitedFrom(in)) != null) {
+      scanned++;
+      if (scanned % 10000 == 0 && LOG.isDebugEnabled()) {
+        LOG.debug("Scanned {} directories.", scanned);
+      }
+      final long parentId = entry.getParent();
+      final int childCount = entry.getChildrenCount();
+      for (int idx = 0; idx != childCount; idx++) {
+        metadataMap.putDirChild(parentId, entry.getChildren(idx));
+      }
+      // Reference children are not supported yet, so none may be present.
+      Preconditions.checkState(entry.getRefChildrenCount() == 0);
+    }
+    LOG.info("Scanned {} INode directories to build namespace.", scanned);
+  }
+
+  /**
+   * Read one INode section and print the entry of every inode, using the
+   * metadata map to look up the path of its parent directory.
+   */
+  private void outputINodes(InputStream in) throws IOException {
+    INodeSection header = INodeSection.parseDelimitedFrom(in);
+    final long total = header.getNumInodes();
+    LOG.info("Found {} INodes in the INode section", total);
+    for (int idx = 0; idx < total; ++idx) {
+      INode inode = INode.parseDelimitedFrom(in);
+      String entry = getEntry(metadataMap.getParentPath(inode.getId()), inode);
+      out.println(entry);
+
+      if (idx % 100000 == 0 && LOG.isDebugEnabled()) {
+        LOG.debug("Outputted {} INodes.", idx);
+      }
+    }
+    LOG.info("Outputted {} INodes.", total);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5227f6f8/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
index 4bb2b79..a8d1d54 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java
@@ -20,22 +20,26 @@ package org.apache.hadoop.hdfs.tools.offlineImageViewer;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.PrintStream;
-import java.io.PrintWriter;
 import java.io.RandomAccessFile;
 import java.io.StringReader;
-import java.io.StringWriter;
 import java.net.HttpURLConnection;
 import java.net.URI;
 import java.net.URL;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -51,6 +55,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FileSystemTestHelper;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
@@ -329,6 +334,51 @@ public class TestOfflineImageViewer {
     }
   }
 
+  /**
+   * Runs the delimited-writer check twice: first with an empty db path
+   * (the in-memory db), then with a db path under the test root directory.
+   */
+  @Test
+  public void testPBDelimitedWriter() throws IOException, InterruptedException {
+    // Test in memory db.
+    testPBDelimitedWriter("");
+
+    // Test with a db path below the test root directory.
+    final String dbPath =
+        new FileSystemTestHelper().getTestRootDir() + "/delimited.db";
+    testPBDelimitedWriter(dbPath);
+  }
+
+  /**
+   * Runs {@link PBImageDelimitedTextWriter} over {@code originalFsimage} and
+   * checks its tab-delimited output: every line must split into 12 fields,
+   * and the set of first fields (after dropping the root directory and the
+   * "invalid XML char" directory entries) must equal the key set of
+   * {@code writtenFiles}.
+   *
+   * @param db db argument handed to the writer; the caller passes an empty
+   *           string to test the in-memory db
+   * @throws IOException if reading the image or the captured output fails
+   * @throws InterruptedException propagated unchanged if the writer raises it
+   */
+  private void testPBDelimitedWriter(String db)
+      throws IOException, InterruptedException {
+    final String DELIMITER = "\t";
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+
+    // The image file is opened in the resource header so that its handle is
+    // released even if the writer fails before it finishes reading.
+    try (PrintStream o = new PrintStream(output);
+        RandomAccessFile image = new RandomAccessFile(originalFsimage, "r")) {
+      PBImageDelimitedTextWriter v =
+          new PBImageDelimitedTextWriter(o, DELIMITER, db);
+      v.visit(image);
+    }
+
+    // Re-read the captured output line by line and collect the first field
+    // of every line.
+    Set<String> fileNames = new HashSet<>();
+    try (
+        ByteArrayInputStream input =
+            new ByteArrayInputStream(output.toByteArray());
+        BufferedReader reader =
+            new BufferedReader(new InputStreamReader(input))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        System.out.println(line);
+        String[] fields = line.split(DELIMITER);
+        assertEquals(12, fields.length);
+        fileNames.add(fields[0]);
+      }
+    }
+
+    // writtenFiles does not contain root directory and "invalid XML char" dir.
+    for (Iterator<String> it = fileNames.iterator(); it.hasNext(); ) {
+      String filename = it.next();
+      if (filename.startsWith("/dirContainingInvalidXMLChar")
+          || filename.equals("/")) {
+        it.remove();
+      }
+    }
+    assertEquals(writtenFiles.keySet(), fileNames);
+  }
+
   private static void compareFile(FileStatus expected, FileStatus status) {
     assertEquals(expected.getAccessTime(), status.getAccessTime());
     assertEquals(expected.getBlockSize(), status.getBlockSize());


Mime
View raw message