hadoop-hdfs-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cmcc...@apache.org
Subject svn commit: r1494403 - in /hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs: ./ src/main/java/org/apache/hadoop/hdfs/server/datanode/ src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/ src/main/java/org/apache/hadoop/hdfs/se...
Date Wed, 19 Jun 2013 00:40:23 GMT
Author: cmccabe
Date: Wed Jun 19 00:40:22 2013
New Revision: 1494403

URL: http://svn.apache.org/r1494403
Log:
HDFS-4461.  DirectoryScanner: volume prefix takes up memory for every block that is scanned
(Colin Patrick McCabe)

Modified:
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
    hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1494403&r1=1494402&r2=1494403&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Jun 19
00:40:22 2013
@@ -79,6 +79,9 @@ Release 2.1.0-beta - UNRELEASED
 
   IMPROVEMENTS
 
+    HDFS-4461. DirectoryScanner: volume path prefix takes up memory for every
+    block that is scanned (Colin Patrick McCabe)
+
     HDFS-4222. NN is unresponsive and loses heartbeats from DNs when 
     configured to use LDAP and LDAP has issues. (Xiaobo Peng, suresh)
 

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java?rev=1494403&r1=1494402&r2=1494403&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java
Wed Jun 19 00:40:22 2013
@@ -33,6 +33,8 @@ import java.util.concurrent.Future;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -154,30 +156,109 @@ public class DirectoryScanner implements
    * Tracks the files and other information related to a block on the disk
    * Missing file is indicated by setting the corresponding member
    * to null.
+   * 
+   * Because millions of these structures may be created, we try to save
+   * memory here.  So instead of storing full paths, we store path suffixes.
+   * The block file, if it exists, will have a path like this:
+   * <volume_base_path>/<block_path>
+   * So we don't need to store the volume path, since we already know what the
+   * volume is.
+   * 
+   * The metadata file, if it exists, will have a path like this:
+   * <volume_base_path>/<block_path>_<genstamp>.meta
+   * So if we have a block file, there isn't any need to store the block path
+   * again.
+   * 
+   * The accessor functions take care of these manipulations.
    */
   static class ScanInfo implements Comparable<ScanInfo> {
     private final long blockId;
-    private final File metaFile;
-    private final File blockFile;
+    
+    /**
+     * The block file path, relative to the volume's base directory.
+     * If there was no block file found, this may be null. If 'vol'
+     * is null, then this is the full path of the block file.
+     */
+    private final String blockSuffix;
+    
+    /**
+     * The suffix of the meta file path relative to the block file.
+     * If blockSuffix is null, then this will be the entire path relative
+     * to the volume base directory, or an absolute path if vol is also
+     * null.
+     */
+    private final String metaSuffix;
+
     private final FsVolumeSpi volume;
 
+    private final static Pattern CONDENSED_PATH_REGEX =
+        Pattern.compile("(?<!^)(\\\\|/){2,}");
+    
+    private final static String QUOTED_FILE_SEPARATOR = 
+        Matcher.quoteReplacement(File.separator);
+    
+    /**
+     * Get the most condensed version of the path.
+     *
+     * For example, the condensed version of /foo//bar is /foo/bar
+     * Unlike {@link File#getCanonicalPath()}, this will never perform I/O
+     * on the filesystem.
+     */
+    private static String getCondensedPath(String path) {
+      return CONDENSED_PATH_REGEX.matcher(path).
+          replaceAll(QUOTED_FILE_SEPARATOR);
+    }
+
+    /**
+     * Get a path suffix.
+     *
+     * @param f            The file to get the suffix for.
+     * @param prefix       The prefix we're stripping off.
+     *
+     * @return             A suffix such that prefix + suffix = path to f
+     */
+    private static String getSuffix(File f, String prefix) {
+      String fullPath = getCondensedPath(f.getAbsolutePath());
+      if (fullPath.startsWith(prefix)) {
+        return fullPath.substring(prefix.length());
+      }
+      throw new RuntimeException(prefix + " is not a prefix of " + fullPath);
+    }
+
     ScanInfo(long blockId) {
       this(blockId, null, null, null);
     }
 
     ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) {
       this.blockId = blockId;
-      this.metaFile = metaFile;
-      this.blockFile = blockFile;
+      String condensedVolPath = vol == null ? null :
+        getCondensedPath(vol.getBasePath());
+      this.blockSuffix = blockFile == null ? null :
+        getSuffix(blockFile, condensedVolPath);
+      if (metaFile == null) {
+        this.metaSuffix = null;
+      } else if (blockFile == null) {
+        this.metaSuffix = getSuffix(metaFile, condensedVolPath);
+      } else {
+        this.metaSuffix = getSuffix(metaFile,
+            condensedVolPath + blockSuffix);
+      }
       this.volume = vol;
     }
 
-    File getMetaFile() {
-      return metaFile;
+    File getBlockFile() {
+      return (blockSuffix == null) ? null :
+        new File(volume.getBasePath(), blockSuffix);
     }
 
-    File getBlockFile() {
-      return blockFile;
+    File getMetaFile() {
+      if (metaSuffix == null) {
+        return null;
+      } else if (blockSuffix == null) {
+        return new File(volume.getBasePath(), metaSuffix);
+      } else {
+        return new File(volume.getBasePath(), blockSuffix + metaSuffix);
+      }
     }
 
     long getBlockId() {
@@ -216,8 +297,9 @@ public class DirectoryScanner implements
     }
 
     public long getGenStamp() {
-      return metaFile != null ? Block.getGenerationStamp(metaFile.getName()) :
-        GenerationStamp.GRANDFATHER_GENERATION_STAMP;
+      return metaSuffix != null ? Block.getGenerationStamp(
+          getMetaFile().getName()) : 
+            GenerationStamp.GRANDFATHER_GENERATION_STAMP;
     }
   }
 

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java?rev=1494403&r1=1494402&r2=1494403&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/FsVolumeSpi.java
Wed Jun 19 00:40:22 2013
@@ -30,6 +30,9 @@ public interface FsVolumeSpi {
   /** @return the available storage space in bytes. */
   public long getAvailable() throws IOException;
 
+  /** @return the base path to the volume */
+  public String getBasePath();
+
   /** @return the path to the volume */
   public String getPath(String bpid) throws IOException;
 

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java?rev=1494403&r1=1494402&r2=1494403&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java
Wed Jun 19 00:40:22 2013
@@ -125,6 +125,11 @@ class FsVolumeImpl implements FsVolumeSp
   }
 
   @Override
+  public String getBasePath() {
+    return currentDir.getParent();
+  }
+  
+  @Override
   public String getPath(String bpid) throws IOException {
     return getBlockPoolSlice(bpid).getDirectory().getAbsolutePath();
   }

Modified: hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java?rev=1494403&r1=1494402&r2=1494403&view=diff
==============================================================================
--- hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
(original)
+++ hadoop/common/branches/branch-2/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java
Wed Jun 19 00:40:22 2013
@@ -380,4 +380,93 @@ public class TestDirectoryScanner {
     assertNotNull(memBlock);
     assertEquals(genStamp, memBlock.getGenerationStamp());
   }
+  
+  private static class TestFsVolumeSpi implements FsVolumeSpi {
+    @Override
+    public String[] getBlockPoolList() {
+      return new String[0];
+    }
+    
+    @Override
+    public long getAvailable() throws IOException {
+      return 0;
+    }
+    
+    @Override
+    public String getBasePath() {
+      return "/base";
+    }
+    
+    @Override
+    public String getPath(String bpid) throws IOException {
+      return "/base/current/" + bpid;
+    }
+
+    @Override
+    public File getFinalizedDir(String bpid) throws IOException {
+      return new File("/base/current/" + bpid + "/finalized");
+    }
+  }
+
+  private final static TestFsVolumeSpi TEST_VOLUME = new TestFsVolumeSpi();
+  
+  private final static String BPID_1 = "BP-783049782-127.0.0.1-1370971773491";
+  
+  private final static String BPID_2 = "BP-367845636-127.0.0.1-5895645674231";
+      
+  void testScanInfoObject(long blockId, File blockFile, File metaFile)
+      throws Exception {
+    assertEquals("/base/current/" + BPID_1 + "/finalized",
+        TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath());
+    DirectoryScanner.ScanInfo scanInfo =
+        new DirectoryScanner.ScanInfo(blockId, blockFile, metaFile, TEST_VOLUME);
+    assertEquals(blockId, scanInfo.getBlockId());
+    if (blockFile != null) {
+      assertEquals(blockFile.getAbsolutePath(),
+          scanInfo.getBlockFile().getAbsolutePath());
+    } else {
+      assertNull(scanInfo.getBlockFile());
+    }
+    if (metaFile != null) {
+      assertEquals(metaFile.getAbsolutePath(),
+          scanInfo.getMetaFile().getAbsolutePath());
+    } else {
+      assertNull(scanInfo.getMetaFile());
+    }
+    assertEquals(TEST_VOLUME, scanInfo.getVolume());
+  }
+  
+  void testScanInfoObject(long blockId) throws Exception {
+    DirectoryScanner.ScanInfo scanInfo =
+        new DirectoryScanner.ScanInfo(blockId);
+    assertEquals(blockId, scanInfo.getBlockId());
+    assertNull(scanInfo.getBlockFile());
+    assertNull(scanInfo.getMetaFile());
+  }
+
+  @Test(timeout=120000)
+  public void TestScanInfo() throws Exception {
+    testScanInfoObject(123,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123"),
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123__1001.meta"));
+    testScanInfoObject(464,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123"),
+        null);
+    testScanInfoObject(523,
+        null,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_1).getAbsolutePath(),
+            "blk_123__1009.meta"));
+    testScanInfoObject(789,
+        null,
+        null);
+    testScanInfoObject(456);
+    testScanInfoObject(123,
+        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
+            "blk_567"),
+        new File(TEST_VOLUME.getFinalizedDir(BPID_2).getAbsolutePath(),
+            "blk_567__1004.meta"));
+  }
 }



Mime
View raw message