hadoop-common-commits mailing list archives

From dhr...@apache.org
Subject svn commit: r596418 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/dfs/ src/test/org/apache/hadoop/dfs/
Date Mon, 19 Nov 2007 20:03:08 GMT
Author: dhruba
Date: Mon Nov 19 12:03:06 2007
New Revision: 596418

URL: http://svn.apache.org/viewvc?rev=596418&view=rev
Log:
HADOOP-713.  Reduce CPU usage on namenode while listing directories.
FileSystem.listPaths no longer returns the size of the entire subtree.
Introduced a new API ClientProtocol.getContentLength that returns the
size of the subtree. (Dhruba Borthakur via dhruba)
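
For orientation, a minimal sketch of how a client exercises the new call
through the public FileSystem API once this change is in place; the
configuration, cluster, and path below are illustrative assumptions, not part
of this commit:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  // Illustrative only: assumes a running 0.15-era HDFS cluster.
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);   // typically a DistributedFileSystem
  Path dir = new Path("/test/mkdirs");    // hypothetical directory
  // One RPC to the namenode returns the size of the whole subtree,
  // rather than the client recursively listing the directory itself.
  long subtreeBytes = fs.getContentLength(dir);
  System.out.println(dir + " holds " + subtreeBytes + " bytes");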


Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Nov 19 12:03:06 2007
@@ -109,6 +109,13 @@
 
 Release 0.15.1 -
 
+  INCOMPATIBLE CHANGES
+
+    HADOOP-713.  Reduce CPU usage on namenode while listing directories.
+    FileSystem.listPaths no longer returns the size of the entire subtree.
+    Introduced a new API ClientProtocol.getContentLength that returns the
+    size of the subtree. (Dhruba Borthakur via dhruba)
+
   IMPROVEMENTS
 
     HADOOP-1917.  Addition of guides/tutorial for better overall

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Mon Nov 19 12:03:06 2007
@@ -35,8 +35,9 @@
    * 17 : getBlockSize replaced by getPreferredBlockSize
    * 18 : datanodereport returns dead, live or all nodes.
    * 19 : rollEditLog() returns a token to uniquely identify the editfile.
+   * 20 : getContentLength returns the total size in bytes of a directory subtree
    */
-  public static final long versionID = 19L;
+  public static final long versionID = 20L;
   
   ///////////////////////////////////////
   // File contents
@@ -373,4 +374,11 @@
    * @return object containing information regarding the file
    */
   public DFSFileInfo getFileInfo(String src) throws IOException;
+
+  /** Get the total size of all files and directories rooted at
+   * the specified directory.
+   * @param src The string representation of the path
+   * @return size of directory subtree in bytes
+   */
+  public long getContentLength(String src) throws IOException;
 }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Mon Nov 19 12:03:06 2007
@@ -485,6 +485,19 @@
   }
 
   /**
+   * Retrieves the total size of all files and directories under
+   * the specified path.
+   * 
+   * @param src the string representation of the path to the subtree root
+   * @throws IOException if the path does not exist
+   * @return the number of bytes in the subtree rooted at src
+   */
+  public long getContentLength(String src
+                               ) throws IOException {
+    return namenode.getContentLength(src);
+  }
+
+  /**
    * Pick the best node from which to stream the data.
    * Entries in <i>nodes</i> are already in the priority order
    */

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java Mon Nov 19 12:03:06 2007
@@ -47,8 +47,8 @@
    * Create DFSFileInfo by file INode 
    */
   public DFSFileInfo(String path, INode node) {
-    // XXX This should probably let length == 0 for directories
-    super(node.computeContentsLength(),
+    // length is zero for directories
+    super(node.isDirectory() ? 0 : node.computeContentsLength(), 
           node.isDirectory(), 
           node.isDirectory() ? 0 : ((INodeFile)node).getReplication(), 
           node.isDirectory() ? 0 : ((INodeFile)node).getPreferredBlockSize(),
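
The DFSFileInfo change above is the incompatible part of this commit:
directory entries returned by a listing now carry a length of zero instead of
an eagerly computed subtree size. Callers that depended on the old behavior
must switch to the new RPC. A small sketch of the difference, reusing fs and
dir from the sketch near the top of this message (the values mirror the
updated TestFileStatus below):

  // After this commit, a directory's listed length is always zero ...
  long listedLen = fs.getFileStatus(dir).getLen();  // == 0 for a directory
  // ... and its subtree size is fetched on demand with one namenode RPC.
  long subtreeBytes = fs.getContentLength(dir);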

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java Mon Nov 19 12:03:06 2007
@@ -46,7 +46,7 @@
     return info.getLen();
   }
   public long getContentsLength() {
-    assert isDirectory() : "Must be a directory";
+    assert !isDirectory();
     return info.getLen();
   }
   public short getReplication() {

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Mon Nov 19 12:03:06 2007
@@ -148,12 +148,16 @@
   }
 
   public long getContentLength(Path f) throws IOException {
+    // If it is a directory, then issue a getContentLength
+    // RPC to find the size of the entire subtree in one call.
+    //
     if (f instanceof DfsPath) {
-      return ((DfsPath)f).getContentsLength();
+      DfsPath dfspath = (DfsPath)f;
+      if (!dfspath.isDirectory()) {
+        return dfspath.getContentsLength();
+      }
     }
-
-    DFSFileInfo info[] = dfs.listPaths(getPathName(f));
-    return (info == null) ? 0 : info[0].getLen();
+    return dfs.getContentLength(getPathName(f));
   }
 
   public FileStatus[] listStatus(Path f) throws IOException {

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java Mon Nov 19 12:03:06 2007
@@ -610,4 +610,19 @@
     }
     return src;
   }
+
+  /** Get the size of the directory subtree.
+   */
+  long getContentLength(String src) throws IOException {
+    String srcs = normalizePath(src);
+    synchronized (rootDir) {
+      INode targetNode = rootDir.getNode(srcs);
+      if (targetNode == null) {
+        throw new IOException(src + " does not exist");
+      }
+      else {
+        return targetNode.computeContentsLength();
+      }
+    }
+  }
 }
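
INode.computeContentsLength itself is not touched by this diff; the following
is only a rough sketch of the recursion it presumably performs over the
in-memory namespace. The accessors fileLength and children are invented names
for illustration, not the real INode API:

  // Hypothetical shape of the subtree-size walk; not the actual INode code.
  long computeContentsLength(INode node) {
    if (!node.isDirectory()) {
      return fileLength(node);               // assumed: a file's length in bytes
    }
    long total = 0;
    for (INode child : children(node)) {     // assumed: a directory's children
      total += computeContentsLength(child); // sum every file in the subtree
    }
    return total;
  }

Because the walk runs under the rootDir lock inside a single RPC, the namenode
pays this cost only when a client explicitly asks for a subtree size, not on
every directory listing as before.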

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Mon Nov 19 12:03:06 2007
@@ -1406,6 +1406,15 @@
     return success;
   }
 
+  /** Get the size of the specified directory subtree.
+   * @param src The string representation of the path
+   * @throws IOException if path does not exist
+   * @return size in bytes
+   */
+  long getContentLength(String src) throws IOException {
+    return dir.getContentLength(src);
+  }
+
   /************************************************************
    * A Lease governs all the locks held by a single client.
    * For each client there's a corresponding lease, whose

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Mon Nov 19 12:03:06 2007
@@ -517,6 +517,15 @@
     namesystem.metaSave(filename);
   }
 
+  /** Get the size of the directory subtree.
+   * @param src The string representation of the path to the directory
+   * @throws IOException if path does not exist
+   * @return size in bytes of the directory subtree
+   */
+  public long getContentLength(String src) throws IOException {
+    return namesystem.getContentLength(src);
+  }
+
   ////////////////////////////////////////////////////////////////
   // DatanodeProtocol
   ////////////////////////////////////////////////////////////////
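
Taken together, the server-side additions form a straight delegation chain;
one client call travels roughly as follows (method names as in the diffs
above):

  // DFSClient.getContentLength(src)                 client side, one RPC
  //   -> ClientProtocol.getContentLength(src)       protocol versionID 20
  //      -> NameNode.getContentLength(src)
  //         -> FSNamesystem.getContentLength(src)
  //            -> FSDirectory.getContentLength(src) synchronized on rootDir
  //               -> INode.computeContentsLength()  walks the subtree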

Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java Mon Nov 19 12:03:06 2007
@@ -103,20 +103,27 @@
                   fs.getFileStatus(file1).isDir() == false);
       assertTrue(fs.getFileStatus(file1).getBlockSize() == blockSize);
       assertTrue(fs.getFileStatus(file1).getReplication() == 1);
+      assertTrue(fs.getFileStatus(file1).getLen() == fileSize);
+      assertTrue(fs.getContentLength(file1) == fileSize);
       System.out.println("Path : \"" + file1 + "\"");
 
-      // create a directory
+      // create an empty directory
       //
+      Path parentDir = new Path("/test");
       Path dir = new Path("/test/mkdirs");
       assertTrue(fs.mkdirs(dir));
       assertTrue(fs.exists(dir));
       assertTrue(dir + " should be a directory", 
                 fs.getFileStatus(dir).isDir() == true);
+      assertTrue(dir + " should be zero size ",
+                 fs.getContentLength(dir) == 0);
+      assertTrue(dir + " should be zero size ",
+                 fs.getFileStatus(dir).getLen() == 0);
       System.out.println("Dir : \"" + dir + "\"");
 
       // create another file that is smaller than a block.
       //
-      Path file2 = new Path("filestatus2.dat");
+      Path file2 = new Path("/test/mkdirs/filestatus2.dat");
       writeFile(fs, file2, 1, blockSize/4, blockSize);
      System.out.println("Created file filestatus2.dat with one "
                         + "replica.");
@@ -127,6 +134,42 @@
       assertTrue(fs.getFileStatus(file2).getBlockSize() == blockSize);
       assertTrue(fs.getFileStatus(file2).getReplication() == 1);
 
+      // create another file in the same directory
+      Path file3 = new Path("/test/mkdirs/filestatus3.dat");
+      writeFile(fs, file3, 1, blockSize/4, blockSize);
+      System.out.println("Created file filestatus3.dat with one "
+                         + "replica.");
+      checkFile(fs, file3, 1);
+
+      // verify that the size of the directory increased by the size 
+      // of the two files
+      assertTrue(dir + " size should be " + (blockSize/2), 
+                 blockSize/2 == fs.getContentLength(dir));
+
+      // The following are test cases for listPaths, which is a deprecated
+      // API. These tests should go away when the API is removed.
+
+      // issue a listPaths on directory /test/mkdirs and verify that the
+      // sizes of the files inside it are valid
+      Path[] files = fs.listPaths(dir);
+      assertTrue(dir + " should have two files", files.length == 2);
+      for (int i = 0; i < files.length; i++) {
+        DfsPath dfspath = (DfsPath) files[i];
+        assertTrue(files[i] + " should be of size " + (blockSize/4), 
+                   blockSize/4 == dfspath.getContentsLength());
+        assertTrue(files[i] + " should be of size " + (blockSize/4), 
+                   blockSize/4 == fs.getContentLength(dfspath));
+      }
+
+      // issue a listPaths on directory /test and verify that the
+      // size returned for /test/mkdirs directory is correct.
+      Path[] dirs = fs.listPaths(parentDir);
+      assertTrue(parentDir + " should have one sub directory", 
+                 dirs.length == 1);
+      DfsPath dfsdir = (DfsPath) dirs[0];
+      assertTrue(dirs[0] + " should be of size " + blockSize/2,
+                 fs.getContentLength(dfsdir) == blockSize/2);
+      
     } finally {
       fs.close();
       cluster.shutdown();


