To: hadoop-commits@lucene.apache.org
From: dhruba@apache.org
Subject: svn commit: r596418 - in /lucene/hadoop/trunk: ./ src/java/org/apache/hadoop/dfs/ src/test/org/apache/hadoop/dfs/
Date: Mon, 19 Nov 2007 20:03:08 -0000
Message-Id: <20071119200309.4F5921A9832@eris.apache.org>

Author: dhruba
Date: Mon Nov 19 12:03:06 2007
New Revision: 596418

URL: http://svn.apache.org/viewvc?rev=596418&view=rev
Log:
HADOOP-713. Reduce CPU usage on namenode while listing directories.
FileSystem.listPaths does not return the size of the entire subtree.
Introduced a new API ClientProtocol.getContentLength that returns the
size of the subtree. (Dhruba Borthakur via dhruba)

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon Nov 19 12:03:06 2007
@@ -109,6 +109,13 @@
 
 Release 0.15.1 -
 
+  INCOMPATIBLE CHANGES
+
+    HADOOP-713. Reduce CPU usage on namenode while listing directories.
+    FileSystem.listPaths does not return the size of the entire subtree.
+    Introduced a new API ClientProtocol.getContentLength that returns the
+    size of the subtree. (Dhruba Borthakur via dhruba)
+
   IMPROVEMENTS
 
     HADOOP-1917. Addition of guides/tutorial for better overall
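The incompatibility called out above is that a directory's reported length no
longer includes its subtree; callers that want a subtree size must now ask for
it explicitly. A minimal sketch of the client-side usage (hypothetical path
and class name; assumes a 0.15.1-era FileSystem bound to HDFS):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class SubtreeSize {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);  // a DistributedFileSystem when
                                               // fs.default.name points at HDFS
        Path dir = new Path("/user/example");  // hypothetical directory

        // Before this commit, directory entries returned by listPaths carried
        // the full subtree size; after it, a directory's length is 0 and the
        // subtree size is requested explicitly:
        long bytes = fs.getContentLength(dir); // one RPC to the namenode
        System.out.println(dir + " holds " + bytes + " bytes");
      }
    }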
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/ClientProtocol.java Mon Nov 19 12:03:06 2007
@@ -35,8 +35,9 @@
    * 17 : getBlockSize replaced by getPreferredBlockSize
    * 18 : datanodereport returns dead, live or all nodes.
    * 19 : rollEditLog() returns a token to uniquely identify the editfile.
+   * 20 : getContentLength returns the total size in bytes of a directory subtree
    */
-  public static final long versionID = 19L;
+  public static final long versionID = 20L;
 
   ///////////////////////////////////////
   // File contents
@@ -373,4 +374,11 @@
    * @return object containing information regarding the file
    */
   public DFSFileInfo getFileInfo(String src) throws IOException;
+
+  /* Get the total size of all files and directories rooted at
+   * the specified directory.
+   * @param src The string representation of the path
+   * @return size of directory subtree in bytes
+   */
+  public long getContentLength(String src) throws IOException;
 }

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSClient.java Mon Nov 19 12:03:06 2007
@@ -485,6 +485,19 @@
   }
 
   /**
+   * Retrieves the total size of all files and directories under
+   * the specified path.
+   *
+   * @param src
+   * @throws IOException
+   * @return the number of bytes in the subtree rooted at src
+   */
+  public long getContentLength(String src
+                               ) throws IOException {
+    return namenode.getContentLength(src);
+  }
+
+  /**
    * Pick the best node from which to stream the data.
    * Entries in nodes are already in the priority order
    */

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DFSFileInfo.java Mon Nov 19 12:03:06 2007
@@ -47,8 +47,8 @@
    * Create DFSFileInfo by file INode
    */
   public DFSFileInfo(String path, INode node) {
-    // XXX This should probably let length == 0 for directories
-    super(node.computeContentsLength(),
+    // length is zero for directories
+    super(node.isDirectory() ? 0 : node.computeContentsLength(),
          node.isDirectory(),
          node.isDirectory() ? 0 : ((INodeFile)node).getReplication(),
          node.isDirectory() ? 0 : ((INodeFile)node).getPreferredBlockSize(),
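The bump of versionID from 19L to 20L is what protects a mismatched client and
namenode: the 0.15-era IPC layer passes the client's compiled-in version when
the proxy is created, and the server refuses calls from a different version.
Roughly (a simplified sketch, not code from this patch; nameNodeAddr and conf
stand in for values DFSClient already holds, and the exact call site may differ):

    // Simplified sketch: obtaining a versioned ClientProtocol proxy.
    // If the namenode was built with a different ClientProtocol.versionID,
    // the IPC layer fails the connection instead of mis-decoding calls.
    ClientProtocol namenode =
        (ClientProtocol) RPC.getProxy(ClientProtocol.class,
                                      ClientProtocol.versionID,  // 20L after this commit
                                      nameNodeAddr, conf);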
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DfsPath.java Mon Nov 19 12:03:06 2007
@@ -46,7 +46,7 @@
     return info.getLen();
   }
   public long getContentsLength() {
-    assert isDirectory() : "Must be a directory";
+    assert !isDirectory();
     return info.getLen();
   }
   public short getReplication() {

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/DistributedFileSystem.java Mon Nov 19 12:03:06 2007
@@ -148,12 +148,16 @@
   }
 
   public long getContentLength(Path f) throws IOException {
+    // If it is a directory, then issue a getContentLength
+    // RPC to find the size of the entire subtree in one call.
+    //
     if (f instanceof DfsPath) {
-      return ((DfsPath)f).getContentsLength();
+      DfsPath dfspath = (DfsPath)f;
+      if (!dfspath.isDirectory()) {
+        return dfspath.getContentsLength();
+      }
     }
-
-    DFSFileInfo info[] = dfs.listPaths(getPathName(f));
-    return (info == null) ? 0 : info[0].getLen();
+    return dfs.getContentLength(getPathName(f));
   }
 
   public FileStatus[] listStatus(Path f) throws IOException {

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java Mon Nov 19 12:03:06 2007
@@ -610,4 +610,19 @@
     }
     return src;
   }
+
+  /* Get the size of the directory subtree.
+   */
+  long getContentLength(String src) throws IOException {
+    String srcs = normalizePath(src);
+    synchronized (rootDir) {
+      INode targetNode = rootDir.getNode(srcs);
+      if (targetNode == null) {
+        throw new IOException(src + " does not exist");
+      }
+      else {
+        return targetNode.computeContentsLength();
+      }
+    }
+  }
 }
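FSDirectory.getContentLength holds the rootDir lock while
INode.computeContentsLength walks the subtree once. The walk amounts to the
following (a paraphrase for illustration only; the real INode code differs,
and getChildren/computeFileLength are hypothetical names here):

    // Paraphrased sketch of a subtree-size walk such as
    // INode.computeContentsLength(); not the actual INode source.
    static long subtreeLength(INode node) {
      if (!node.isDirectory()) {
        return node.computeFileLength();        // hypothetical: sum of the file's block sizes
      }
      long total = 0;
      for (INode child : node.getChildren()) {  // hypothetical accessor
        total += subtreeLength(child);
      }
      return total;
    }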
Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Mon Nov 19 12:03:06 2007
@@ -1406,6 +1406,15 @@
     return success;
   }
 
+  /* Get the size of the specified directory subtree.
+   * @param src The string representation of the path
+   * @throws IOException if path does not exist
+   * @return size in bytes
+   */
+  long getContentLength(String src) throws IOException {
+    return dir.getContentLength(src);
+  }
+
   /************************************************************
    * A Lease governs all the locks held by a single client.
    * For each client there's a corresponding lease, whose

Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java (original)
+++ lucene/hadoop/trunk/src/java/org/apache/hadoop/dfs/NameNode.java Mon Nov 19 12:03:06 2007
@@ -517,6 +517,15 @@
     namesystem.metaSave(filename);
   }
 
+  /* Get the size of the directory subtree.
+   * @param src The string representation of the path to the file
+   * @throws IOException if path does not exist
+   * @return size in bytes of the directory subtree
+   */
+  public long getContentLength(String src) throws IOException {
+    return namesystem.getContentLength(src);
+  }
+
   ////////////////////////////////////////////////////////////////
   // DatanodeProtocol
   ////////////////////////////////////////////////////////////////
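Taken together, sizing a directory now costs one RPC plus one locked tree walk
on the namenode, instead of a subtree computation for every entry of every
listing. The request path through the code in this patch:

    Client:   DistributedFileSystem.getContentLength(Path)   // directory case
                -> DFSClient.getContentLength(String)
                -> ClientProtocol.getContentLength(String)   // single RPC, version 20
    Namenode: NameNode.getContentLength(String)
                -> FSNamesystem.getContentLength(String)
                -> FSDirectory.getContentLength(String)      // synchronized (rootDir)
                -> INode.computeContentsLength()             // one subtree walk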
Modified: lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java?rev=596418&r1=596417&r2=596418&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java (original)
+++ lucene/hadoop/trunk/src/test/org/apache/hadoop/dfs/TestFileStatus.java Mon Nov 19 12:03:06 2007
@@ -103,20 +103,27 @@
                fs.getFileStatus(file1).isDir() == false);
     assertTrue(fs.getFileStatus(file1).getBlockSize() == blockSize);
     assertTrue(fs.getFileStatus(file1).getReplication() == 1);
+    assertTrue(fs.getFileStatus(file1).getLen() == fileSize);
+    assertTrue(fs.getContentLength(file1) == fileSize);
     System.out.println("Path : \"" + file1 + "\"");
 
-    // create a directory
+    // create an empty directory
     //
+    Path parentDir = new Path("/test");
     Path dir = new Path("/test/mkdirs");
     assertTrue(fs.mkdirs(dir));
     assertTrue(fs.exists(dir));
     assertTrue(dir + " should be a directory",
                fs.getFileStatus(path).isDir() == true);
+    assertTrue(dir + " should be zero size ",
+               fs.getContentLength(dir) == 0);
+    assertTrue(dir + " should be zero size ",
+               fs.getFileStatus(dir).getLen() == 0);
     System.out.println("Dir : \"" + dir + "\"");
 
     // create another file that is smaller than a block.
     //
-    Path file2 = new Path("filestatus2.dat");
+    Path file2 = new Path("/test/mkdirs/filestatus2.dat");
     writeFile(fs, file2, 1, blockSize/4, blockSize);
     System.out.println("Created file filestatus2.dat with one "
                        + " replicas.");
@@ -127,6 +134,42 @@
     assertTrue(fs.getFileStatus(file2).getBlockSize() == blockSize);
     assertTrue(fs.getFileStatus(file2).getReplication() == 1);
 
+    // create another file in the same directory
+    Path file3 = new Path("/test/mkdirs/filestatus3.dat");
+    writeFile(fs, file3, 1, blockSize/4, blockSize);
+    System.out.println("Created file filestatus3.dat with one "
+                       + " replicas.");
+    checkFile(fs, file3, 1);
+
+    // verify that the size of the directory increased by the size
+    // of the two files
+    assertTrue(dir + " size should be " + (blockSize/2),
+               blockSize/2 == fs.getContentLength(dir));
+
+    // The following are test cases for listPaths which is a deprecated
+    // API. These tests should go away when the API is removed.
+
+    // issue a listPaths on directory /test/mkdirs and verify that the
+    // size of the files inside it are valid
+    Path[] files = fs.listPaths(dir);
+    assertTrue(dir + " should have two files", files.length == 2);
+    for (int i = 0; i < files.length; i++) {
+      DfsPath dfspath = (DfsPath) files[i];
+      assertTrue(files[i] + " should be of size " + (blockSize/4),
+                 blockSize/4 == dfspath.getContentsLength());
+      assertTrue(files[i] + " should be of size " + (blockSize/4),
+                 blockSize/4 == fs.getContentLength(dfspath));
+    }
+
+    // issue a listPaths on directory /test and verify that the
+    // size returned for the /test/mkdirs directory is correct.
+    Path[] dirs = fs.listPaths(parentDir);
+    assertTrue(parentDir + " should have one sub directory",
+               dirs.length == 1);
+    DfsPath dfsdir = (DfsPath) dirs[0];
+    assertTrue(dirs[0] + " should be of size " + blockSize/2,
+               fs.getContentLength(dfsdir) == blockSize/2);
+
   } finally {
     fs.close();
     cluster.shutdown();
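For a quick check outside the unit test, a sketch along these lines
(hypothetical class name and argument handling; assumes a reachable
0.15-era HDFS) exercises the same call path the test above verifies:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ContentLengthCheck {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);  // expects fs.default.name -> hdfs://...
        Path dir = new Path(args[0]);          // e.g. /test/mkdirs

        // One getContentLength RPC; no client-side recursion over listings.
        System.out.println(dir + ": " + fs.getContentLength(dir) + " bytes");
        fs.close();
      }
    }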