hadoop-hdfs-commits mailing list archives

From: sur...@apache.org
Subject: svn commit: r811185 - in /hadoop/hdfs/trunk: ./ src/java/org/apache/hadoop/hdfs/server/namenode/ src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/
Date: Fri, 04 Sep 2009 00:20:57 GMT
Author: suresh
Date: Fri Sep  4 00:20:56 2009
New Revision: 811185

URL: http://svn.apache.org/viewvc?rev=811185&view=rev
Log:
HDFS-173. Namenode no longer blocks until a large directory deletion completes;
it allows other operations while the deletion is in progress.
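In outline, the new deletion path works like this: under the FSNamesystem lock the
directory is unlinked from the namespace in one step and the blocks of its subtree
are collected into a list; the collected blocks are then removed in fixed-size
batches, releasing the lock between batches so other clients can get in. A minimal
sketch of that chunked-critical-section pattern (illustrative names only, not the
actual HDFS API):

import java.util.ArrayList;
import java.util.List;

/**
 * Sketch: remove a large collection of items in small batches so that
 * other threads can acquire the shared lock between batches.
 */
class ChunkedRemoval {
  static int DELETION_INCREMENT = 1000; // mirrors FSNamesystem.BLOCK_DELETION_INCREMENT

  private final Object lock = new Object();       // stands in for the FSNamesystem monitor
  private final List<Long> liveBlocks = new ArrayList<Long>();

  void removeInChunks(List<Long> collected) {
    int start = 0;
    while (start < collected.size()) {
      int end = Math.min(start + DELETION_INCREMENT, collected.size());
      synchronized (lock) {                       // hold the lock for one batch only
        for (int i = start; i < end; i++) {
          liveBlocks.remove(collected.get(i));    // per-block bookkeeping
        }
      }
      // Lock released here: pending namespace operations can run.
      start = end;
    }
  }
}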

Added:
    hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java
Modified:
    hadoop/hdfs/trunk/CHANGES.txt
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
    hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java

Modified: hadoop/hdfs/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/CHANGES.txt?rev=811185&r1=811184&r2=811185&view=diff
==============================================================================
--- hadoop/hdfs/trunk/CHANGES.txt (original)
+++ hadoop/hdfs/trunk/CHANGES.txt Fri Sep  4 00:20:56 2009
@@ -122,6 +122,9 @@
     allows an execution of non-FI tests in FI-enable environment.  (Konstantin
     Boudnik via szetszwo)
 
+    HDFS-173. Namenode no longer blocks until a large directory deletion
+    completes; it allows other operations while the deletion is in progress. (suresh)
+
   BUG FIXES
 
     HDFS-76. Better error message to users when commands fail because of 

Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java?rev=811185&r1=811184&r2=811185&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/BlockManager.java Fri Sep  4 00:20:56 2009
@@ -352,12 +352,13 @@
 
   /**
    * Adds block to list of blocks which will be invalidated on specified
-   * datanode and log the move
+   * datanode
    *
    * @param b block
    * @param dn datanode
+   * @param log true to create an entry in the log 
    */
-  void addToInvalidates(Block b, DatanodeInfo dn) {
+  void addToInvalidates(Block b, DatanodeInfo dn, boolean log) {
     Collection<Block> invalidateSet = recentInvalidateSets
         .get(dn.getStorageID());
     if (invalidateSet == null) {
@@ -366,20 +367,39 @@
     }
     if (invalidateSet.add(b)) {
       pendingDeletionBlocksCount++;
-      NameNode.stateChangeLog.info("BLOCK* NameSystem.addToInvalidates: "
-          + b.getBlockName() + " is added to invalidSet of " + dn.getName());
+      if (log) {
+        NameNode.stateChangeLog.info("BLOCK* NameSystem.addToInvalidates: "
+            + b.getBlockName() + " to " + dn.getName());
+      }
     }
   }
 
   /**
+   * Adds block to list of blocks which will be invalidated on specified
+   * datanode and log the operation
+   *
+   * @param b block
+   * @param dn datanode
+   */
+  void addToInvalidates(Block b, DatanodeInfo dn) {
+    addToInvalidates(b, dn, true);
+  }
+
+  /**
    * Adds block to list of blocks which will be invalidated on all its
    * datanodes.
    */
   private void addToInvalidates(Block b) {
+    StringBuilder datanodes = new StringBuilder();
     for (Iterator<DatanodeDescriptor> it = blocksMap.nodeIterator(b); it
         .hasNext();) {
       DatanodeDescriptor node = it.next();
-      addToInvalidates(b, node);
+      addToInvalidates(b, node, false);
+      datanodes.append(node.getName()).append(" ");
+    }
+    if (datanodes.length() != 0) {
+      NameNode.stateChangeLog.info("BLOCK* NameSystem.addToInvalidates: "
+          + b.getBlockName() + " to " + datanodes.toString());
     }
   }
 

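Note the logging change in the hunk above: when a block is invalidated on all of
its replicas, addToInvalidates(Block) suppresses the per-datanode INFO line
(log == false) and emits one aggregated line naming every target datanode. Since a
large directory delete now invalidates many blocks at once, this keeps the
state-change log at one line per block instead of one line per replica.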
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java?rev=811185&r1=811184&r2=811185&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java Fri Sep  4 00:20:56 2009
@@ -570,19 +570,26 @@
   }
     
   /**
-   * Remove the file from management, return blocks
+   * Delete the target directory and collect the blocks under it
+   * 
+   * @param src Path of a directory to delete
+   * @param collectedBlocks Blocks under the deleted directory
+   * @return true on successful deletion; else false
    */
-  INode delete(String src) {
+  boolean delete(String src, List<Block>collectedBlocks) {
     if (NameNode.stateChangeLog.isDebugEnabled()) {
-      NameNode.stateChangeLog.debug("DIR* FSDirectory.delete: "+src);
+      NameNode.stateChangeLog.debug("DIR* FSDirectory.delete: " + src);
     }
     waitForReady();
     long now = FSNamesystem.now();
-    INode deletedNode = unprotectedDelete(src, now);
-    if (deletedNode != null) {
-      fsImage.getEditLog().logDelete(src, now);
-    }
-    return deletedNode;
+    INode removedNode = unprotectedDelete(src, collectedBlocks, now);
+    if (removedNode == null) {
+      return false;
+    }
+    // Blocks will be deleted later by the caller of this method
+    getFSNamesystem().removePathAndBlocks(src, null);
+    fsImage.getEditLog().logDelete(src, now);
+    return true;
   }
   
   /** Return if a directory is empty or not **/
@@ -608,12 +615,30 @@
   /**
    * Delete a path from the name space
    * Update the count at each ancestor directory with quota
+   * <br>
+   * Note: This is to be used by {@link FSEditLog} only.
+   * <br>
+   * @param src a string representation of a path to an inode
+   * @param mtime the time the inode is removed
+   * @return deleted inode if deletion succeeds; else null
+   */ 
+  INode unprotectedDelete(String src, long mtime) {
+    List<Block> collectedBlocks = new ArrayList<Block>();
+    INode removedNode = unprotectedDelete(src, collectedBlocks, mtime);
+    getFSNamesystem().removePathAndBlocks(src, collectedBlocks);
+    return removedNode;
+  }
+  
+  /**
+   * Delete a path from the name space
+   * Update the count at each ancestor directory with quota
    * @param src a string representation of a path to an inode
-   * @param modificationTime the time the inode is removed
-   * @param deletedBlocks the place holder for the blocks to be removed
-   * @return if the deletion succeeds
+   * @param collectedBlocks blocks collected from the deleted path
+   * @param mtime the time the inode is removed
+   * @return deleted inode if deletion succeeds; else null
    */ 
-  INode unprotectedDelete(String src, long modificationTime) {
+  INode unprotectedDelete(String src, List<Block> collectedBlocks, 
+      long mtime) {
     src = normalizePath(src);
 
     synchronized (rootDir) {
@@ -624,33 +649,34 @@
         NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
             +"failed to remove "+src+" because it does not exist");
         return null;
-      } else if (inodes.length == 1) { // src is the root
+      }
+      if (inodes.length == 1) { // src is the root
         NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedDelete: " +
             "failed to remove " + src +
             " because the root is not allowed to be deleted");
         return null;
-      } else {
-        try {
-          // Remove the node from the namespace
-          removeChild(inodes, inodes.length-1);
-          // set the parent's modification time
-          inodes[inodes.length-2].setModificationTime(modificationTime);
-          // GC all the blocks underneath the node.
-          ArrayList<Block> v = new ArrayList<Block>();
-          int filesRemoved = targetNode.collectSubtreeBlocksAndClear(v);
-          incrDeletedFileCount(filesRemoved);
-          getFSNamesystem().removePathAndBlocks(src, v);
-          if (NameNode.stateChangeLog.isDebugEnabled()) {
-            NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
-              +src+" is removed");
-          }
-          return targetNode;
-        } catch(QuotaExceededException e) {
-          NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedDelete: " +
-              "failed to remove " + src + " because " + e.getMessage());
-          return null;
-        }
       }
+      int pos = inodes.length - 1;
+      try {
+        // Remove the node from the namespace
+        targetNode = removeChild(inodes, pos);
+      } catch(QuotaExceededException e) {
+        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedDelete: " +
+            "failed to remove " + src + " because " + e.getMessage());
+        return null;
+      }
+      if (targetNode == null) {
+        return null;
+      }
+      // set the parent's modification time
+      inodes[pos-1].setModificationTime(mtime);
+      int filesRemoved = targetNode.collectSubtreeBlocksAndClear(collectedBlocks);
+      incrDeletedFileCount(filesRemoved);
+      if (NameNode.stateChangeLog.isDebugEnabled()) {
+        NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
+          +src+" is removed");
+      }
+      return targetNode;
     }
   }
 

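The contract of FSDirectory.delete() changes here: rather than returning the
deleted INode and removing its blocks itself, it unlinks the subtree, fills the
caller-supplied collectedBlocks list, and logs the edit, leaving block removal to
the caller so it can be done incrementally outside a long lock hold. Leases for
the path are still released right away via removePathAndBlocks(src, null). The
two-argument unprotectedDelete() overload, retained for edit-log replay, removes
the collected blocks immediately, where incremental deletion is unnecessary.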
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=811185&r1=811184&r2=811185&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Fri Sep  4 00:20:56 2009
@@ -123,6 +123,7 @@
   public static final Log auditLog = LogFactory.getLog(
       FSNamesystem.class.getName() + ".audit");
 
+  static int BLOCK_DELETION_INCREMENT = 1000;
   private boolean isPermissionEnabled;
   private UserGroupInformation fsOwner;
   private String supergroup;
@@ -1385,8 +1386,10 @@
       if ((!recursive) && (!dir.isDirEmpty(src))) {
         throw new IOException(src + " is non empty");
       }
+      if (NameNode.stateChangeLog.isDebugEnabled()) {
+        NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
+      }
       boolean status = deleteInternal(src, true);
-      getEditLog().logSync();
       if (status && auditLog.isInfoEnabled()) {
         logAuditEvent(UserGroupInformation.getCurrentUGI(),
                       Server.getRemoteIp(),
@@ -1396,25 +1399,68 @@
     }
     
   /**
-   * Remove the indicated filename from the namespace.  This may
-   * invalidate some blocks that make up the file.
+   * Remove a file/directory from the namespace.
+   * <p>
+   * For large directories, deletion is incremental. The blocks under
+   * the directory are collected and deleted a small number at a time while
+   * holding the {@link FSNamesystem} lock.
+   * <p>
+   * For a small directory or file, the deletion is done in one shot.
    */
-  synchronized boolean deleteInternal(String src, 
+  private boolean deleteInternal(String src, 
       boolean enforcePermission) throws IOException {
-    if (NameNode.stateChangeLog.isDebugEnabled()) {
-      NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src);
+    boolean deleteNow = false;
+    ArrayList<Block> collectedBlocks = new ArrayList<Block>();
+    synchronized(this) {
+      if (isInSafeMode()) {
+        throw new SafeModeException("Cannot delete " + src, safeMode);
+      }
+      if (enforcePermission && isPermissionEnabled) {
+        checkPermission(src, false, null, FsAction.WRITE, null, FsAction.ALL);
+      }
+      // Unlink the target directory from directory tree
+      if (!dir.delete(src, collectedBlocks)) {
+        return false;
+      }
+      deleteNow = collectedBlocks.size() <= BLOCK_DELETION_INCREMENT;
+      if (deleteNow) { // Perform small deletes right away
+        removeBlocks(collectedBlocks);
+      }
     }
-    if (isInSafeMode())
-      throw new SafeModeException("Cannot delete " + src, safeMode);
-    if (enforcePermission && isPermissionEnabled) {
-      checkPermission(src, false, null, FsAction.WRITE, null, FsAction.ALL);
+    // Log directory deletion to editlog
+    getEditLog().logSync();
+    if (!deleteNow) {
+      removeBlocks(collectedBlocks); // Incremental deletion of blocks
     }
-
-    return dir.delete(src) != null;
+    collectedBlocks.clear();
+    if (NameNode.stateChangeLog.isDebugEnabled()) {
+      NameNode.stateChangeLog.debug("DIR* Namesystem.delete: "
+        + src +" is removed");
+    }
+    return true;
   }
 
+  /** From the given list, incrementally remove the blocks from blockManager */
+  private void removeBlocks(List<Block> blocks) {
+    int start = 0;
+    int end = 0;
+    while (start < blocks.size()) {
+      end = BLOCK_DELETION_INCREMENT + start;
+      end = end > blocks.size() ? blocks.size() : end;
+      synchronized(this) {
+        for (int i=start; i<end; i++) {
+          blockManager.removeBlock(blocks.get(i));
+        }
+      }
+      start = end;
+    }
+  }
+  
   void removePathAndBlocks(String src, List<Block> blocks) {
     leaseManager.removeLeaseWithPrefixPath(src);
+    if (blocks == null) {
+      return;
+    }
     for(Block b : blocks) {
       blockManager.removeBlock(b);
     }

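Two details of deleteInternal() above are easy to miss: getEditLog().logSync() now
happens outside the synchronized block, so flushing the edit log no longer holds
up other namespace operations, and deletes of at most BLOCK_DELETION_INCREMENT
blocks are still performed in one shot inside the lock, so small deletes behave as
before. During a large delete, the path disappears from the namespace atomically
(the unlink happens under the lock), while some of its blocks remain in the block
manager briefly until their batch is processed by removeBlocks().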
Modified: hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java?rev=811185&r1=811184&r2=811185&view=diff
==============================================================================
--- hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java (original)
+++ hadoop/hdfs/trunk/src/java/org/apache/hadoop/hdfs/server/namenode/INodeFile.java Fri Sep  4 00:20:56 2009
@@ -112,8 +112,9 @@
 
   int collectSubtreeBlocksAndClear(List<Block> v) {
     parent = null;
-    for (Block blk : blocks) {
+    for (BlockInfo blk : blocks) {
       v.add(blk);
+      blk.setINode(null);
     }
     blocks = null;
     return 1;

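collectSubtreeBlocksAndClear() now also clears each block's back-pointer to its
INode. Because blocks of a deleted directory can linger in the blocksMap until
their deletion batch runs, nulling the INode reference appears intended to mark
them, in the interim, as no longer belonging to a live file.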
Added: hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java
URL: http://svn.apache.org/viewvc/hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java?rev=811185&view=auto
==============================================================================
--- hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java (added)
+++ hadoop/hdfs/trunk/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestLargeDirectoryDelete.java Fri Sep  4 00:20:56 2009
@@ -0,0 +1,152 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.namenode;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DFSTestUtil;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+/**
+ * Ensure that, during a large directory delete, the namenode does not block
+ * until the deletion completes and continues to handle requests from other clients.
+ */
+public class TestLargeDirectoryDelete {
+  private static final Log LOG = LogFactory.getLog(TestLargeDirectoryDelete.class);
+  private static final Configuration CONF = new Configuration();
+  private static final int TOTAL_BLOCKS = 10000;
+  private MiniDFSCluster mc = null;
+  private int createOps = 0;
+  private int lockOps = 0;
+  
+  static {
+    CONF.setLong("dfs.block.size", 1);
+    CONF.setInt("io.bytes.per.checksum", 1);
+  }
+  
+  /** create a file with a length of <code>filelen</code> */
+  private void createFile(final String fileName, final long filelen) throws IOException {
+    FileSystem fs = mc.getFileSystem();
+    Path filePath = new Path(fileName);
+    DFSTestUtil.createFile(fs, filePath, filelen, (short) 1, 0);
+  }
+  
+  /** Create a large number of directories and files */
+  private void createFiles() throws IOException {
+    Random rand = new Random();
+    // Create files in a directory with random depth
+    // ranging from 0-10.
+    for (int i = 0; i < TOTAL_BLOCKS; i+=100) {
+      String filename = "/root/";
+      int dirs = rand.nextInt(10);  // Depth of the directory
+      for (int j=i; j >=(i-dirs); j--) {
+        filename += j + "/";
+      }
+      filename += "file" + i;
+      createFile(filename, 100);
+    }
+  }
+  
+  private int getBlockCount() {
+    return (int)mc.getNamesystem().getBlocksTotal();
+  }
+
+  /** Run multiple threads doing simultaneous operations on the namenode
+   * while a large directory is being deleted.
+   */
+  private void runThreads() throws IOException {
+    final Thread threads[] = new Thread[2];
+    
+    // Thread for creating files
+    threads[0] = new Thread() {
+      @Override
+      public void run() {
+        while(true) {
+          try {
+            int blockcount = getBlockCount();
+            if (blockcount < TOTAL_BLOCKS && blockcount > 0) {
+              String file = "/tmp" + createOps;
+              createFile(file, 1);
+              mc.getFileSystem().delete(new Path(file), true);
+              createOps++;
+            }
+          } catch (IOException ex) {
+            LOG.info("createFile exception ", ex);
+            break;
+          }
+        }
+      }
+    };
+    
+    // Thread that periodically acquires the FSNamesystem lock
+    threads[1] = new Thread() {
+      @Override
+      public void run() {
+        while(true) {
+          try {
+            int blockcount = getBlockCount();
+            if (blockcount < TOTAL_BLOCKS && blockcount > 0) {
+              synchronized(mc.getNamesystem()) {
+                lockOps++;
+              }
+              Thread.sleep(1);
+            }
+          } catch (InterruptedException ex) {
+            LOG.info("lockOperation exception ", ex);
+            break;
+          }
+        }
+      }
+    };
+    threads[0].start();
+    threads[1].start();
+    
+    final long start = System.currentTimeMillis();
+    FSNamesystem.BLOCK_DELETION_INCREMENT = 1;
+    mc.getFileSystem().delete(new Path("/root"), true); // recursive delete
+    final long end = System.currentTimeMillis();
+    threads[0].interrupt();
+    threads[1].interrupt();
+    LOG.info("Deletion took " + (end - start) + "msecs");
+    LOG.info("createOperations " + createOps);
+    LOG.info("lockOperations " + lockOps);
+    Assert.assertTrue(lockOps + createOps > 0);
+  }
+  
+  @Test
+  public void largeDelete() throws IOException, InterruptedException {
+    mc = new MiniDFSCluster(CONF, 1, true, null);
+    try {
+      mc.waitActive();
+      createFiles();
+      Assert.assertEquals(TOTAL_BLOCKS, getBlockCount());
+      runThreads();
+    } finally {
+      mc.shutdown();
+    }
+  }
+}
\ No newline at end of file
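
The test forces maximal interleaving by setting BLOCK_DELETION_INCREMENT to 1, so
the namesystem lock is released after every block, and it only asserts that at
least one concurrent create or lock operation completed while /root was being
recursively deleted. Assuming the Ant test target of this era, the test can be run
standalone with something like:

    ant test -Dtestcase=TestLargeDirectoryDelete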


