hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s..@apache.org
Subject svn commit: r654931 - in /hadoop/core/trunk: ./ src/java/org/apache/hadoop/dfs/
Date Fri, 09 May 2008 20:19:15 GMT
Author: shv
Date: Fri May  9 13:19:14 2008
New Revision: 654931

URL: http://svn.apache.org/viewvc?rev=654931&view=rev
Log:
HADOOP-3364. Faster image and log edits loading. Contributed by Konstantin Shvachko.

Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
    hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
    hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
    hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
    hadoop/core/trunk/src/java/org/apache/hadoop/dfs/INode.java
    hadoop/core/trunk/src/java/org/apache/hadoop/dfs/LeaseManager.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=654931&r1=654930&r2=654931&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Fri May  9 13:19:14 2008
@@ -128,6 +128,8 @@
     HADOOP-3297. Fetch more task completion events from the job
     tracker and task tracker. (ddas via omalley)
 
+    HADOOP-3364. Faster image and log edits loading. (shv)
+
   BUG FIXES
 
     HADOOP-2905. 'fsck -move' triggers NPE in NameNode. 

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java?rev=654931&r1=654930&r2=654931&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSDirectory.java Fri May  9 13:19:14 2008
@@ -208,6 +208,43 @@
     }
   }
 
+  INodeDirectory addToParent( String src,
+                              INodeDirectory parentINode,
+                              PermissionStatus permissions,
+                              Block[] blocks, 
+                              short replication,
+                              long modificationTime,
+                              long preferredBlockSize) {
+    // create new inode
+    INode newNode;
+    if (blocks == null)
+      newNode = new INodeDirectory(permissions, modificationTime);
+    else 
+      newNode = new INodeFile(permissions, blocks.length, replication,
+                              modificationTime, preferredBlockSize);
+    // add new node to the parent
+    INodeDirectory newParent = null;
+    synchronized (rootDir) {
+      try {
+        newParent = rootDir.addToParent(src, newNode, parentINode, false);
+      } catch (FileNotFoundException e) {
+        return null;
+      }
+      if(newParent == null)
+        return null;
+      totalInodes++;
+      if(blocks != null) {
+        int nrBlocks = blocks.length;
+        // Add file->block mapping
+        INodeFile newF = (INodeFile)newNode;
+        for (int i = 0; i < nrBlocks; i++) {
+          newF.setBlock(i, namesystem.blocksMap.addINode(blocks[i], newF));
+        }
+      }
+    }
+    return newParent;
+  }
+
   /**
    * Add a block to the file. Returns a reference to the added block.
    */
@@ -248,16 +285,14 @@
   /**
    * Close file.
    */
-  void closeFile(String path, INode file) throws IOException {
+  void closeFile(String path, INodeFile file) throws IOException {
     waitForReady();
 
     synchronized (rootDir) {
-      INodeFile fileNode = (INodeFile) file;
-
       // file is closed
-      fsImage.getEditLog().logCloseFile(path, fileNode);
+      fsImage.getEditLog().logCloseFile(path, file);
       NameNode.stateChangeLog.debug("DIR* FSDirectory.closeFile: "
-                                    +path+" with "+ fileNode.getBlocks().length 
+                                    +path+" with "+ file.getBlocks().length 
                                     +" blocks is persisted to the file system");
     }
   }
@@ -492,7 +527,7 @@
     synchronized (rootDir) {
       INode targetNode = rootDir.getNode(src);
       if (targetNode == null) {
-        NameNode.stateChangeLog.warn("DIR* FSDirectory.unprotectedDelete: "
+        NameNode.stateChangeLog.debug("DIR* FSDirectory.unprotectedDelete: "
                                      +"failed to remove "+src+" because it does not exist");
         return null;
       } else {
@@ -693,10 +728,10 @@
   /**
    */
   INode unprotectedMkdir(String src, PermissionStatus permissions,
-      boolean inheritPermission, long timestamp) throws FileNotFoundException {
+                          long timestamp) throws FileNotFoundException {
     synchronized (rootDir) {
-      INode newNode = rootDir.addNode(src, new INodeDirectory(permissions, 
-                                      timestamp), inheritPermission);
+      INode newNode = rootDir.addNode(src,
+                                new INodeDirectory(permissions, timestamp));
       if (newNode != null) {
         totalInodes++;
       }

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java?rev=654931&r1=654930&r2=654931&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSEditLog.java Fri May  9 13:19:14 2008
@@ -384,38 +384,40 @@
     INode old = null;
     String clientName = null;
     String clientMachine = null;
-	DatanodeDescriptor lastLocations[] = null;
-    
+    DatanodeDescriptor lastLocations[] = null;
+    String path = null;
+    int numOpAdd = 0, numOpClose = 0, numOpDelete = 0,
+        numOpRename = 0, numOpSetRepl = 0, numOpMkDir = 0,
+        numOpSetPerm = 0, numOpSetOwner = 0, numOpSetGenStamp = 0,
+        numOpOther = 0;
+    long startTime = FSNamesystem.now();
+
     if (edits != null) {
-      DataInputStream in = new DataInputStream(
-                                               new BufferedInputStream(
-                                                                       new FileInputStream(edits)));
-      // Read log file version. Could be missing. 
-      in.mark(4);
-      // If edits log is greater than 2G, available method will return negative
-      // numbers, so we avoid having to call available
-      boolean available = true;
+      DataInputStream in = new DataInputStream(new BufferedInputStream(
+                                new FileInputStream(edits)));
       try {
-        logVersion = in.readByte();
-      } catch (EOFException e) {
-        available = false;
-      }
-      if (available) {
-        in.reset();
-        if (logVersion >= 0)
-          logVersion = 0;
-        else
+        // Read log file version. Could be missing. 
+        in.mark(4);
+        // If edits log is greater than 2G, available method will return negative
+        // numbers, so we avoid having to call available
+        boolean available = true;
+        try {
+          logVersion = in.readByte();
+        } catch (EOFException e) {
+          available = false;
+        }
+        if (available) {
+          in.reset();
           logVersion = in.readInt();
-        if (logVersion < FSConstants.LAYOUT_VERSION) // future version
-          throw new IOException(
-                                "Unexpected version of the file system log file: "
-                                + logVersion
-                                + ". Current version = " 
-                                + FSConstants.LAYOUT_VERSION + ".");
-      }
-      
-      short replication = fsNamesys.getDefaultReplication();
-      try {
+          if (logVersion < FSConstants.LAYOUT_VERSION) // future version
+            throw new IOException(
+                            "Unexpected version of the file system log file: "
+                            + logVersion + ". Current version = " 
+                            + FSConstants.LAYOUT_VERSION + ".");
+        }
+        assert logVersion <= Storage.LAST_UPGRADABLE_LAYOUT_VERSION :
+                              "Unsupported version " + logVersion;
+  
         while (true) {
           long timestamp = 0;
           long mtime = 0;
@@ -430,44 +432,24 @@
           switch (opcode) {
           case OP_ADD:
           case OP_CLOSE: {
-            UTF8 name = new UTF8();
-            String path = null;
-            ArrayWritable aw = null;
-            Writable writables[];
-            // version 0 does not support per file replication
-            if (logVersion >= 0)
-              name.readFields(in);  // read name only
-            else {  // other versions do
-              // get name and replication
-              aw = new ArrayWritable(UTF8.class);
-              aw.readFields(in);
-              writables = aw.get(); 
-              if (-4 <= logVersion && writables.length != 2 ||
-                  -7 <= logVersion && logVersion < -4 && writables.length != 3||
-                  logVersion < -7 && writables.length != 4) {
-                  throw new IOException("Incorrect data format."  +
-                                        " logVersion is " + logVersion +
-                                        " but writables.length is " +
-                                        writables.length + ". ");
-              }
-              name = (UTF8) writables[0];
-              path = name.toString();
-              replication = Short.parseShort(
-                                             ((UTF8)writables[1]).toString());
-              replication = adjustReplication(replication);
-              if (logVersion < -4) {
-                mtime = Long.parseLong(((UTF8)writables[2]).toString());
-              }
-              if (logVersion < -7) {
-                blockSize = Long.parseLong(((UTF8)writables[3]).toString());
-              }
+            // versions > 0 support per file replication
+            // get name and replication
+            int length = in.readInt();
+            if (-7 == logVersion && length != 3||
+                logVersion < -7 && length != 4) {
+                throw new IOException("Incorrect data format."  +
+                                      " logVersion is " + logVersion +
+                                      " but writables.length is " +
+                                      length + ". ");
+            }
+            path = FSImage.readString(in);
+            short replication = adjustReplication(readShort(in));
+            mtime = readLong(in);
+            if (logVersion < -7) {
+              blockSize = readLong(in);
             }
             // get blocks
-            aw = new ArrayWritable(Block.class);
-            aw.readFields(in);
-            writables = aw.get();
-            Block blocks[] = new Block[writables.length];
-            System.arraycopy(writables, 0, blocks, 0, blocks.length);
+            Block blocks[] = readBlocks(in);
 
             // Older versions of HDFS does not store the block size in inode.
             // If the file has more than one block, use the size of the
@@ -488,26 +470,21 @@
             }
 
             // clientname, clientMachine and block locations of last block.
-            clientName = "";
-            clientMachine = "";
             lastLocations = null;
             if (opcode == OP_ADD && logVersion <= -12) {
-              UTF8 uu = new UTF8();
-              UTF8 cl = new UTF8();
-              uu.readFields(in);
-              cl.readFields(in);
+              clientName = FSImage.readString(in);
+              clientMachine = FSImage.readString(in);
               lastLocations = readDatanodeDescriptorArray(in);
-              clientName = uu.toString();
-              clientMachine = cl.toString();
-              writables = aw.get();
             } else {
-              lastLocations = new DatanodeDescriptor[0];
+              clientName = "";
+              clientMachine = "";
+              lastLocations = EMPTY_ARRAY_DN_DESCRIPTORS;
             }
   
             // The open lease transaction re-creates a file if necessary.
             // Delete the file if it already exists.
             if (FSNamesystem.LOG.isDebugEnabled()) {
-              FSNamesystem.LOG.debug(opcode + ": " + name.toString() + 
+              FSNamesystem.LOG.debug(opcode + ": " + path + 
                                      " numblocks : " + blocks.length +
                                      " clientHolder " +  clientName +
                                      " clientMachine " + clientMachine);
@@ -521,12 +498,13 @@
                                                       blocks, replication, 
                                                       mtime, blockSize);
             if (opcode == OP_ADD) {
+              numOpAdd++;
               //
               // Replace current node with a INodeUnderConstruction.
               // Recreate in-memory lease record.
               //
               INodeFileUnderConstruction cons = new INodeFileUnderConstruction(
-                                        INode.string2Bytes(node.getLocalName()),
+                                        node.getLocalNameBytes(),
                                         node.getReplication(), 
                                         node.getModificationTime(),
                                         node.getPreferredBlockSize(),
@@ -551,134 +529,95 @@
             break;
           } 
           case OP_SET_REPLICATION: {
-            UTF8 src = new UTF8();
-            UTF8 repl = new UTF8();
-            src.readFields(in);
-            repl.readFields(in);
-            replication = adjustReplication(fromLogReplication(repl));
-            fsDir.unprotectedSetReplication(src.toString(), 
-                                            replication,
-                                            null);
+            numOpSetRepl++;
+            path = FSImage.readString(in);
+            short replication = adjustReplication(readShort(in));
+            fsDir.unprotectedSetReplication(path, replication, null);
             break;
           } 
           case OP_RENAME: {
-            UTF8 src = null;
-            UTF8 dst = null;
-            if (logVersion >= -4) {
-              src = new UTF8();
-              dst = new UTF8();
-              src.readFields(in);
-              dst.readFields(in);
-            } else {
-              ArrayWritable aw = null;
-              Writable writables[];
-              aw = new ArrayWritable(UTF8.class);
-              aw.readFields(in);
-              writables = aw.get(); 
-              if (writables.length != 3) {
-                throw new IOException("Incorrect data format. " 
-                                      + "Mkdir operation.");
-              }
-              src = (UTF8) writables[0];
-              dst = (UTF8) writables[1];
-              timestamp = Long.parseLong(((UTF8)writables[2]).toString());
+            numOpRename++;
+            int length = in.readInt();
+            if (length != 3) {
+              throw new IOException("Incorrect data format. " 
+                                    + "Mkdir operation.");
             }
-            String s = src.toString();
-            String d = dst.toString();
+            String s = FSImage.readString(in);
+            String d = FSImage.readString(in);
+            timestamp = readLong(in);
             fsDir.unprotectedRenameTo(s, d, timestamp);
             fsNamesys.changeLease(s, d);
             break;
           }
           case OP_DELETE: {
-            String src = null;
-            if (logVersion >= -4) {
-              UTF8 srcUtf8 = new UTF8();
-              srcUtf8.readFields(in);
-              src = srcUtf8.toString();
-            } else {
-              ArrayWritable aw = null;
-              Writable writables[];
-              aw = new ArrayWritable(UTF8.class);
-              aw.readFields(in);
-              writables = aw.get(); 
-              if (writables.length != 2) {
-                throw new IOException("Incorrect data format. " 
-                                      + "delete operation.");
-              }
-              src = writables[0].toString();
-              timestamp = Long.parseLong(writables[1].toString());
+            numOpDelete++;
+            int length = in.readInt();
+            if (length != 2) {
+              throw new IOException("Incorrect data format. " 
+                                    + "delete operation.");
             }
-            old = fsDir.unprotectedDelete(src, timestamp, null);
+            path = FSImage.readString(in);
+            timestamp = readLong(in);
+            old = fsDir.unprotectedDelete(path, timestamp, null);
             if (old != null && old.isUnderConstruction()) {
               INodeFileUnderConstruction cons = (INodeFileUnderConstruction)old;
-              fsNamesys.leaseManager.removeLease(src, cons.getClientName());
+              fsNamesys.leaseManager.removeLease(path, cons.getClientName());
             }
             break;
           }
           case OP_MKDIR: {
-            UTF8 src = null;
+            numOpMkDir++;
             PermissionStatus permissions = fsNamesys.getUpgradePermission();
-            if (logVersion >= -4) {
-              src = new UTF8();
-              src.readFields(in);
-            } else {
-              ArrayWritable aw = null;
-              Writable writables[];
-              aw = new ArrayWritable(UTF8.class);
-              aw.readFields(in);
-              writables = aw.get(); 
-              if (writables.length != 2) {
-                throw new IOException("Incorrect data format. " 
-                                      + "Mkdir operation.");
-              }
-              src = (UTF8) writables[0];
-              timestamp = Long.parseLong(((UTF8)writables[1]).toString());
+            int length = in.readInt();
+            if (length != 2) {
+              throw new IOException("Incorrect data format. " 
+                                    + "Mkdir operation.");
+            }
+            path = FSImage.readString(in);
+            timestamp = readLong(in);
 
-              if (logVersion <= -11) {
-                permissions = PermissionStatus.read(in);
-              }
+            if (logVersion <= -11) {
+              permissions = PermissionStatus.read(in);
             }
-            fsDir.unprotectedMkdir(src.toString(),permissions,false,timestamp);
+            fsDir.unprotectedMkdir(path, permissions, timestamp);
             break;
           }
           case OP_SET_GENSTAMP: {
-            LongWritable aw = new LongWritable();
-            aw.readFields(in);
-            fsDir.namesystem.setGenerationStamp(aw.get());
+            numOpSetGenStamp++;
+            long lw = in.readLong();
+            fsDir.namesystem.setGenerationStamp(lw);
             break;
           } 
           case OP_DATANODE_ADD: {
-            if (logVersion > -3)
-              throw new IOException("Unexpected opcode " + opcode 
-                                    + " for version " + logVersion);
+            numOpOther++;
             FSImage.DatanodeImage nodeimage = new FSImage.DatanodeImage();
             nodeimage.readFields(in);
             //Datnodes are not persistent any more.
             break;
           }
           case OP_DATANODE_REMOVE: {
-            if (logVersion > -3)
-              throw new IOException("Unexpected opcode " + opcode 
-                                    + " for version " + logVersion);
+            numOpOther++;
             DatanodeID nodeID = new DatanodeID();
             nodeID.readFields(in);
             //Datanodes are not persistent any more.
             break;
           }
           case OP_SET_PERMISSIONS: {
+            numOpSetPerm++;
             if (logVersion > -11)
               throw new IOException("Unexpected opcode " + opcode
                                     + " for version " + logVersion);
             fsDir.unprotectedSetPermission(
-                readUTF8String(in), FsPermission.read(in));
+                FSImage.readString(in), FsPermission.read(in));
             break;
           }
           case OP_SET_OWNER: {
+            numOpSetOwner++;
             if (logVersion > -11)
               throw new IOException("Unexpected opcode " + opcode
                                     + " for version " + logVersion);
-            fsDir.unprotectedSetOwner(
-                readUTF8String(in), readUTF8String(in), readUTF8String(in));
+            fsDir.unprotectedSetOwner(FSImage.readString(in),
+                FSImage.readString(in), FSImage.readString(in));
             break;
           }
           default: {
@@ -689,19 +628,25 @@
       } finally {
         in.close();
       }
+      FSImage.LOG.info("Edits file " + edits.getName() 
+          + " of size " + edits.length() + " edits # " + numEdits 
+          + " loaded in " + (FSNamesystem.now()-startTime)/1000 + " seconds.");
     }
-    
+
+    if (FSImage.LOG.isDebugEnabled()) {
+      FSImage.LOG.debug("numOpAdd = " + numOpAdd + " numOpClose = " + numOpClose 
+          + " numOpDelete = " + numOpDelete + " numOpRename = " + numOpRename 
+          + " numOpSetRepl = " + numOpSetRepl + " numOpMkDir = " + numOpMkDir
+          + " numOpSetPerm = " + numOpSetPerm 
+          + " numOpSetOwner = " + numOpSetOwner
+          + " numOpSetGenStamp = " + numOpSetGenStamp 
+          + " numOpOther = " + numOpOther);
+    }
+
     if (logVersion != FSConstants.LAYOUT_VERSION) // other version
       numEdits++; // save this image asap
     return numEdits;
   }
-  
-  private String readUTF8String(DataInputStream in) throws IOException {
-    UTF8 utf8 = new UTF8();
-    utf8.readFields(in);
-    String s = utf8.toString();
-    return s.length() == 0? null: s;
-  }
 
   static short adjustReplication(short replication) {
     FSNamesystem fsNamesys = FSNamesystem.getFSNamesystem();
@@ -953,15 +898,11 @@
     logEdit(OP_SET_GENSTAMP, new LongWritable(genstamp));
   }
   
-  static UTF8 toLogReplication(short replication) {
+  static private UTF8 toLogReplication(short replication) {
     return new UTF8(Short.toString(replication));
   }
   
-  static short fromLogReplication(UTF8 replication) {
-    return Short.parseShort(replication.toString());
-  }
-
-  static UTF8 toLogLong(long timestamp) {
+  static private UTF8 toLogLong(long timestamp) {
     return new UTF8(Long.toString(timestamp));
   }
 
@@ -1082,6 +1023,8 @@
   }
 
   /** This method is defined for compatibility reason. */
+  private static final DatanodeDescriptor[] EMPTY_ARRAY_DN_DESCRIPTORS
+                                                  = new DatanodeDescriptor[0];
   //TODO: remove this class in HADOOP-3329
   static private DatanodeDescriptor[] readDatanodeDescriptorArray(DataInput in
       ) throws IOException {
@@ -1092,4 +1035,22 @@
     }
     return locations;
   }
+
+  static private short readShort(DataInputStream in) throws IOException {
+    return Short.parseShort(FSImage.readString(in));
+  }
+
+  static private long readLong(DataInputStream in) throws IOException {
+    return Long.parseLong(FSImage.readString(in));
+  }
+
+  static private Block[] readBlocks(DataInputStream in) throws IOException {
+    int numBlocks = in.readInt();
+    Block[] blocks = new Block[numBlocks];
+    for (int i = 0; i < numBlocks; i++) {
+      blocks[i] = new Block();
+      blocks[i].readFields(in);
+    }
+    return blocks;
+  }
 }

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java?rev=654931&r1=654930&r2=654931&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSImage.java Fri May  9 13:19:14 2008
@@ -39,6 +39,7 @@
 import java.lang.Math;
 import java.nio.ByteBuffer;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.PermissionStatus;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.conf.Configuration;
@@ -89,8 +90,8 @@
   /**
    * Used for saving the image to disk
    */
-  static private final FsPermission fileperm = new FsPermission((short)0);
-  static private final byte[] separator = INode.string2Bytes("/");
+  static private final FsPermission FILE_PERM = new FsPermission((short)0);
+  static private final byte[] PATH_SEPARATOR = INode.string2Bytes(Path.SEPARATOR);
   static private byte[] byteStore = null;
 
   /**
@@ -654,12 +655,17 @@
     }
     assert latestSD != null : "Latest storage directory was not determined.";
 
+    long startTime = FSNamesystem.now();
+    long imageSize = getImageFile(latestSD, NameNodeFile.IMAGE).length();
+
     //
     // Load in bits
     //
     latestSD.read();
     needToSave |= loadFSImage(getImageFile(latestSD, NameNodeFile.IMAGE));
 
+    LOG.info("Image file of size " + imageSize + " loaded in " 
+        + (FSNamesystem.now() - startTime)/1000 + " seconds.");
     //
     // read in the editlog from the same directory from
     // which we read in the image
@@ -685,9 +691,8 @@
     // Load in bits
     //
     boolean needToSave = true;
-    DataInputStream in = new DataInputStream(
-                                             new BufferedInputStream(
-                                                                     new FileInputStream(curFile)));
+    DataInputStream in = new DataInputStream(new BufferedInputStream(
+                              new FileInputStream(curFile)));
     try {
       /*
        * Note: Remove any checks for version earlier than 
@@ -719,11 +724,16 @@
 
       // read file info
       short replication = FSNamesystem.getFSNamesystem().getDefaultReplication();
+
+      LOG.info("Number of files = " + numFiles);
+
+      String path;
+      String parentPath = "";
+      INodeDirectory parentINode = fsDir.rootDir;
       for (int i = 0; i < numFiles; i++) {
-        UTF8 name = new UTF8();
         long modificationTime = 0;
         long blockSize = 0;
-        name.readFields(in);
+        path = readString(in);
         replication = in.readShort();
         replication = FSEditLog.adjustReplication(replication);
         modificationTime = in.readLong();
@@ -759,7 +769,13 @@
         if (imgVersion <= -11) {
           permissions = PermissionStatus.read(in);
         }
-        fsDir.unprotectedAddFile(name.toString(), permissions,
+        // check if the new inode belongs to the same parent
+        if(!isParent(path, parentPath)) {
+          parentINode = null;
+          parentPath = getParent(path);
+        }
+        // add new inode
+        parentINode = fsDir.addToParent(path, parentINode, permissions,
             blocks, replication, modificationTime, blockSize);
       }
       
@@ -776,6 +792,19 @@
   }
 
   /**
+   * Return string representing the parent of the given path.
+   */
+  String getParent(String path) {
+    return path.substring(0, path.lastIndexOf(Path.SEPARATOR));
+  }
+
+  private boolean isParent(String path, String parent) {
+    return parent != null && path != null
+          && path.indexOf(parent) == 0
+          && path.lastIndexOf(Path.SEPARATOR) == parent.length();
+  }
+
+  /**
    * Load and merge edits from two edits files
    * 
    * @param sd storage directory
@@ -797,6 +826,7 @@
   void saveFSImage(File newFile) throws IOException {
     FSNamesystem fsNamesys = FSNamesystem.getFSNamesystem();
     FSDirectory fsDir = fsNamesys.dir;
+    long startTime = FSNamesystem.now();
     //
     // Write out data
     //
@@ -817,6 +847,9 @@
     } finally {
       out.close();
     }
+
+    LOG.info("Image file of size " + newFile.length() + " saved in " 
+        + (FSNamesystem.now() - startTime)/1000 + " seconds.");
   }
 
   /**
@@ -887,47 +920,55 @@
 
   /**
    * Save file tree image starting from the given root.
+   * This is a recursive procedure, which first saves all children of
+   * a current directory and then moves inside the sub-directories.
    */
-  private static void saveImage(ByteBuffer parentPrefix, 
+  private static void saveImage(ByteBuffer parentPrefix,
                                 int prefixLength,
-                                INode inode, 
+                                INodeDirectory current,
                                 DataOutputStream out) throws IOException {
     int newPrefixLength = prefixLength;
-    if (inode.getParent() != null) {
-      parentPrefix.put(separator).put(inode.getLocalNameBytes());
-      newPrefixLength += separator.length + inode.getLocalNameBytes().length;
+    if (current.getChildrenRaw() == null)
+      return;
+    for(INode child : current.getChildren()) {
+    // print all children first
+      parentPrefix.position(prefixLength);
+      parentPrefix.put(PATH_SEPARATOR).put(child.getLocalNameBytes());
+      newPrefixLength = parentPrefix.position();
       out.writeShort(newPrefixLength);
       out.write(byteStore, 0, newPrefixLength);
-      if (!inode.isDirectory()) {  // write file inode
-        INodeFile fileINode = (INodeFile)inode;
+      if (!child.isDirectory()) {  // write file inode
+        INodeFile fileINode = (INodeFile)child;
         out.writeShort(fileINode.getReplication());
-        out.writeLong(inode.getModificationTime());
+        out.writeLong(fileINode.getModificationTime());
         out.writeLong(fileINode.getPreferredBlockSize());
         Block[] blocks = fileINode.getBlocks();
         out.writeInt(blocks.length);
         for (Block blk : blocks)
           blk.write(out);
-        fileperm.fromShort(fileINode.getFsPermissionShort());
+        FILE_PERM.fromShort(fileINode.getFsPermissionShort());
         PermissionStatus.write(out, fileINode.getUserName(),
                                fileINode.getGroupName(),
-                               fileperm);
-        parentPrefix.position(prefixLength);
-        return;
+                               FILE_PERM);
+        continue;
       }
       // write directory inode
       out.writeShort(0);  // replication
-      out.writeLong(inode.getModificationTime());
+      out.writeLong(child.getModificationTime());
       out.writeLong(0);   // preferred block size
       out.writeInt(-1);    // # of blocks
-      fileperm.fromShort(inode.getFsPermissionShort());
-      PermissionStatus.write(out, inode.getUserName(),
-                             inode.getGroupName(),
-                             fileperm);
-    }
-    if (((INodeDirectory)inode).getChildrenRaw() != null) {
-      for(INode child : ((INodeDirectory)inode).getChildren()) {
-        saveImage(parentPrefix, newPrefixLength, child, out);
-      }
+      FILE_PERM.fromShort(child.getFsPermissionShort());
+      PermissionStatus.write(out, child.getUserName(),
+                             child.getGroupName(),
+                             FILE_PERM);
+    }
+    for(INode child : current.getChildren()) {
+      if(!child.isDirectory())
+        continue;
+      parentPrefix.position(prefixLength);
+      parentPrefix.put(PATH_SEPARATOR).put(child.getLocalNameBytes());
+      newPrefixLength = parentPrefix.position();
+      saveImage(parentPrefix, newPrefixLength, (INodeDirectory)child, out);
     }
     parentPrefix.position(prefixLength);
   }
@@ -954,6 +995,8 @@
       return;
     int size = in.readInt();
 
+    LOG.info("Number of files under construction = " + size);
+
     for (int i = 0; i < size; i++) {
       INodeFileUnderConstruction cons = readINodeUnderConstruction(in);
 
@@ -977,9 +1020,7 @@
   //
   static INodeFileUnderConstruction readINodeUnderConstruction(
                             DataInputStream in) throws IOException {
-    UTF8 src = new UTF8();
-    src.readFields(in);
-    byte[] name = src.getBytes();
+    byte[] name = readBytes(in);
     short blockReplication = in.readShort();
     long modificationTime = in.readLong();
     long preferredBlockSize = in.readLong();
@@ -991,10 +1032,8 @@
       blocks[i] = new BlockInfo(blk, blockReplication);
     }
     PermissionStatus perm = PermissionStatus.read(in);
-    UTF8 clientName = new UTF8();
-    clientName.readFields(in);
-    UTF8 clientMachine = new UTF8();
-    clientMachine.readFields(in);
+    String clientName = readString(in);
+    String clientMachine = readString(in);
 
     int numLocs = in.readInt();
     DatanodeDescriptor[] locations = new DatanodeDescriptor[numLocs];
@@ -1009,8 +1048,8 @@
                                           preferredBlockSize,
                                           blocks,
                                           perm,
-                                          clientName.toString(),
-                                          clientMachine.toString(),
+                                          clientName,
+                                          clientMachine,
                                           null,
                                           locations);
 
@@ -1023,7 +1062,7 @@
                                            INodeFileUnderConstruction cons,
                                            String path) 
                                            throws IOException {
-    new UTF8(path).write(out);
+    writeString(path, out);
     out.writeShort(cons.getReplication());
     out.writeLong(cons.getModificationTime());
     out.writeLong(cons.getPreferredBlockSize());
@@ -1033,8 +1072,8 @@
       cons.getBlocks()[i].write(out);
     }
     cons.getPermissionStatus().write(out);
-    new UTF8(cons.getClientName()).write(out);
-    new UTF8(cons.getClientMachine()).write(out);
+    writeString(cons.getClientName(), out);
+    writeString(cons.getClientMachine(), out);
 
     int numLocs = cons.getLastBlockLocations().length;
     out.writeInt(numLocs);
@@ -1315,4 +1354,23 @@
     }
     return dirs;
   }
+
+  static private final UTF8 U_STR = new UTF8();
+  static String readString(DataInputStream in) throws IOException {
+    U_STR.readFields(in);
+    return U_STR.toString();
+  }
+
+  static byte[] readBytes(DataInputStream in) throws IOException {
+    U_STR.readFields(in);
+    int len = U_STR.getLength();
+    byte[] bytes = new byte[len];
+    System.arraycopy(U_STR.getBytes(), 0, bytes, 0, len);
+    return bytes;
+  }
+
+  static void writeString(String str, DataOutputStream out) throws IOException {
+    U_STR.set(str);
+    U_STR.write(out);
+  }
 }

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java?rev=654931&r1=654930&r2=654931&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/FSNamesystem.java Fri May  9 13:19:14
2008
@@ -20,6 +20,7 @@
 import org.apache.commons.logging.*;
 
 import org.apache.hadoop.conf.*;
+import org.apache.hadoop.dfs.BlocksMap.BlockInfo;
 import org.apache.hadoop.dfs.BlocksWithLocations.BlockWithLocations;
 import org.apache.hadoop.dfs.namenode.metrics.FSNamesystemMBean;
 import org.apache.hadoop.security.UnixUserGroupInformation;
@@ -842,13 +843,13 @@
                                               boolean overwrite,
                                               short replication,
                                               long blockSize
-                                             	) throws IOException {
+                                              ) throws IOException {
     NameNode.stateChangeLog.debug("DIR* NameSystem.startFile: file "
                                   +src+" for "+holder+" at "+clientMachine);
     if (isInSafeMode())
       throw new SafeModeException("Cannot create file" + src, safeMode);
     if (!isValidName(src)) {
-      throw new IOException("Invalid file name: " + src);      	  
+      throw new IOException("Invalid file name: " + src);
     }
     if (isPermissionEnabled) {
       if (overwrite && dir.exists(src)) {
@@ -2459,14 +2460,21 @@
   synchronized Block addStoredBlock(Block block, 
                                     DatanodeDescriptor node,
                                     DatanodeDescriptor delNodeHint) {
-        
-    INodeFile fileINode = blocksMap.getINode(block);
-    int replication = (fileINode != null) ?  fileINode.getReplication() : 
-      defaultReplication;
-    boolean added = blocksMap.addNode(block, node, replication);
-        
-    Block storedBlock = blocksMap.getStoredBlock(block); //extra look up!
-    if (storedBlock != null && block != storedBlock) {
+    BlockInfo storedBlock = blocksMap.getStoredBlock(block);
+    INodeFile fileINode = null;
+    boolean added = false;
+    if(storedBlock == null) { // block is not in the blocksMaps
+      // add block to the blocksMap and to the data-node
+      added = blocksMap.addNode(block, node, defaultReplication);
+      storedBlock = blocksMap.getStoredBlock(block);
+    } else {
+      // add block to the data-node
+      added = node.addBlock(storedBlock);
+    }
+    assert storedBlock != null : "Block must be stored by now";
+
+    fileINode = storedBlock.getINode();
+    if (block != storedBlock) {
       if (block.getNumBytes() > 0) {
         long cursize = storedBlock.getNumBytes();
         if (cursize == 0) {
@@ -2519,6 +2527,7 @@
       }
       block = storedBlock;
     }
+    assert storedBlock == block : "Block must be stored by now";
         
     int curReplicaDelta = 0;
         
@@ -2552,7 +2561,7 @@
     }
 
     // filter out containingNodes that are marked for decommission.
-    NumberReplicas num = countNodes(block);
+    NumberReplicas num = countNodes(storedBlock);
     int numCurrentReplica = num.liveReplicas()
       + pendingReplications.getNumReplicas(block);
 

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/INode.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/INode.java?rev=654931&r1=654930&r2=654931&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/INode.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/INode.java Fri May  9 13:19:14 2008
@@ -86,6 +86,13 @@
     setLocalName(name);
   }
 
+  /**
+   * Check whether this is the root inode.
+   */
+  boolean isRoot() {
+    return name.length == 0;
+  }
+
   /** Set the {@link PermissionStatus} */
   protected void setPermissionStatus(PermissionStatus ps) {
     setUser(ps.getUserName());
@@ -519,29 +526,54 @@
    * @param newNode INode to be added
    * @param inheritPermission If true, copy the parent's permission to newNode.
    * @return null if the node already exists; inserted INode, otherwise
-   * @throws FileNotFoundException
+   * @throws FileNotFoundException if parent does not exist or 
+   * is not a directory.
    */
   <T extends INode> T addNode(String path, T newNode, boolean inheritPermission
       ) throws FileNotFoundException {
+    if(addToParent(path, newNode, null, inheritPermission) == null)
+      return null;
+    return newNode;
+  }
+
+  /**
+   * Add new inode to the parent if specified.
+   * Optimized version of addNode() if parent is not null.
+   * 
+   * @return  parent INode if new inode is inserted
+   *          or null if it already exists.
+   * @throws  FileNotFoundException if parent does not exist or 
+   *          is not a directory.
+   */
+  <T extends INode> INodeDirectory addToParent(
+                                      String path,
+                                      T newNode,
+                                      INodeDirectory parent,
+                                      boolean inheritPermission
+                                    ) throws FileNotFoundException {
     byte[][] pathComponents = getPathComponents(path);
     assert pathComponents != null : "Incorrect path " + path;
     int pathLen = pathComponents.length;
     if (pathLen < 2)  // add root
       return null;
-    // Gets the parent INode
-    INode[] inode  = new INode[2];
-    getExistingPathINodes(pathComponents, inode);
-    INode node = inode[0];
-    if (node == null) {
-      throw new FileNotFoundException("Parent path does not exist: "+path);
-    }
-    if (!node.isDirectory()) {
-      throw new FileNotFoundException("Parent path is not a directory: "+path);
+    if(parent == null) {
+      // Gets the parent INode
+      INode[] inodes  = new INode[2];
+      getExistingPathINodes(pathComponents, inodes);
+      INode inode = inodes[0];
+      if (inode == null) {
+        throw new FileNotFoundException("Parent path does not exist: "+path);
+      }
+      if (!inode.isDirectory()) {
+        throw new FileNotFoundException("Parent path is not a directory: "+path);
+      }
+      parent = (INodeDirectory)inode;
     }
-
     // insert into the parent children list
     newNode.name = pathComponents[pathLen-1];
-    return ((INodeDirectory)node).addChild(newNode, inheritPermission);
+    if(parent.addChild(newNode, inheritPermission) == null)
+      return null;
+    return parent;
   }
 
   /**
@@ -755,7 +787,6 @@
     clientName = null;
     clientMachine = null;
     clientNode = null;
-    clientNode = null;
   }
 
   INodeFileUnderConstruction(PermissionStatus permissions,

Modified: hadoop/core/trunk/src/java/org/apache/hadoop/dfs/LeaseManager.java
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/java/org/apache/hadoop/dfs/LeaseManager.java?rev=654931&r1=654930&r2=654931&view=diff
==============================================================================
--- hadoop/core/trunk/src/java/org/apache/hadoop/dfs/LeaseManager.java (original)
+++ hadoop/core/trunk/src/java/org/apache/hadoop/dfs/LeaseManager.java Fri May  9 13:19:14
2008
@@ -246,7 +246,7 @@
         // remember new filename
         String newPath = path.replaceFirst(overwrite, replaceBy);
         toAdd.add(new StringBytesWritable(newPath));
-        LOG.info("Modified Lease for file " + path +
+        LOG.debug("Modified Lease for file " + path +
                  " to new path " + newPath);
       }
       // add modified filenames back into lease.



Mime
View raw message