hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dhr...@apache.org
Subject svn commit: r1040414 - in /hadoop/mapreduce/trunk: CHANGES.txt src/test/mapred/org/apache/hadoop/fs/TestHarFileSystem.java src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java src/tools/org/apache/hadoop/fs/HarFileSystem.java
Date Tue, 30 Nov 2010 05:52:40 GMT
Author: dhruba
Date: Tue Nov 30 05:52:40 2010
New Revision: 1040414

URL: http://svn.apache.org/viewvc?rev=1040414&view=rev
Log:
MAPREDUCE-1752. Implement getFileBlockLocations in HarFilesystem.
(Patrick Kling via dhruba)


Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/fs/TestHarFileSystem.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/fs/HarFileSystem.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=1040414&r1=1040413&r2=1040414&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Nov 30 05:52:40 2010
@@ -19,6 +19,9 @@ Trunk (unreleased changes)
     MAPREDUCE-2200. TestUmbilicalProtocolWithJobToken is failing without Krb
     evironment: needs to be conditional. (cos)
 
+    MAPREDUCE-1752. Implement getFileBlockLocations in HarFilesystem.
+    (Patrick Kling via dhruba)
+
 Release 0.22.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/fs/TestHarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/fs/TestHarFileSystem.java?rev=1040414&r1=1040413&r2=1040414&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/fs/TestHarFileSystem.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/fs/TestHarFileSystem.java Tue Nov 30 05:52:40 2010
@@ -22,6 +22,7 @@ import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.junit.Assert;
+import static org.junit.Assert.*;
 import org.junit.Test;
 
 public class TestHarFileSystem {
@@ -52,4 +53,84 @@ public class TestHarFileSystem {
     final HarFileSystem harfs = new HarFileSystem();
     Assert.assertEquals(null, harfs.getFileChecksum(p));
   }
+
+  /**
+   * Test how block location offsets and lengths are fixed.
+   */
+  @Test
+  public void testFixBlockLocations() {
+    // do some tests where start == 0
+    {
+      // case 1: range starts before current har block and ends after
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 0, 20, 5);
+      assertEquals(b[0].getOffset(), 5);
+      assertEquals(b[0].getLength(), 10);
+    }
+    {
+      // case 2: range starts in current har block and ends after
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 0, 20, 15);
+      assertEquals(b[0].getOffset(), 0);
+      assertEquals(b[0].getLength(), 5);
+    }
+    {
+      // case 3: range starts before current har block and ends in
+      // current har block
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 0, 10, 5);
+      assertEquals(b[0].getOffset(), 5);
+      assertEquals(b[0].getLength(), 5);
+    }
+    {
+      // case 4: range starts and ends in current har block
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 0, 6, 12);
+      assertEquals(b[0].getOffset(), 0);
+      assertEquals(b[0].getLength(), 6);
+    }
+
+    // now try a range where start == 3
+    {
+      // case 5: range starts before current har block and ends after
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 3, 20, 5);
+      assertEquals(b[0].getOffset(), 5);
+      assertEquals(b[0].getLength(), 10);
+    }
+    {
+      // case 6: range starts in current har block and ends after
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 3, 20, 15);
+      assertEquals(b[0].getOffset(), 3);
+      assertEquals(b[0].getLength(), 2);
+    }
+    {
+      // case 7: range starts before current har block and ends in
+      // current har block
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 3, 7, 5);
+      assertEquals(b[0].getOffset(), 5);
+      assertEquals(b[0].getLength(), 5);
+    }
+    {
+      // case 8: range starts and ends in current har block
+      BlockLocation[] b = { new BlockLocation(null, null, 10, 10) };
+      HarFileSystem.fixBlockLocations(b, 3, 3, 12);
+      assertEquals(b[0].getOffset(), 3);
+      assertEquals(b[0].getLength(), 3);
+    }
+
+    // test case from JIRA MAPREDUCE-1752
+    {
+      BlockLocation[] b = { new BlockLocation(null, null, 512, 512),
+                            new BlockLocation(null, null, 1024, 512) };
+      HarFileSystem.fixBlockLocations(b, 0, 512, 896);
+      assertEquals(b[0].getOffset(), 0);
+      assertEquals(b[0].getLength(), 128);
+      assertEquals(b[1].getOffset(), 128);
+      assertEquals(b[1].getLength(), 384);
+    }
+
+  }
 }

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java?rev=1040414&r1=1040413&r2=1040414&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java Tue Nov 30 05:52:40 2010
@@ -25,6 +25,7 @@ import java.util.Iterator;
 import junit.framework.TestCase;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FsShell;
@@ -367,6 +368,40 @@ public class TestHarFileSystem extends T
     assertTrue("number of bytes left should be -1", reduceIn.read(b) == -1);
     reduceIn.close();
   }
+  
+  public void testGetFileBlockLocations() throws Exception {
+    fs.delete(archivePath, true);
+    Configuration conf = mapred.createJobConf();
+    HadoopArchives har = new HadoopArchives(conf);
+    String[] args = new String[8];
+    args[0] = "-Dhar.block.size=512";
+    args[1] = "-Dhar.partfile.size=1";
+    args[2] = "-archiveName";
+    args[3] = "foo bar.har";
+    args[4] = "-p";
+    args[5] = fs.getHomeDirectory().toString();
+    args[6] = "test";
+    args[7] = archivePath.toString();
+    int ret = ToolRunner.run(har, args);
+    assertTrue("failed test", ret == 0);
+    Path finalPath = new Path(archivePath, "foo bar.har");
+    Path fsPath = new Path(inputPath.toUri().getPath());
+    Path filePath = new Path(finalPath, "test");
+    Path filea = new Path(filePath, "a");
+    // make it a har path
+    Path harPath = new Path("har://" + filea.toUri().getPath());
+    FileSystem harFs = harPath.getFileSystem(conf);
+    FileStatus[] statuses = harFs.listStatus(filePath);
+    for (FileStatus status : statuses) {
+      BlockLocation[] locations =
+        harFs.getFileBlockLocations(status, 0, status.getLen());
+      long lastOffset = 0;
+      assertEquals("Only one block location expected for files this small",
+                   1, locations.length);
+      assertEquals("Block location should start at offset 0",
+                   0, locations[0].getOffset());
+    }
+  }
 
   public void testSpaces() throws Exception {
      fs.delete(archivePath, true);

Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/fs/HarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/fs/HarFileSystem.java?rev=1040414&r1=1040413&r2=1040414&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/fs/HarFileSystem.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/fs/HarFileSystem.java Tue Nov 30 05:52:40 2010
@@ -336,24 +336,74 @@ public class HarFileSystem extends Filte
     //change this to Har uri 
     return new Path(uri.getScheme(), harAuth, tmpURI.getPath());
   }
+
+  /**
+   * Fix offset and length of block locations.
+   * Note that this method modifies the original array.
+   * @param locations block locations of har part file
+   * @param start the start of the desired range in the contained file
+   * @param len the length of the desired range
+   * @param fileOffsetInHar the offset of the desired file in the har part file
+   * @return block locations with fixed offset and length
+   */  
+  static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
+                                          long start,
+                                          long len,
+                                          long fileOffsetInHar) {
+    // offset 1 past last byte of desired range
+    long end = start + len;
+
+    for (BlockLocation location : locations) {
+      // offset of part block relative to beginning of desired file
+      // (may be negative if file starts in this part block)
+      long harBlockStart = location.getOffset() - fileOffsetInHar;
+      // offset 1 past last byte of har block relative to beginning of
+      // desired file
+      long harBlockEnd = harBlockStart + location.getLength();
+      
+      if (start > harBlockStart) {
+        // desired range starts after beginning of this har block
+        // fix offset to beginning of relevant range (relative to desired file)
+        location.setOffset(start);
+        // fix length to relevant portion of har block
+        location.setLength(location.getLength() - (start - harBlockStart));
+      } else {
+        // desired range includes beginning of this har block
+        location.setOffset(harBlockStart);
+      }
+      
+      if (harBlockEnd > end) {
+        // range ends before end of this har block
+        // fix length to remove irrelevant portion at the end
+        location.setLength(location.getLength() - (harBlockEnd - end));
+      }
+    }
+    
+    return locations;
+  }
   
   /**
-   * get block locations from the underlying fs
+   * Get block locations from the underlying fs and fix their
+   * offsets and lengths.
    * @param file the input filestatus to get block locations
-   * @param start the start in the file
-   * @param len the length in the file
+   * @param start the start of the desired range in the contained file
+   * @param len the length of the desired range
    * @return block locations for this segment of file
    * @throws IOException
    */
   @Override
   public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
-      long len) throws IOException {
-    // just fake block locations
-    // its fast and simpler
-    // doing various block location manipulation
-    // with part files adds a lot of overhead because 
-    // of the look ups of filestatus in index files
-    return new BlockLocation[]{ new BlockLocation() };
+                                               long len) throws IOException {
+    HarStatus hstatus = getFileHarStatus(file.getPath());
+    Path partPath = new Path(archivePath, hstatus.getPartName());
+    FileStatus partStatus = fs.getFileStatus(partPath);
+
+    // get all part blocks that overlap with the desired file blocks
+    BlockLocation[] locations = 
+      fs.getFileBlockLocations(partStatus,
+                               hstatus.getStartIndex() + start, len);
+
+    return fixBlockLocations(locations, start, len, hstatus.getStartIndex());
   }
   
   /**
@@ -636,6 +686,11 @@ public class HarFileSystem extends Filte
    */
   @Override
   public FileStatus getFileStatus(Path f) throws IOException {
+    HarStatus hstatus = getFileHarStatus(f);
+    return toFileStatus(hstatus, null);
+  }
+
+  private HarStatus getFileHarStatus(Path f) throws IOException {
     // get the fs DataInputStream for the underlying file
     // look up the index.
     Path p = makeQualified(f);
@@ -647,11 +702,8 @@ public class HarFileSystem extends Filte
     if (readStr == null) {
       throw new FileNotFoundException("File: " +  f + " does not exist in " + uri);
     }
-    HarStatus hstatus = null;
-    hstatus = new HarStatus(readStr);
-    return toFileStatus(hstatus, null);
+    return new HarStatus(readStr);
   }
-
   /**
    * @return null since no checksum algorithm is implemented.
    */
@@ -667,17 +719,7 @@ public class HarFileSystem extends Filte
   @Override
   public FSDataInputStream open(Path f, int bufferSize) throws IOException {
     // get the fs DataInputStream for the underlying file
-    // look up the index.
-    Path p = makeQualified(f);
-    Path harPath = getPathInHar(p);
-    if (harPath == null) {
-      throw new IOException("Invalid file name: " + f + " in " + uri);
-    }
-    String readStr = fileStatusInIndex(harPath);
-    if (readStr == null) {
-      throw new FileNotFoundException(f + ": not found in " + archivePath);
-    }
-    HarStatus hstatus = new HarStatus(readStr); 
+    HarStatus hstatus = getFileHarStatus(f);
     // we got it.. woo hooo!!! 
     if (hstatus.isDir()) {
       throw new FileNotFoundException(f + " : not a file in " +



Mime
View raw message