hadoop-mapreduce-commits mailing list archives

From maha...@apache.org
Subject svn commit: r931274 - in /hadoop/mapreduce/trunk: CHANGES.txt src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java src/tools/org/apache/hadoop/tools/HadoopArchives.java
Date Tue, 06 Apr 2010 19:07:34 GMT
Author: mahadev
Date: Tue Apr  6 19:07:33 2010
New Revision: 931274

URL: http://svn.apache.org/viewvc?rev=931274&view=rev
Log:
MAPREDUCE-1585. Create Hadoop Archives version 2 with filenames URL-encoded (rodrigo via mahadev)
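
In short: version 1 archives could not represent source paths containing
spaces (the har.space.replace.* options only papered over this by rewriting
the names), so this change bumps the archive VERSION to 2 and stores every
entry name URL-encoded in the index. A minimal illustrative sketch of the
round trip, assuming UTF-8 as in the mapper's encodeName() below (the
matching decode is expected on the read path in HarFileSystem, which is not
part of this patch):

    import java.net.URLDecoder;
    import java.net.URLEncoder;

    public class HarNameCodecSketch {
      public static void main(String[] args) throws Exception {
        String name   = "sub 1 with suffix";                // a name with spaces
        String stored = URLEncoder.encode(name, "UTF-8");   // -> "sub+1+with+suffix"
        String back   = URLDecoder.decode(stored, "UTF-8"); // -> "sub 1 with suffix"
        assert name.equals(back);
        System.out.println(name + " -> " + stored);
      }
    }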

Modified:
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java
    hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java
    hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=931274&r1=931273&r2=931274&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Tue Apr  6 19:07:33 2010
@@ -511,6 +511,9 @@ Trunk (unreleased changes)
     MAPREDUCE-1602. Fix the error message for the case that src does not
     exist.  (szetszwo)
 
+    MAPREDUCE-1585. Create Hadoop Archives version 2 with filenames
+    URL-encoded (rodrigo via mahadev)
+
 Release 0.21.0 - Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java?rev=931274&r1=931273&r2=931274&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java Tue Apr  6 19:07:33 2010
@@ -112,8 +112,9 @@ public class TestHadoopArchives extends 
     createFile(sub1, "x", fs);
     createFile(sub1, "y", fs);
     createFile(sub1, "z", fs);
-    final Path sub2 = new Path(inputPath, "sub 2");
+    final Path sub2 = new Path(inputPath, "sub 1 with suffix");
     fs.mkdirs(sub2);
+    createFile(sub2, "z", fs);
     final Configuration conf = mapred.createJobConf();
     final FsShell shell = new FsShell(conf);
 
@@ -125,23 +126,9 @@ public class TestHadoopArchives extends 
     final String prefix = "har://hdfs-" + uri.getHost() +":" + uri.getPort()
         + archivePath.toUri().getPath() + Path.SEPARATOR;
 
-    {//space replacement is not enabled
-      final String[] args = {
-          "-archiveName",
-          "fail.har",
-          "-p",
-          inputPathStr,
-          "*",
-          archivePath.toString()
-      };
-      final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
-      assertEquals(-1, ToolRunner.run(har, args));
-    }
-
     {//Enable space replacement
       final String harName = "foo.har";
       final String[] args = {
-          "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
           "-archiveName",
           harName,
           "-p",
@@ -154,87 +141,11 @@ public class TestHadoopArchives extends 
 
       //compare results
       final List<String> harPaths = lsr(shell, prefix + harName);
-      final List<String> replaced = replace(originalPaths, HadoopArchives.SPACE_REPLACEMENT_DEFAULT);
-      assertEquals(replaced, harPaths);
-    }
-
-    {//Replace space with space
-      final String[] args = {
-          "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
-          "-D" + HadoopArchives.SPACE_REPLACEMENT_LABEL + "=p q",
-          "-Dhar.space.replace.enable=true",
-          "-archiveName",
-          "fail.har",
-          "-p",
-          inputPathStr,
-          "*",
-          archivePath.toString()
-      };
-      final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
-      try {
-        ToolRunner.run(har, args);
-        fail();
-      } catch(IllegalArgumentException iae) {
-        System.out.println("GOOD");
-        iae.printStackTrace();
-      }
-    }
-
-    {//Replace space with Path.SEPARATOR
-      final String[] args = {
-          "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
-          "-D" + HadoopArchives.SPACE_REPLACEMENT_LABEL + "=" + Path.SEPARATOR,
-          "-Dhar.space.replace.enable=true",
-          "-archiveName",
-          "fail.har",
-          "-p",
-          inputPathStr,
-          "*",
-          archivePath.toString()
-      };
-      final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
-      try {
-        ToolRunner.run(har, args);
-        fail();
-      } catch(IllegalArgumentException iae) {
-        System.out.println("GOOD");
-        iae.printStackTrace();
-      }
+      assertEquals(originalPaths, harPaths);
     }
 
-    {//Replace space with a valid replacement
-      final String harName = "bar.har";
-      final String replacement = "+-";
-      final String[] args = {
-          "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
-          "-D" + HadoopArchives.SPACE_REPLACEMENT_LABEL + "=" + replacement,
-          "-Dhar.space.replace.enable=true",
-          "-archiveName",
-          harName,
-          "-p",
-          inputPathStr,
-          "*",
-          archivePath.toString()
-      };
-      final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
-      assertEquals(0, ToolRunner.run(har, args));
-
-      //compare results
-      final List<String> harPaths = lsr(shell, prefix + harName);
-      final List<String> replaced = replace(originalPaths, replacement);
-      assertEquals(replaced, harPaths);
-    }
   }
 
-  private static List<String> replace(List<String> paths, String replacement) {
-    final List<String> replaced = new ArrayList<String>();
-    for(int i = 0; i < paths.size(); i++) {
-      replaced.add(paths.get(i).replace(" ", replacement));
-    }
-    Collections.sort(replaced);
-    return replaced;   
-  }
-      
   private static List<String> lsr(final FsShell shell, String dir
       ) throws Exception {
     System.out.println("lsr root=" + dir);

Modified: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java?rev=931274&r1=931273&r2=931274&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java (original)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHarFileSystem.java Tue Apr  6 19:07:33 2010
@@ -37,6 +37,7 @@ import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.*;
 import org.apache.hadoop.tools.HadoopArchives;
 import org.apache.hadoop.util.ToolRunner;
+import org.mortbay.log.Log;
 
 /**
  * test the har file system
@@ -62,7 +63,7 @@ public class TestHarFileSystem extends T
         getPath().substring(1), "test");
     filea = new Path(inputPath,"a");
     fileb = new Path(inputPath,"b");
-    filec = new Path(inputPath,"c");
+    filec = new Path(inputPath,"c c");
     archivePath = new Path(fs.getHomeDirectory(), "tmp");
     fs.mkdirs(inputPath);
     FSDataOutputStream out = fs.create(filea); 
@@ -117,7 +118,7 @@ public class TestHarFileSystem extends T
   private void  checkBytes(Path harPath, Configuration conf) throws IOException {
     Path harFilea = new Path(harPath, "a");
     Path harFileb = new Path(harPath, "b");
-    Path harFilec = new Path(harPath, "c");
+    Path harFilec = new Path(harPath, "c c");
     FileSystem harFs = harFilea.getFileSystem(conf);
     FSDataInputStream fin = harFs.open(harFilea);
     byte[] b = new byte[4];
@@ -291,7 +292,7 @@ public class TestHarFileSystem extends T
     // fileb and filec
     Path harFilea = new Path(harPath, "a");
     Path harFileb = new Path(harPath, "b");
-    Path harFilec = new Path(harPath, "c");
+    Path harFilec = new Path(harPath, "c c");
     FileSystem harFs = harFilea.getFileSystem(conf);
     FSDataInputStream fin = harFs.open(harFilea);
     byte[] b = new byte[4];
@@ -311,6 +312,8 @@ public class TestHarFileSystem extends T
     assertTrue("strings are equal ", (b[0] == "c".getBytes()[0]));
     // ok all files match 
     // run a map reduce job
+    FileSystem fsHar = harPath.getFileSystem(conf);
+    FileStatus[] bla = fsHar.listStatus(harPath);
     Path outdir = new Path(fs.getHomeDirectory(), "mapout"); 
     JobConf jobconf = mapred.createJobConf();
     FileInputFormat.addInputPath(jobconf, harPath);
@@ -331,7 +334,7 @@ public class TestHarFileSystem extends T
     FSDataInputStream reduceIn = fs.open(reduceFile);
     b = new byte[6];
     readBytes = reduceIn.read(b);
-    assertTrue("Should read 6 bytes.", readBytes == 6);
+    assertTrue("Should read 6 bytes instead of "+readBytes+".", readBytes == 6);
     //assuming all the 6 bytes were read.
     Text readTxt = new Text(b);
     assertTrue("a\nb\nc\n".equals(readTxt.toString()));

Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java?rev=931274&r1=931273&r2=931274&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java Tue Apr  6 19:07:33 2010
@@ -22,6 +22,9 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLDecoder;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -74,7 +77,7 @@ import org.apache.hadoop.util.ToolRunner
  * Hadoop archives look at {@link HarFileSystem}.
  */
 public class HadoopArchives implements Tool {
-  public static final int VERSION = 1;
+  public static final int VERSION = 2;
   private static final Log LOG = LogFactory.getLog(HadoopArchives.class);
   
   private static final String NAME = "har"; 
@@ -90,22 +93,6 @@ public class HadoopArchives implements T
   static final String HAR_BLOCKSIZE_LABEL = NAME + ".block.size";
   /**the size of the part files that will be created when archiving **/
   static final String HAR_PARTSIZE_LABEL = NAME + ".partfile.size";
-  static final String SPACE_REPLACE_LABEL = NAME + ".space.replace.enable";
-  static final boolean SPACE_REPLACE_DEFAULT = false;
-  static final String SPACE_REPLACEMENT_LABEL = NAME + ".space.replacement";
-  static final String SPACE_REPLACEMENT_DEFAULT = "_";
-  static final String SPACE_REPLACEMENT_DESCRIPTION
-    = HadoopArchives.class.getSimpleName() + " (version=" + VERSION
-    + ") does not support source paths with the space character."
-    + "\n\nThere is a space replacement option, which can be enabled by"
-    + "\n  -D" + SPACE_REPLACE_LABEL + "=true"
-    + "\nThe space replacement string can be specified by"
-    + "\n  -D" + SPACE_REPLACEMENT_LABEL + "=<REPLACEMENT_STRING>"
-    + "\nThe default <REPLACEMENT_STRING> is \""
-    + SPACE_REPLACEMENT_DEFAULT
-    + "\".\n*** Note that the original paths will not be changed"
-    + " by the space replacement option."
-    + "  The resulted har contains only the replaced paths.";
 
   /** size of each part file size **/
   long partSize = 2 * 1024 * 1024 * 1024l;
@@ -118,11 +105,6 @@ public class HadoopArchives implements T
   
  
   private JobConf conf;
-  private String spaceReplacement = null;
-
-  private boolean isSpaceReplaceEnabled() {
-    return spaceReplacement != null;
-  }
 
   public void setConf(Configuration conf) {
     if (conf instanceof JobConf) {
@@ -315,13 +297,12 @@ public class HadoopArchives implements T
    * @param fullPath the full path
    * @param root the prefix root to be truncated
    * @return the relative path
-   * @throws IOException 
    */
-  private String relPathToRoot(Path fullPath, Path root) throws IOException {
+  private Path relPathToRoot(Path fullPath, Path root) {
     // just take some effort to do it 
     // rather than just using substring 
     // so that we do not break sometime later
-    final String justRoot = Path.SEPARATOR;
+    final Path justRoot = new Path(Path.SEPARATOR);
     if (fullPath.depth() == root.depth()) {
       return justRoot;
     }
@@ -332,7 +313,7 @@ public class HadoopArchives implements T
         retPath = new Path(parent.getName(), retPath);
         parent = parent.getParent();
       }
-      return new Path(justRoot, retPath.toString()).toString();
+      return new Path(justRoot, retPath);
     }
     return null;
   }
@@ -403,35 +384,20 @@ public class HadoopArchives implements T
     }
     Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
     for (Map.Entry<String, HashSet<String>> entry : keyVals) {
-      final String relPath = relPathToRoot(new Path(entry.getKey()), parentPath);
+      final Path relPath = relPathToRoot(new Path(entry.getKey()), parentPath);
       if (relPath != null) {
         final String[] children = new String[entry.getValue().size()];
         int i = 0;
         for(String child: entry.getValue()) {
           children[i++] = child;
         }
-        append(srcWriter, 0L, relPath, children);
+        append(srcWriter, 0L, relPath.toString(), children);
       }
     }
   }
 
-  //check whether the path contains the space character.
-  private void checkSpace(String p) throws IOException {
-    //check only if space replacement is disabled.
-    if (!isSpaceReplaceEnabled() && p.indexOf(' ') >= 0) {
-      throw new IOException("Source \"" + p + "\" contains the space character.  "
-          + SPACE_REPLACEMENT_DESCRIPTION);
-    }
-  }
-
   private void append(SequenceFile.Writer srcWriter, long len,
       String path, String[] children) throws IOException {
-    checkSpace(path);
-    if (children != null) {
-      for(String child: children) {
-        checkSpace(child);
-      }
-    }
     srcWriter.append(new LongWritable(len), new HarEntry(path, children));
   }
     
@@ -544,7 +510,7 @@ public class HadoopArchives implements T
         for (FileStatusDir statDir: allFiles) {
           FileStatus stat = statDir.getFileStatus();
           long len = stat.isDir()? 0:stat.getLen();
-          final String path = relPathToRoot(stat.getPath(), parentPath);
+          final Path path = relPathToRoot(stat.getPath(), parentPath);
           final String[] children;
           if (stat.isDir()) {
             //get the children 
@@ -557,7 +523,7 @@ public class HadoopArchives implements T
           else {
             children = null;
           }
-          append(srcWriter, len, path, children);
+          append(srcWriter, len, path.toString(), children);
           srcWriter.sync();
           numFiles++;
           totalSize += len;
@@ -596,8 +562,6 @@ public class HadoopArchives implements T
   static class HArchivesMapper 
   implements Mapper<LongWritable, HarEntry, IntWritable, Text> {
     private JobConf conf = null;
-    private String spaceReplacement;
-
     int partId = -1 ; 
     Path tmpOutputDir = null;
     Path tmpOutput = null;
@@ -615,8 +579,6 @@ public class HadoopArchives implements T
     // tmp files. 
     public void configure(JobConf conf) {
       this.conf = conf;
-      this.spaceReplacement = conf.get(SPACE_REPLACEMENT_LABEL,
-          SPACE_REPLACEMENT_DEFAULT);
 
       // this is tightly tied to map reduce
       // since it does not expose an api 
@@ -676,8 +638,9 @@ public class HadoopArchives implements T
       return new Path(parent, new Path(p.toString().substring(1)));
     }
 
-    private String replaceSpaces(String s) {
-      return s.replace(" ", spaceReplacement);
+    private static String encodeName(String s) 
+      throws UnsupportedEncodingException {
+      return URLEncoder.encode(s,"UTF-8");
     }
 
     // read files from the split input 
@@ -690,15 +653,15 @@ public class HadoopArchives implements T
         Reporter reporter) throws IOException {
       Path relPath = new Path(value.path);
       int hash = HarFileSystem.getHarHash(relPath);
-      String towrite = replaceSpaces(relPath.toString());
+      String towrite = encodeName(relPath.toString());
       Path srcPath = realPath(relPath, rootPath);
       long startPos = partStream.getPos();
       if (value.isDir()) { 
-        towrite += " dir none " + 0 + " " + 0 + " ";
+        towrite += " dir none 0 0 ";
         StringBuffer sbuff = new StringBuffer();
         sbuff.append(towrite);
         for (String child: value.children) {
-          sbuff.append(replaceSpaces(child) + " ");
+          sbuff.append(encodeName(child) + " ");
         }
         towrite = sbuff.toString();
         //reading directories is also progress
@@ -884,22 +847,6 @@ public class HadoopArchives implements T
             + srcPaths);
       }
 
-      //process space replacement configuration
-      if (conf.getBoolean(SPACE_REPLACE_LABEL, SPACE_REPLACE_DEFAULT)) {
-        spaceReplacement = conf.get(SPACE_REPLACEMENT_LABEL,
-            SPACE_REPLACEMENT_DEFAULT);
-        if (spaceReplacement.indexOf(' ') >= 0) {
-          throw new IllegalArgumentException("spaceReplacement = \""
-              + spaceReplacement + "\" cannot contain the space character.");
-        }
-        if (spaceReplacement.indexOf(Path.SEPARATOR) >= 0) {
-          throw new IllegalArgumentException("spaceReplacement = \""
-              + spaceReplacement + "\" cannot contain the path separator \""
-              + Path.SEPARATOR + "\".");
-        }
-        LOG.info(SPACE_REPLACEMENT_LABEL + " = " + spaceReplacement);
-      }
-
       archive(parentPath, globPaths, archiveName, destPath);
     } catch(IOException ie) {
       System.err.println(ie.getLocalizedMessage());
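
Why the encoding is needed at all: the mapper's map() above emits one
space-delimited index line per entry; for a directory it appends
" dir none 0 0 " followed by the children. Because every name token is
URL-encoded first, a name containing spaces can no longer break the field
split on the read side. An illustrative reconstruction (directory case
only, as shown in this patch; the leading-slash handling of real relative
paths is simplified away here):

    import java.net.URLDecoder;
    import java.net.URLEncoder;

    public class HarIndexLineSketch {
      public static void main(String[] args) throws Exception {
        String dir   = URLEncoder.encode("sub 1 with suffix", "UTF-8"); // "sub+1+with+suffix"
        String child = URLEncoder.encode("z", "UTF-8");
        String towrite = dir + " dir none 0 0 " + child + " ";
        // -> "sub+1+with+suffix dir none 0 0 z "
        String[] fields = towrite.trim().split(" ");
        System.out.println(URLDecoder.decode(fields[0], "UTF-8")); // "sub 1 with suffix"
      }
    }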


