hadoop-mapreduce-commits mailing list archives

From omal...@apache.org
Subject svn commit: r1079237 - in /hadoop/mapreduce/branches/yahoo-merge/src: contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/ docs/src/documentation/content/xdocs/
Date Tue, 08 Mar 2011 05:59:06 GMT
Author: omalley
Date: Tue Mar  8 05:59:05 2011
New Revision: 1079237

URL: http://svn.apache.org/viewvc?rev=1079237&view=rev
Log:
commit a86f62cfed40a699a2184d67a13c21f7a0b39d17
Author: Ravi Gummadi <gravi@yahoo-inc.com>
Date:   Fri Jan 7 13:52:18 2011 +0530

    : Make GridMix emulate usage of local FS based distributed
    cache files in simulated jobs. Patch is available at
     (gravi)

Added:
    hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java
    hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestPseudoLocalFs.java
Modified:
    hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
    hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
    hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java
    hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/gridmix.xml

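For orientation, the patch classifies trace entries as local-FS based distributed cache files using a simple path heuristic. Below is a minimal, self-contained sketch of that heuristic, mirroring isLocalDistCacheFile() from the DistributedCacheEmulator.java diff that follows (the class name and sample paths here are illustrative only, not part of the commit):

    public class LocalDistCacheFileCheck {
      // A trace entry is treated as a local-FS dist cache file when it is
      // private and its path lies under the submitter's staging directory.
      static boolean isLocalDistCacheFile(String filePath, String user,
                                          String visibility) {
        return !Boolean.valueOf(visibility)
               && filePath.contains(user + "/.staging");
      }

      public static void main(String[] args) {
        // private file under alice's staging dir => true (local FS based)
        System.out.println(isLocalDistCacheFile(
            "hdfs:///user/alice/.staging/job_1/files/f.txt", "alice", "false"));
        // public HDFS file => false
        System.out.println(isLocalDistCacheFile(
            "hdfs:///tmp/file1.txt", "alice", "true"));
      }
    }
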
Modified: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java?rev=1079237&r1=1079236&r2=1079237&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/DistributedCacheEmulator.java Tue Mar  8 05:59:05 2011
@@ -38,6 +38,8 @@ import org.apache.hadoop.tools.rumen.Job
 import org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants;
 
 import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -51,18 +53,27 @@ import java.util.Map;
  * <br> Emulation of Distributed Cache Load in gridmix will put load on
  * TaskTrackers and affects execution time of tasks because of localization of
  * distributed cache files by TaskTrackers.
- * <br> Gridmix creates private and public distributed cache files by launching
+ * <br> Gridmix creates distributed cache files for simulated jobs by launching
  * a MapReduce job {@link GenerateDistCacheData} in advance i.e. before
  * launching simulated jobs.
- * <br> The file paths of original cluster are mapped to the simulation
- * cluster's paths. The configuration properties like
+ * <br> The distributed cache file paths used in the original cluster are mapped
+ * to unique file names in the simulated cluster.
+ * <br> All HDFS-based distributed cache files generated by gridmix are
+ * public distributed cache files. But Gridmix makes sure that load incurred due
+ * to localization of private distributed cache files on the original cluster
+ * is also faithfully simulated. Gridmix emulates the load due to private
+ * distributed cache files by mapping private distributed cache files of
+ * different users in the original cluster to different public distributed cache
+ * files in the simulated cluster.
+ *
+ * <br> The configuration properties like
  * {@link MRJobConfig#CACHE_FILES}, {@link MRJobConfig#CACHE_FILE_VISIBILITIES},
  * {@link MRJobConfig#CACHE_FILES_SIZES} and
  * {@link MRJobConfig#CACHE_FILE_TIMESTAMPS} obtained from trace are used to
  *  decide
  * <li> file size of each distributed cache file to be generated
  * <li> whether a dist cache file is already seen in this trace file
- * <li> whether a distributed cache file be considered public or private.
+ * <li> whether a distributed cache file was considered public or private.
  * <br>
  * <br> Gridmix configures these generated files as distributed cache files for
  * the simulated jobs.
@@ -80,8 +91,6 @@ class DistributedCacheEmulator {
   int MISSING_DIST_CACHE_FILES_ERROR = 1;
 
   private Path distCachePath;
-  private Path privateDistCachePath = null;
-  private Path publicDistCachePath = null;
 
   /**
    * Map between simulated cluster's dist cache file paths and their file sizes.
@@ -105,6 +114,10 @@ class DistributedCacheEmulator {
   boolean generateDistCacheData = false;
 
   Configuration conf; // gridmix configuration
+
+  // Pseudo local file system where local FS based dist cache files are
+  // created by gridmix.
+  FileSystem pseudoLocalFs = null;
   
   /**
    * @param conf gridmix configuration
@@ -114,6 +127,7 @@ class DistributedCacheEmulator {
   public DistributedCacheEmulator(Configuration conf, Path ioPath) {
     this.conf = conf;
     distCachePath = new Path(ioPath, "distributedCache");
+    this.conf.setClass("fs.pseudo.impl", PseudoLocalFs.class, FileSystem.class);
   }
 
   /**
@@ -131,8 +145,9 @@ class DistributedCacheEmulator {
    * of &lt;ioPath&gt; till root. This is because for emulation of dist cache
    * load, dist cache files created under
    * &lt;ioPath/distributedCache/public/&gt; should be considered by hadoop
-   * as public dist cache files.</ol>
-   * <br> For (2), (3) and (4), generation of distributed cache data
+   * as public dist cache files.
+   * <li> creation of pseudo local file system fails.</ol>
+   * <br> For (2), (3), (4) and (5), generation of distributed cache data
    * is also disabled.
    * 
    * @param traceIn trace file path. If this is '-', then trace comes from the
@@ -179,6 +194,17 @@ class DistributedCacheEmulator {
         }
       }
     }
+
+    // Check if pseudo local file system can be created
+    try {
+      pseudoLocalFs = FileSystem.get(new URI("pseudo:///"), conf);
+    } catch (URISyntaxException e) {
+      LOG.warn("Gridmix will not emulate Distributed Cache load because "
+          + "creation of pseudo local file system failed.");
+      e.printStackTrace();
+      emulateDistributedCache = generateDistCacheData = false;
+      return;
+    }
   }
 
   /**
@@ -196,17 +222,10 @@ class DistributedCacheEmulator {
   }
 
   /**
-   * @return Dir under which gridmix creates private distributed cache files
+   * @return the distributed cache directory path
    */
-  Path getPrivateDistCacheDir() {
-    return privateDistCachePath;
-  }
-
-  /**
-   * @return Dir under which gridmix creates public distributed cache files
-   */
-  Path getPublicDistCacheDir() {
-    return publicDistCachePath;
+  Path getDistributedCacheDir() {
+    return distCachePath;
   }
 
   /**
@@ -220,29 +239,18 @@ class DistributedCacheEmulator {
   int setupGenerateDistCacheData(JobStoryProducer jsp)
       throws IOException {
 
-    createDistCacheDirectories();
+    createDistCacheDirectory();
     return buildDistCacheFilesList(jsp);
   }
 
   /**
-   * Create distributed cache directories where dist cache files will be
+   * Create distributed cache directory where distributed cache files will be
    * created by the MapReduce job GRIDMIX_GENERATE_DISTCACHE_DATA.
    * @throws IOException
    */
-  void createDistCacheDirectories() throws IOException {
+  private void createDistCacheDirectory() throws IOException {
     FileSystem fs = FileSystem.get(conf);
     FileSystem.mkdirs(fs, distCachePath, new FsPermission((short) 0777));
-
-    // For private dist cache dir, no execute permission for others
-    privateDistCachePath =
-        new Path(distCachePath, "private");
-    FileSystem.mkdirs(fs, privateDistCachePath,
-        new FsPermission((short) 0776));
-
-    publicDistCachePath =
-        new Path(distCachePath, "public");
-    FileSystem.mkdirs(fs, publicDistCachePath,
-        new FsPermission((short) 0777));
   }
 
   /**
@@ -307,9 +315,17 @@ class DistributedCacheEmulator {
         jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
 
       FileSystem fs = FileSystem.get(conf);
+      String user = jobConf.getUser();
       for (int i = 0; i < files.length; i++) {
+        if (isLocalDistCacheFile(files[i], user, visibilities[i])) {
+          // local FS based dist cache file.
+          // Create this file on the pseudo local FS on the fly (i.e. when the
+          // simulated job is submitted).
+          continue;
+        }
+        // dist cache file on hdfs
         String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
-                              Boolean.valueOf(visibilities[i]));
+                              Boolean.valueOf(visibilities[i]), user);
 
         // No need to add a dist cache file path to the list if
         // (1) the mapped path is already there in the list OR
@@ -328,18 +344,47 @@ class DistributedCacheEmulator {
   }
 
   /**
-   * Map the dist cache file path from original cluster to simulated cluster
-   * @param file dist cache file path
+   * Check if the given file path was constructed by MapReduce for a
+   * distributed cache file on the local file system.
+   * @param filePath path of the distributed cache file
+   * @param user job submitter of the job for which &lt;filePath&gt; is a
+   *             distributed cache file
+   * @param visibility <code>true</code> for public distributed cache file
+   * @return true if the path provided is of a local file system based
+   *              distributed cache file
+   */
+  private boolean isLocalDistCacheFile(String filePath, String user,
+                                       String visibility) {
+    return (!Boolean.valueOf(visibility)
+            && filePath.contains(user + "/.staging"));
+  }
+
+  /**
+   * Map the HDFS based distributed cache file path from original cluster to
+   * a unique file name on the simulated cluster.
+   * <br> Unique distributed cache file names on the simulated cluster are
+   * generated using the original cluster's <li>file path, <li>timestamp and,
+   * for private distributed cache files, <li>the job-submitter.
+   * <br> This implies that if, on the original cluster, a single HDFS file is
+   * considered as two private distributed cache files for two jobs of
+   * different users, then the corresponding simulated jobs will have two
+   * different files of the same size in the public distributed cache, one for
+   * each user. These two simulated jobs will not share the distributed cache
+   * files, thus leading to the same load as seen in the original cluster.
+   * @param file distributed cache file path
   * @param timeStamp time stamp of dist cache file
    * @param isPublic true if this dist cache file is a public dist cache file
+   * @param user job submitter on original cluster
    * @return the mapped path on simulated cluster
    */
   private String mapDistCacheFilePath(String file, String timeStamp,
-      boolean isPublic) {
-    Path distCacheDir = isPublic ? publicDistCachePath
-                                 : privateDistCachePath;
-    return new Path(distCacheDir,
-        MD5Hash.digest(file + timeStamp).toString()).toUri().getPath();
+      boolean isPublic, String user) {
+    String id = file + timeStamp;
+    if (!isPublic) {
+      id = id.concat(user);// consider job-submitter for private dist cache file
+    }
+    return new Path(distCachePath, MD5Hash.digest(id).toString()).toUri()
+               .getPath();
   }
 
   /**
@@ -392,6 +437,9 @@ class DistributedCacheEmulator {
     if (src_writer != null) {
       src_writer.close();
     }
+    // Set delete on exit for 'dist cache files list' as it is not needed later.
+    fs.deleteOnExit(distCacheFilesList);
+
     conf.setInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, fileCount);
     conf.setLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, byteCount);
     LOG.info("DistCacheFilesToBeGeneratedCount=" + fileCount
@@ -415,6 +463,10 @@ class DistributedCacheEmulator {
    * files of a simulated job by mapping the original cluster's dist cache
    * file paths to the simulated cluster's paths and setting these mapped paths
    * in the job configuration of the simulated job.
+   * <br>
+   * Configure local FS based distributed cache files through the property
+   * "tmpfiles" and hdfs based distributed cache files through the property
+   * {@link MRJobConfig#CACHE_FILES}.
    * @param conf configuration for the simulated job to be run
    * @param jobConf job configuration of original cluster's job, obtained from
    *                trace
@@ -426,18 +478,44 @@ class DistributedCacheEmulator {
 
       String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
       if (files != null) {
-        String[] cacheFiles = new String[files.length];
+        // hdfs based dist cache files to be configured for simulated job
+        List<String> cacheFiles = new ArrayList<String>();
+        // local FS based dist cache files to be configured for simulated job
+        List<String> localCacheFiles = new ArrayList<String>();
+
         String[] visibilities =
           jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
         String[] timeStamps =
           jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
+        String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
 
+        String user = jobConf.getUser();
         for (int i = 0; i < files.length; i++) {
-          String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
-              Boolean.valueOf(visibilities[i]));
-          cacheFiles[i] = mappedPath;
+          if (isLocalDistCacheFile(files[i], user, visibilities[i])) {
+            // local FS based dist cache file.
+            // Create this file on the pseudo local FS.
+            String fileId = MD5Hash.digest(files[i] + timeStamps[i]).toString();
+            long fileSize = Long.valueOf(fileSizes[i]);
+            Path mappedLocalFilePath =
+                PseudoLocalFs.generateFilePath(fileId, fileSize)
+                    .makeQualified(pseudoLocalFs.getUri(),
+                                   pseudoLocalFs.getWorkingDirectory());
+            pseudoLocalFs.create(mappedLocalFilePath);
+            localCacheFiles.add(mappedLocalFilePath.toUri().toString());
+          } else {
+            // hdfs based dist cache file.
+            // Get the mapped HDFS path on simulated cluster
+            String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
+                                  Boolean.valueOf(visibilities[i]), user);
+            cacheFiles.add(mappedPath);
+          }
         }
-        conf.setStrings(MRJobConfig.CACHE_FILES, cacheFiles);
+        // configure hdfs based dist cache files for simulated job
+        conf.setStrings(MRJobConfig.CACHE_FILES,
+                        cacheFiles.toArray(new String[cacheFiles.size()]));
+        // configure local FS based dist cache files for simulated job
+        conf.setStrings("tmpfiles", localCacheFiles.toArray(
+                                      new String[localCacheFiles.size()]));
       }
     }
   }

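For clarity, the path-mapping rule above can be exercised on its own: the simulated file name is the MD5 of the original path plus timestamp, and the submitting user is folded into the hash only for private files, so two users' private copies of one HDFS file land on two distinct public files. A minimal sketch mirroring mapDistCacheFilePath() from the diff above (the class name and sample values are illustrative only):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.MD5Hash;

    public class MapDistCachePathSketch {
      // Mirrors mapDistCacheFilePath() from the diff above.
      static String map(Path distCachePath, String file, String timeStamp,
                        boolean isPublic, String user) {
        String id = file + timeStamp;
        if (!isPublic) {
          id = id.concat(user); // job-submitter disambiguates private files
        }
        return new Path(distCachePath, MD5Hash.digest(id).toString())
                   .toUri().getPath();
      }

      public static void main(String[] args) {
        Path base = new Path("/gridmix/distributedCache");
        // Same original file and timestamp, different users: two different
        // simulated file names, hence no sharing between the two jobs.
        System.out.println(map(base, "hdfs:///tmp/f.txt", "1234", false, "alice"));
        System.out.println(map(base, "hdfs:///tmp/f.txt", "1234", false, "bob"));
      }
    }
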
Added: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java?rev=1079237&view=auto
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java (added)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java Tue Mar  8 05:59:05 2011
@@ -0,0 +1,332 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Random;
+import java.net.URI;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.util.Progressable;
+
+/**
+ * Pseudo local file system that generates random data for any file on the fly
+ * instead of storing files on disk, so opening the same file multiple times
+ * will not return the same content. There are no directories in this file
+ * system other than the root, and all files live directly under the root,
+ * i.e. "/". All file URIs on the pseudo local file system are of the format
+ * <code>pseudo:///&lt;name&gt;.&lt;fileSize&gt;</code>, where &lt;name&gt; is
+ * a unique name and &lt;fileSize&gt; is the size of the file in bytes.
+ */
+class PseudoLocalFs extends FileSystem {
+  Path home;
+  /**
+   * The creation time and modification time of all files in
+   * {@link PseudoLocalFs} is same.
+   */
+  private static final long TIME = System.currentTimeMillis();
+  private static final String HOME_DIR = "/";
+  private static final long BLOCK_SIZE  = 4 * 1024 * 1024L; // 4 MB
+  private static final int DEFAULT_BUFFER_SIZE = 1024  * 1024; // 1MB
+
+  static final URI NAME = URI.create("pseudo:///");
+
+  PseudoLocalFs() {
+    this(new Path(HOME_DIR));
+  }
+
+  PseudoLocalFs(Path home) {
+    super();
+    this.home = home;
+  }
+
+  @Override
+  public URI getUri() {
+    return NAME;
+  }
+
+  @Override
+  public Path getHomeDirectory() {
+    return home;
+  }
+
+  @Override
+  public Path getWorkingDirectory() {
+    return getHomeDirectory();
+  }
+
+  /**
+   * Generates a valid pseudo local file path from the given <code>fileId</code>
+   * and <code>fileSize</code>.
+   * @param fileId unique file id string
+   * @param fileSize file size
+   * @return the generated relative path
+   */
+  static Path generateFilePath(String fileId, long fileSize) {
+    return new Path(fileId + "." + fileSize);
+  }
+
+  /**
+   * Creating a pseudo local file is nothing but validating the file path.
+   * Actual data of the file is generated on the fly when a client tries to
+   * open the file for reading.
+   * @param path file path to be created
+   */
+  @Override
+  public FSDataOutputStream create(Path path) throws IOException {
+    try {
+      validateFileNameFormat(path);
+    } catch (FileNotFoundException e) {
+      throw new IOException("File creation failed for " + path);
+    }
+    return null;
+  }
+
+  /**
+   * Validate that the path provided is in the expected format for Pseudo
+   * Local File System based files.
+   * @param path file path
+   * @return the file size
+   * @throws FileNotFoundException
+   */
+  long validateFileNameFormat(Path path) throws FileNotFoundException {
+    path = path.makeQualified(this);
+    boolean valid = true;
+    long fileSize = 0;
+    if (!path.toUri().getScheme().equals(getUri().getScheme())) {
+      valid = false;
+    } else {
+      String[] parts = path.toUri().getPath().split("\\.");
+      try {
+        fileSize = Long.valueOf(parts[parts.length - 1]);
+        valid = (fileSize >= 0);
+      } catch (NumberFormatException e) {
+        valid = false;
+      }
+    }
+    if (!valid) {
+      throw new FileNotFoundException("File " + path
+          + " does not exist in pseudo local file system");
+    }
+    return fileSize;
+  }
+
+  /**
+   * @see #create(Path)
+   */
+  @Override
+  public FSDataInputStream open(Path path, int bufferSize) throws IOException {
+    long fileSize = validateFileNameFormat(path);
+    InputStream in = new RandomInputStream(fileSize, bufferSize);
+    return new FSDataInputStream(in);
+  }
+
+  /**
+   * @see #create(Path)
+   */
+  @Override
+  public FSDataInputStream open(Path path) throws IOException {
+    return open(path, DEFAULT_BUFFER_SIZE);
+  }
+
+  @Override
+  public FileStatus getFileStatus(Path path) throws IOException {
+    long fileSize = validateFileNameFormat(path);
+    return new FileStatus(fileSize, false, 1, BLOCK_SIZE, TIME, path);
+  }
+
+  @Override
+  public boolean exists(Path path) {
+    try {
+      validateFileNameFormat(path);
+    } catch (FileNotFoundException e) {
+      return false;
+    }
+    return true;
+  }
+
+  @Override
+  public FSDataOutputStream create(Path path, FsPermission permission,
+      boolean overwrite, int bufferSize, short replication, long blockSize,
+      Progressable progress) throws IOException {
+    return create(path);
+  }
+
+  @Override
+  public FileStatus[] listStatus(Path path) throws FileNotFoundException,
+      IOException {
+    return new FileStatus[] {getFileStatus(path)};
+  }
+
+  /**
+   * Input Stream that generates specified number of random bytes.
+   */
+  static class RandomInputStream extends InputStream
+      implements Seekable, PositionedReadable {
+
+    private final Random r = new Random();
+    private BytesWritable val = null;
+    private int positionInVal = 0;// current position in the buffer 'val'
+
+    private long totalSize = 0;// total number of random bytes to be generated
+    private long curPos = 0;// current position in this stream
+
+    /**
+     * @param size total number of random bytes to be generated in this stream
+     * @param bufferSize the buffer size. An internal buffer array of length
+     * <code>bufferSize</code> is created. If <code>bufferSize</code> is not a
+     * positive number, then a default value of 1MB is used.
+     */
+    RandomInputStream(long size, int bufferSize) {
+      totalSize = size;
+      if (bufferSize <= 0) {
+        bufferSize = DEFAULT_BUFFER_SIZE;
+      }
+      val = new BytesWritable(new byte[bufferSize]);
+    }
+
+    @Override
+    public int read() throws IOException {
+      byte[] b = new byte[1];
+      if (curPos < totalSize) {
+        if (positionInVal < val.getLength()) {// use buffered byte
+          b[0] = val.getBytes()[positionInVal++];
+          ++curPos;
+        } else {// generate data
+          int num = read(b);
+          if (num < 0) {
+            return num;
+          }
+        }
+      } else {
+        return -1;
+      }
+      return b[0];
+    }
+
+    @Override
+    public int read(byte[] bytes) throws IOException {
+      return read(bytes, 0, bytes.length);
+    }
+
+    @Override
+    public int read(byte[] bytes, int off, int len) throws IOException {
+      if (curPos == totalSize) {
+        return -1;// EOF
+      }
+      int numBytes = len;
+      if (numBytes > (totalSize - curPos)) {// position in file is close to EOF
+        numBytes = (int)(totalSize - curPos);
+      }
+      if (numBytes > (val.getLength() - positionInVal)) {
+        // need to generate data into val
+        r.nextBytes(val.getBytes());
+        positionInVal = 0;
+      }
+
+      System.arraycopy(val.getBytes(), positionInVal, bytes, off, numBytes);
+      curPos += numBytes;
+      positionInVal += numBytes;
+      return numBytes;
+    }
+
+    @Override
+    public int available() {
+      return (int)(val.getLength() - positionInVal);
+    }
+
+    @Override
+    public int read(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Get the current position in this stream/pseudo-file
+     * @return the position in this stream/pseudo-file
+     * @throws IOException
+     */
+    @Override
+    public long getPos() throws IOException {
+      return curPos;
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean seekToNewSource(long targetPos) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  @Override
+  public FSDataOutputStream append(Path path, int bufferSize,
+      Progressable progress) throws IOException {
+    throw new UnsupportedOperationException("Append is not supported"
+        + " in pseudo local file system.");
+  }
+
+  @Override
+  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
+    throw new UnsupportedOperationException("Mkdirs is not supported"
+        + " in pseudo local file system.");
+  }
+
+  @Override
+  public boolean rename(Path src, Path dst) throws IOException {
+    throw new UnsupportedOperationException("Rename is not supported"
+        + " in pseudo local file system.");
+  }
+
+  @Override
+  public boolean delete(Path path, boolean recursive) {
+    throw new UnsupportedOperationException("File deletion is not supported "
+        + "in pseudo local file system.");
+  }
+
+  @Override
+  public void setWorkingDirectory(Path newDir) {
+    throw new UnsupportedOperationException("SetWorkingDirectory "
+        + "is not supported in pseudo local file system.");
+  }
+}

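A short usage sketch of the file system added above: register the "pseudo" scheme, create a sized file, and read it back to confirm the length. This is a minimal sketch assuming only the methods shown above; it needs to sit in the org.apache.hadoop.mapred.gridmix package since PseudoLocalFs is package-private, and the file name "sample.4096" is made up:

    package org.apache.hadoop.mapred.gridmix;

    import java.io.InputStream;
    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class PseudoLocalFsUsageSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setClass("fs.pseudo.impl", PseudoLocalFs.class, FileSystem.class);
        FileSystem pfs = FileSystem.get(new URI("pseudo:///"), conf);

        // The name encodes the size: a 4096-byte pseudo file.
        Path p = new Path("pseudo:///sample.4096");
        pfs.create(p); // only validates the name; nothing is stored

        // Reading generates data on the fly; only the length is stable.
        long n = 0;
        InputStream in = pfs.open(p);
        byte[] buf = new byte[1024];
        int r;
        while ((r = in.read(buf)) >= 0) {
          n += r;
        }
        in.close();
        System.out.println(n); // prints 4096
      }
    }
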
Modified: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java?rev=1079237&r1=1079236&r2=1079237&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java Tue Mar  8 05:59:05 2011
@@ -28,6 +28,7 @@ import org.apache.hadoop.tools.rumen.Pre
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.MRJobConfig;
 import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.InputSplit;
 
@@ -209,9 +210,14 @@ public class DebugJobProducer implements
 
    @Override
    public String getUser() {
-     String s = String.format("foobar%d", id);
-     GridmixTestUtils.createHomeAndStagingDirectory(s, (JobConf)conf);
-     return s;
+     // Obtain user name from job configuration, if available.
+     // Otherwise use dummy user names.
+     String user = conf.get(MRJobConfig.USER_NAME);
+     if (user == null) {
+       user = String.format("foobar%d", id);
+     }
+     GridmixTestUtils.createHomeAndStagingDirectory(user, (JobConf)conf);
+     return user;
    }
 
    @Override

Modified: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java?rev=1079237&r1=1079236&r2=1079237&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestDistCacheEmulation.java Tue Mar  8 05:59:05 2011
@@ -56,9 +56,6 @@ import org.junit.Test;
  */
 public class TestDistCacheEmulation {
 
-  private static List<Long> privateDCFilesSizesExpected = new ArrayList<Long>();
-  private static List<Long> publicDCFilesSizesExpected = new ArrayList<Long>();
-
   private DistributedCacheEmulator dce = null;
 
   @BeforeClass
@@ -74,86 +71,78 @@ public class TestDistCacheEmulation {
   /**
    * Validate the dist cache files generated by GenerateDistCacheData job.
    * @param jobConf configuration of GenerateDistCacheData job.
+   * @param sortedFileSizes array of sorted distributed cache file sizes 
    * @throws IOException 
    * @throws FileNotFoundException 
    */
-  private void validateDistCacheData(JobConf jobConf)
+  private void validateDistCacheData(JobConf jobConf, long[] sortedFileSizes)
       throws FileNotFoundException, IOException {
-    Path privateDistCache = dce.getPrivateDistCacheDir();
-    Path publicDistCache = dce.getPublicDistCacheDir();
-    Path distCachePath = privateDistCache.getParent();
+    Path distCachePath = dce.getDistributedCacheDir();
     String filesListFile =
         jobConf.get(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST);
+    FileSystem fs = FileSystem.get(jobConf);
 
-    // validate files directly under dist cache dir
-    RemoteIterator<LocatedFileStatus> iter =
-        GridmixTestUtils.dfs.listFiles(distCachePath, false);
-    assertTrue("Dist Cache files list file missing.", iter.hasNext());
-    LocatedFileStatus stat = iter.next();
-    assertTrue("Dist Cache files list file path did not match the expected.",
-        stat.getPath().toUri().getPath().equals(
-        new Path(filesListFile).toUri().getPath()));
-    assertFalse("Some file other than special file is seen directly under "
-        + "dist cache directory.", iter.hasNext());
-
-    // validate private dist cache files
-    validateDistCacheFiles(privateDCFilesSizesExpected,
-        privateDistCache, false);
-
-    // validate public dist cache files
-    validateDistCacheFiles(publicDCFilesSizesExpected,
-        publicDistCache, true);
+    // Validate the existence of Distributed Cache files list file directly
+    // under distributed cache directory
+    Path listFile = new Path(filesListFile);
+    assertTrue("Path of Distributed Cache files list file is wrong.",
+        distCachePath.equals(listFile.getParent().makeQualified(fs)));
+
+    // Delete the dist cache files list file
+    assertTrue("Failed to delete distributed Cache files list file " + listFile,
+               fs.delete(listFile));
+
+    List<Long> fileSizes = new ArrayList<Long>();
+    for (long size : sortedFileSizes) {
+      fileSizes.add(size);
+    }
+    // validate dist cache files after deleting the 'files list file'
+    validateDistCacheFiles(fileSizes, distCachePath);
   }
 
   /**
-   * Validate private/public dist cache files.
-   * @param filesSizesExpected sizes of expected dist cache files
-   * @param distCacheDir the private/public dist cache dir to be validated
-   * @param arePublic true if the dist cache files to be validated are public
+   * Validate private/public distributed cache files.
+   * @param filesSizesExpected list of sizes of expected dist cache files
+   * @param distCacheDir the distributed cache dir to be validated
    * @throws IOException 
    * @throws FileNotFoundException 
    */
-  private void validateDistCacheFiles(List<Long> filesSizesExpected,
-      Path distCacheDir, boolean arePublic)
-      throws FileNotFoundException, IOException {
-    String publicOrPrivate = arePublic ? "public" : "private";
+  private void validateDistCacheFiles(List<Long> filesSizesExpected,
+      Path distCacheDir) throws FileNotFoundException, IOException {
     RemoteIterator<LocatedFileStatus> iter =
         GridmixTestUtils.dfs.listFiles(distCacheDir, false);
     int numFiles = filesSizesExpected.size();
     for (int i = 0; i < numFiles; i++) {
-      assertTrue("Missing " + publicOrPrivate + " dist cache files",
-                 iter.hasNext());
+      assertTrue("Missing distributed cache files.", iter.hasNext());
       LocatedFileStatus stat = iter.next();
-      assertTrue("File size of " + publicOrPrivate + " dist cache file "
+      assertTrue("File size of distributed cache file "
           + stat.getPath().toUri().getPath() + " is wrong.",
           filesSizesExpected.remove(stat.getLen()));
 
       FsPermission perm = stat.getPermission();
-      assertEquals("Wrong permissions for " + publicOrPrivate
-          + " dist cache file " + stat.getPath().toUri().getPath(),
+      assertEquals("Wrong permissions for distributed cache file "
+          + stat.getPath().toUri().getPath(),
           new FsPermission((short)0644), perm);
     }
-    assertFalse("Number of files under " + publicOrPrivate
-        + " dist cache dir is wrong.", iter.hasNext());
+    assertFalse("Number of files under distributed cache dir is wrong.",
+        iter.hasNext());
   }
 
   /**
   * Runs setupGenerateDistCacheData() on a new DistributedCacheEmulator and
-   * and returns the jobConf. Fills the arrays sortedFileSizes and
-   * sortedVisibilities that can be used for validation.
+   * returns the jobConf. Fills the array <code>sortedFileSizes</code> that
+   * can be used for validation.
    * Validation of exit code from setupGenerateDistCacheData() is done.
    * @param generate true if -generate option is specified
-   * @param sortedFileSizes sorted dist cache file sizes
-   * @param sortedVisibilities visibilities of dist cache files sorted based on
-   *                           file sizes
+   * @param sortedFileSizes sorted distributed cache file sizes
    * @throws IOException
    * @throws InterruptedException
    */
   private JobConf runSetupGenerateDistCacheData(boolean generate,
-      long[] sortedFileSizes, boolean[] sortedVisibilities)
-      throws IOException, InterruptedException {
+      long[] sortedFileSizes) throws IOException, InterruptedException {
     Configuration conf = new Configuration();
     String user = UserGroupInformation.getCurrentUser().getShortUserName();
+    conf.set(MRJobConfig.USER_NAME, user);
     // Set some dummy dist cache files in gridmix configuration so that they go
     // into the configuration of JobStory objects.
     String[] distCacheFiles = {"hdfs:///tmp/file1.txt",
@@ -163,26 +152,17 @@ public class TestDistCacheEmulation {
                                "subdir1/file5.txt",
                                "subdir2/file6.gz"};
     String[] fileSizes = {"400", "2500", "700", "1200", "1500", "500"};
-    System.arraycopy(new long[] {2500, 1500, 1200, 700, 500, 400}, 0,
-                     sortedFileSizes, 0, 6);
+    // The local FS based dist cache file whose path contains <user>/.staging
+    // is not created on HDFS. So its file size of 2500 is not added to
+    // sortedFileSizes.
+    System.arraycopy(new long[] {1500, 1200, 700, 500, 400}, 0,
+                     sortedFileSizes, 0, 5);
     String[] visibilities = {"true", "false", "false", "true", "true", "false"};
-    System.arraycopy(new boolean[] {//sorted based on file sizes
-                     false, true, true, false, false, true}, 0,
-                     sortedVisibilities, 0, 6);
     String[] timeStamps = {"1234", "2345", "34567", "5434", "125", "134"};
     conf.setStrings(MRJobConfig.CACHE_FILES, distCacheFiles);
     conf.setStrings(MRJobConfig.CACHE_FILES_SIZES, fileSizes);
     conf.setStrings(MRJobConfig.CACHE_FILE_VISIBILITIES, visibilities);
     conf.setStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS, timeStamps);
-    // build lists of expected dist cache files
-    privateDCFilesSizesExpected.clear();
-    privateDCFilesSizesExpected.add(2500L);
-    privateDCFilesSizesExpected.add(700L);
-    privateDCFilesSizesExpected.add(500L);
-    publicDCFilesSizesExpected.clear();
-    publicDCFilesSizesExpected.add(1500L);
-    publicDCFilesSizesExpected.add(1200L);
-    publicDCFilesSizesExpected.add(400L);
 
     // Job stories of all 3 jobs will have same dist cache files in their
     // configurations
@@ -219,15 +199,16 @@ public class TestDistCacheEmulation {
    */
   @Test
   public void testGenerateDistCacheData() throws Exception {
+    long[] sortedFileSizes = new long[5];
     JobConf jobConf =
-        runSetupGenerateDistCacheData(true, new long[6], new boolean[6]);
+        runSetupGenerateDistCacheData(true, sortedFileSizes);
     GridmixJob gridmixJob = new GenerateDistCacheData(jobConf);
     Job job = gridmixJob.call();
     assertEquals("Number of reduce tasks in GenerateDistCacheData is not 0.",
         0, job.getNumReduceTasks());
     assertTrue("GenerateDistCacheData job failed.",
         job.waitForCompletion(false));
-    validateDistCacheData(jobConf);
+    validateDistCacheData(jobConf, sortedFileSizes);
   }
 
   /**
@@ -236,8 +217,7 @@ public class TestDistCacheEmulation {
    *  <li> content of the generated sequence file. This includes validation of
    *       dist cache file paths and their file sizes.
    */
-  private void validateSetupGenDC(JobConf jobConf, long[] sortedFileSizes,
-      boolean[] sortedVisibilities)
+  private void validateSetupGenDC(JobConf jobConf, long[] sortedFileSizes)
       throws IOException, InterruptedException {
     // build things needed for validation
     long sumOfFileSizes = 0;
@@ -272,27 +252,23 @@ public class TestDistCacheEmulation {
     reader.initialize(split, mapContext);
 
     // start validating setupGenerateDistCacheData
-    doValidateSetupGenDC(reader, fs, sortedFileSizes, sortedVisibilities);
+    doValidateSetupGenDC(reader, fs, sortedFileSizes);
   }
 
   /**
    *  Validate setupGenerateDistCacheData by validating
-   *  <li> permissions of the distributed cache directories and
+   *  <li> permissions of the distributed cache directory and
    *  <li> content of the generated sequence file. This includes validation of
    *       dist cache file paths and their file sizes.
    */
   private void doValidateSetupGenDC(RecordReader<LongWritable, BytesWritable>
-      reader, FileSystem fs, long[] sortedFileSizes,
-      boolean[] sortedVisibilities) throws IOException, InterruptedException {
+      reader, FileSystem fs, long[] sortedFileSizes)
+      throws IOException, InterruptedException {
 
-    // Validate permissions of dist cache directories
-    Path privateDCDirPath = dce.getPrivateDistCacheDir();
-    assertEquals("Wrong permissions for private dist cache dir.",
-        fs.getFileStatus(privateDCDirPath).getPermission()
-        .getOtherAction().and(FsAction.EXECUTE), FsAction.NONE);
-    Path publicDCDirPath = dce.getPublicDistCacheDir();
-    assertEquals("Wrong permissions for public dist cache dir.",
-        fs.getFileStatus(publicDCDirPath).getPermission()
+    // Validate permissions of dist cache directory
+    Path distCacheDir = dce.getDistributedCacheDir();
+    assertEquals("Wrong permissions for distributed cache dir " + distCacheDir,
+        fs.getFileStatus(distCacheDir).getPermission()
         .getOtherAction().and(FsAction.EXECUTE), FsAction.EXECUTE);
 
     // Validate the content of the sequence file generated by
@@ -312,17 +288,13 @@ public class TestDistCacheEmulation {
       assertEquals("Dist cache file size is wrong.",
           sortedFileSizes[i], fileSize);
 
-      // Validate dist cache file paths based on visibilities
+      // Validate dist cache file path.
 
       // parent dir of dist cache file
       Path parent = new Path(file).getParent().makeQualified(fs);
-      if (sortedVisibilities[i]) {// should exist in public dist cache dir
-        assertTrue("Public dist cache file path is wrong.",
-            publicDCDirPath.equals(parent));
-      } else {// should exist in private dist cache dir
-        assertTrue("Private dist cache file path is wrong.",
-            privateDCDirPath.equals(parent));
-      }
+      // should exist in dist cache dir
+      assertTrue("Public dist cache file path is wrong.",
+          distCacheDir.equals(parent));
     }
   }
 
@@ -335,20 +307,18 @@ public class TestDistCacheEmulation {
   @Test
   public void testSetupGenerateDistCacheData()
       throws IOException, InterruptedException {
-    long[] sortedFileSizes = new long[6];
-    boolean[] sortedVisibilities = new boolean[6];
-    JobConf jobConf =
-      runSetupGenerateDistCacheData(true, sortedFileSizes, sortedVisibilities);
-    validateSetupGenDC(jobConf, sortedFileSizes, sortedVisibilities);
-    
+    long[] sortedFileSizes = new long[5];
+    JobConf jobConf = runSetupGenerateDistCacheData(true, sortedFileSizes);
+    validateSetupGenDC(jobConf, sortedFileSizes);
+
     // Verify if correct exit code is seen when -generate option is missing and
     // distributed cache files are missing in the expected path.
-    runSetupGenerateDistCacheData(false, sortedFileSizes, sortedVisibilities);
+    runSetupGenerateDistCacheData(false, sortedFileSizes);
   }
 
   /**
    *  Create DistributedCacheEmulator object and do the initialization by
-   *  calling init() on it with dummy trace.
+   *  calling init() on it with a dummy trace. Also configures the pseudo local FS.
    */
   private DistributedCacheEmulator createDistributedCacheEmulator(
       Configuration conf, Path ioPath, boolean generate) throws IOException {

Added: hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestPseudoLocalFs.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestPseudoLocalFs.java?rev=1079237&view=auto
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestPseudoLocalFs.java (added)
+++ hadoop/mapreduce/branches/yahoo-merge/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/TestPseudoLocalFs.java Tue Mar  8 05:59:05 2011
@@ -0,0 +1,216 @@
+package org.apache.hadoop.mapred.gridmix;
+
+import static org.junit.Assert.*;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.junit.Test;
+
+/**
+ * Test the basic functionality of PseudoLocalFs
+ */
+public class TestPseudoLocalFs {
+
+  /**
+   * Test if a file on PseudoLocalFs of a specific size can be opened and read.
+   * Validate the size of the data read.
+   * Test the read methods of {@link PseudoLocalFs.RandomInputStream}.
+   * @throws Exception
+   */
+  @Test
+  public void testPseudoLocalFsFileSize() throws Exception {
+    long fileSize = 10000;
+    Path path = PseudoLocalFs.generateFilePath("myPseudoFile", fileSize);
+    PseudoLocalFs pfs = new PseudoLocalFs();
+    pfs.create(path);
+
+    // Read 1 byte at a time and validate file size.
+    InputStream in = pfs.open(path, 0);
+    long totalSize = 0;
+
+    while (in.read() >= 0) {
+      ++totalSize;
+    }
+    in.close();
+    assertEquals("File size mismatch with read().", fileSize, totalSize);
+
+    // Read data from PseudoLocalFs-based file into buffer to
+    // validate read(byte[]) and file size.
+    in = pfs.open(path, 0);
+    totalSize = 0;
+    byte[] b = new byte[1024];
+    int bytesRead = in.read(b);
+    while (bytesRead >= 0) {
+      totalSize += bytesRead;
+      bytesRead = in.read(b);
+    }
+    assertEquals("File size mismatch with read(byte[]).", fileSize, totalSize);
+  }
+
+  /**
+   * Validate if file status is obtained for correctly formed file paths on
+   * PseudoLocalFs and also verify if appropriate exception is thrown for
+   * invalid file paths.
+   * @param pfs Pseudo Local File System
+   * @param path file path for which getFileStatus() is to be called
+   * @param shouldSucceed <code>true</code> if getFileStatus() should succeed
+   * @throws IOException
+   */
+  private void validateGetFileStatus(FileSystem pfs, Path path,
+      boolean shouldSucceed) throws IOException {
+    boolean expectedExceptionSeen = false;
+    FileStatus stat = null;
+    try {
+      stat = pfs.getFileStatus(path);
+    } catch(FileNotFoundException e) {
+      expectedExceptionSeen = true;
+    }
+    if (shouldSucceed) {
+      assertFalse("getFileStatus() has thrown Exception for valid file name "
+                  + path, expectedExceptionSeen);
+      assertNotNull("Missing file status for a valid file.", stat);
+
+      // validate fileSize
+      String[] parts = path.toUri().getPath().split("\\.");
+      long expectedFileSize = Long.valueOf(parts[parts.length - 1]);
+      assertEquals("Invalid file size.", expectedFileSize, stat.getLen());
+    } else {
+      assertTrue("getFileStatus() did not throw Exception for invalid file "
+                 + " name " + path, expectedExceptionSeen);
+    }
+  }
+
+  /**
+   * Validate if file creation succeeds for correctly formed file paths on
+   * PseudoLocalFs and also verify if appropriate exception is thrown for
+   * invalid file paths.
+   * @param pfs Pseudo Local File System
+   * @param path file path for which create() is to be called
+   * @param shouldSucceed <code>true</code> if create() should succeed
+   * @throws IOException
+   */
+  private void validateCreate(FileSystem pfs, Path path,
+      boolean shouldSucceed) throws IOException {
+    boolean expectedExceptionSeen = false;
+    try {
+      pfs.create(path);
+    } catch(IOException e) {
+      expectedExceptionSeen = true;
+    }
+    if (shouldSucceed) {
+      assertFalse("create() has thrown Exception for valid file name "
+                  + path, expectedExceptionSeen);
+    } else {
+      assertTrue("create() did not throw Exception for invalid file name "
+                 + path, expectedExceptionSeen);
+    }
+  }
+
+  /**
+   * Validate if opening of file succeeds for correctly formed file paths on
+   * PseudoLocalFs and also verify if appropriate exception is thrown for
+   * invalid file paths.
+   * @param pfs Pseudo Local File System
+   * @param path file path for which open() is to be called
+   * @param shouldSucceed <code>true</code> if open() should succeed
+   * @throws IOException
+   */
+  private void validateOpen(FileSystem pfs, Path path,
+      boolean shouldSucceed) throws IOException {
+    boolean expectedExceptionSeen = false;
+    try {
+      pfs.open(path);
+    } catch(IOException e) {
+      expectedExceptionSeen = true;
+    }
+    if (shouldSucceed) {
+      assertFalse("open() has thrown Exception for valid file name "
+                  + path, expectedExceptionSeen);
+    } else {
+      assertTrue("open() did not throw Exception for invalid file name "
+                 + path, expectedExceptionSeen);
+    }
+  }
+
+  /**
+   * Validate if exists() returns <code>true</code> for correctly formed file
+   * paths on PseudoLocalFs and returns <code>false</code> for improperly
+   * formed file paths.
+   * @param pfs Pseudo Local File System
+   * @param path file path for which exists() is to be called
+   * @param shouldSucceed expected return value of exists(&lt;path&gt;)
+   * @throws IOException
+   */
+  private void validateExists(FileSystem pfs, Path path,
+      boolean shouldSucceed) throws IOException {
+    boolean ret = pfs.exists(path);
+    if (shouldSucceed) {
+      assertTrue("exists() returned false for valid file name " + path, ret);
+    } else {
+      assertFalse("exists() returned true for invalid file name " + path, ret);
+    }
+  }
+
+  /**
+   *  Test Pseudo Local File System methods like getFileStatus(), create(),
+   *  open(), exists() for <li> valid file paths and <li> invalid file paths.
+   * @throws IOException
+   */
+  @Test
+  public void testPseudoLocalFsFileNames() throws IOException {
+    PseudoLocalFs pfs = new PseudoLocalFs();
+    Configuration conf = new Configuration();
+    conf.setClass("fs.pseudo.impl", PseudoLocalFs.class, FileSystem.class);
+
+    Path path = new Path("pseudo:///myPsedoFile.1234");
+    FileSystem testFs = path.getFileSystem(conf);
+    assertEquals("Failed to obtain a pseudo local file system object from path",
+                 pfs.getUri().getScheme(), testFs.getUri().getScheme());
+
+    // Validate PseudoLocalFS operations on URI of some other file system
+    path = new Path("file:///myPsedoFile.12345");
+    validateGetFileStatus(pfs, path, false);
+    validateCreate(pfs, path, false);
+    validateOpen(pfs, path, false);
+    validateExists(pfs, path, false);
+
+    path = new Path("pseudo:///myPsedoFile");//.<fileSize> missing
+    validateGetFileStatus(pfs, path, false);
+    validateCreate(pfs, path, false);
+    validateOpen(pfs, path, false);
+    validateExists(pfs, path, false);
+
+    // the part after the final '.' is not a number
+    path = new Path("pseudo:///myPseudoFile.txt");
+    validateGetFileStatus(pfs, path, false);
+    validateCreate(pfs, path, false);
+    validateOpen(pfs, path, false);
+    validateExists(pfs, path, false);
+
+    // Generate a valid file name (relative path) and validate operations on it
+    long fileSize = 231456;
+    path = PseudoLocalFs.generateFilePath("my.Pseudo.File", fileSize);
+    // Validate the above generateFilePath()
+    assertEquals("generateFilePath() failed.", fileSize,
+                 pfs.validateFileNameFormat(path));
+
+    validateGetFileStatus(pfs, path, true);
+    validateCreate(pfs, path, true);
+    validateOpen(pfs, path, true);
+    validateExists(pfs, path, true);
+
+    // Validate operations on valid qualified path
+    path = new Path("myPsedoFile.1237");
+    path = path.makeQualified(pfs);
+    validateGetFileStatus(pfs, path, true);
+    validateCreate(pfs, path, true);
+    validateOpen(pfs, path, true);
+    validateExists(pfs, path, true);
+  }
+}

Modified: hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/gridmix.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/gridmix.xml?rev=1079237&r1=1079236&r2=1079237&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/gridmix.xml (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/docs/src/documentation/content/xdocs/gridmix.xml Tue Mar  8 05:59:05 2011
@@ -89,15 +89,12 @@ org.apache.hadoop.mapred.gridmix.Gridmix
       100 * 2<sup>30</sup> bytes as input data.
       <code>&lt;iopath&gt;/input</code> is the destination directory for
       generated input data and/or the directory from which input data will be
-      read. HDFS-based Distributed Cache files are generated under the directory
-      <code>&lt;iopath&gt;/distributedCache</code>.
-      Private Distributed Cache files are created under the directory
-      <code>&lt;iopath&gt;/distributedCache/private</code> and public
-      Distributed Cache files are created under the directoty
-      <code>&lt;iopath&gt;/distributedCache/public</code>. If some of the needed
-      Distributed Cache files are already existing in the above directories,
-      then only the remaining non-existing Distributed Cache files are generated
-      when <code>-generate</code> option is specified.</p>
+      read. HDFS-based Distributed Cache files are generated under the
+      distributed cache directory <code>&lt;iopath&gt;/distributedCache</code>.
+      If some of the needed Distributed Cache files already exist in the
+      distributed cache directory, then only the remaining missing
+      Distributed Cache files are generated when the <code>-generate</code>
+      option is specified.</p>
       <p>The <code>-users</code> option is used to point to a users-list
       file (see <a href="#usersqueues">Emulating Users and Queues</a>).</p>
       <p>The <code>&lt;trace&gt;</code> parameter is a path to a job trace
@@ -534,8 +531,9 @@ hadoop jar &lt;gridmix-jar&gt; org.apach
     <ul>
     <li>input trace comes from the standard input-stream instead of file, or</li>
     <li><code>&lt;iopath&gt;</code> specified is on local file-system, or</li>
-    <li>any of the ascendant directories of <code>&lt;iopath&gt;</code> till
-    root doesn't have execute permission for others.</li>
+    <li>any of the ancestor directories of the distributed cache directory,
+    i.e. <code>&lt;iopath&gt;/distributedCache</code> (including the distributed
+    cache directory itself), doesn't have execute permission for others.</li>
     </ul>
   </section>
 


