hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ama...@apache.org
Subject svn commit: r1185694 [3/7] - in /hadoop/common/branches/branch-0.20-security: ./ src/contrib/ src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/ src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/ src/contrib/gridmix/sr...
Date Tue, 18 Oct 2011 14:45:51 GMT
Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Progressive.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Progressive.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Progressive.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Progressive.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+/**
+ * Used to track progress of tasks.
+ */
+public interface Progressive {
+  public float getProgress();
+}
\ No newline at end of file

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/PseudoLocalFs.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,337 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Random;
+import java.net.URI;
+
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
+import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.util.Progressable;
+
+/**
+ * Pseudo local file system that generates random data for any file on the fly
+ * instead of storing files on disk. So opening same file multiple times will
+ * not give same file content. There are no directories in this file system
+ * other than the root and all the files are under root i.e. "/". All file URIs
+ * on pseudo local file system should be of the format <code>
+ * pseudo:///&lt;name&gt;.&lt;fileSize&gt;</code> where name is a unique name
+ * and &lt;fileSize&gt; is a number representing the size of the file in bytes.
+ */
+class PseudoLocalFs extends FileSystem {
+  Path home;
+  /**
+   * The creation time and modification time of all files in
+   * {@link PseudoLocalFs} is same.
+   */
+  private static final long TIME = System.currentTimeMillis();
+  private static final String HOME_DIR = "/";
+  private static final long BLOCK_SIZE  = 4 * 1024 * 1024L; // 4 MB
+  private static final int DEFAULT_BUFFER_SIZE = 1024  * 1024; // 1MB
+
+  static final URI NAME = URI.create("pseudo:///");
+
+  PseudoLocalFs() {
+    this(new Path(HOME_DIR));
+  }
+
+  PseudoLocalFs(Path home) {
+    super();
+    this.home = home;
+  }
+
+  @Override
+  public URI getUri() {
+    return NAME;
+  }
+
+  @Override
+  public Path getHomeDirectory() {
+    return home;
+  }
+
+  @Override
+  public Path getWorkingDirectory() {
+    return getHomeDirectory();
+  }
+
+  /**
+   * Generates a valid pseudo local file path from the given <code>fileId</code>
+   * and <code>fileSize</code>.
+   * @param fileId unique file id string
+   * @param fileSize file size
+   * @return the generated relative path
+   */
+  static Path generateFilePath(String fileId, long fileSize) {
+    return new Path(fileId + "." + fileSize);
+  }
+
+  /**
+   * Creating a pseudo local file is nothing but validating the file path.
+   * Actual data of the file is generated on the fly when client tries to open
+   * the file for reading.
+   * @param path file path to be created
+   */
+  @Override
+  public FSDataOutputStream create(Path path) throws IOException {
+    try {
+      validateFileNameFormat(path);
+    } catch (FileNotFoundException e) {
+      throw new IOException("File creation failed for " + path);
+    }
+    return null;
+  }
+
+  /**
+   * Validate if the path provided is of expected format of Pseudo Local File
+   * System based files.
+   * @param path file path
+   * @return the file size
+   * @throws FileNotFoundException
+   */
+  long validateFileNameFormat(Path path) throws FileNotFoundException {
+    path = path.makeQualified(this);
+    boolean valid = true;
+    long fileSize = 0;
+    if (!path.toUri().getScheme().equals(getUri().getScheme())) {
+      valid = false;
+    } else {
+      String[] parts = path.toUri().getPath().split("\\.");
+      try {
+        fileSize = Long.valueOf(parts[parts.length - 1]);
+        valid = (fileSize >= 0);
+      } catch (NumberFormatException e) {
+        valid = false;
+      }
+    }
+    if (!valid) {
+      throw new FileNotFoundException("File " + path
+          + " does not exist in pseudo local file system");
+    }
+    return fileSize;
+  }
+
+  /**
+   * @See create(Path) for details
+   */
+  @Override
+  public FSDataInputStream open(Path path, int bufferSize) throws IOException {
+    long fileSize = validateFileNameFormat(path);
+    InputStream in = new RandomInputStream(fileSize, bufferSize);
+    return new FSDataInputStream(in);
+  }
+
+  /**
+   * @See create(Path) for details
+   */
+  @Override
+  public FSDataInputStream open(Path path) throws IOException {
+    return open(path, DEFAULT_BUFFER_SIZE);
+  }
+
+  @Override
+  public FileStatus getFileStatus(Path path) throws IOException {
+    long fileSize = validateFileNameFormat(path);
+    return new FileStatus(fileSize, false, 1, BLOCK_SIZE, TIME, path);
+  }
+
+  @Override
+  public boolean exists(Path path) {
+    try{
+      validateFileNameFormat(path);
+    } catch (FileNotFoundException e) {
+      return false;
+    }
+    return true;
+  }
+
+  @Override
+  public FSDataOutputStream create(Path path, FsPermission permission,
+      boolean overwrite, int bufferSize, short replication, long blockSize,
+      Progressable progress) throws IOException {
+    return create(path);
+  }
+
+  @Override
+  public FileStatus[] listStatus(Path path) throws FileNotFoundException,
+      IOException {
+    return new FileStatus[] {getFileStatus(path)};
+  }
+
+  /**
+   * Input Stream that generates specified number of random bytes.
+   */
+  static class RandomInputStream extends InputStream
+      implements Seekable, PositionedReadable {
+
+    private final Random r = new Random();
+    private BytesWritable val = null;
+    private int positionInVal = 0;// current position in the buffer 'val'
+
+    private long totalSize = 0;// total number of random bytes to be generated
+    private long curPos = 0;// current position in this stream
+
+    /**
+     * @param size total number of random bytes to be generated in this stream
+     * @param bufferSize the buffer size. An internal buffer array of length
+     * <code>bufferSize</code> is created. If <code>bufferSize</code> is not a
+     * positive number, then a default value of 1MB is used.
+     */
+    RandomInputStream(long size, int bufferSize) {
+      totalSize = size;
+      if (bufferSize <= 0) {
+        bufferSize = DEFAULT_BUFFER_SIZE;
+      }
+      val = new BytesWritable(new byte[bufferSize]);
+    }
+
+    @Override
+    public int read() throws IOException {
+      byte[] b = new byte[1];
+      if (curPos < totalSize) {
+        if (positionInVal < val.getLength()) {// use buffered byte
+          b[0] = val.getBytes()[positionInVal++];
+          ++curPos;
+        } else {// generate data
+          int num = read(b);
+          if (num < 0) {
+            return num;
+          }
+        }
+      } else {
+        return -1;
+      }
+      return b[0];
+    }
+
+    @Override
+    public int read(byte[] bytes) throws IOException {
+      return read(bytes, 0, bytes.length);
+    }
+
+    @Override
+    public int read(byte[] bytes, int off, int len) throws IOException {
+      if (curPos == totalSize) {
+        return -1;// EOF
+      }
+      int numBytes = len;
+      if (numBytes > (totalSize - curPos)) {// position in file is close to EOF
+        numBytes = (int)(totalSize - curPos);
+      }
+      if (numBytes > (val.getLength() - positionInVal)) {
+        // need to generate data into val
+        r.nextBytes(val.getBytes());
+        positionInVal = 0;
+      }
+
+      System.arraycopy(val.getBytes(), positionInVal, bytes, off, numBytes);
+      curPos += numBytes;
+      positionInVal += numBytes;
+      return numBytes;
+    }
+
+    @Override
+    public int available() {
+      return (int)(val.getLength() - positionInVal);
+    }
+
+    @Override
+    public int read(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void readFully(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Get the current position in this stream/pseudo-file
+     * @return the position in this stream/pseudo-file
+     * @throws IOException
+     */
+    @Override
+    public long getPos() throws IOException {
+      return curPos;
+    }
+
+    @Override
+    public void seek(long pos) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public boolean seekToNewSource(long targetPos) throws IOException {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  @Override
+  public FSDataOutputStream append(Path path, int bufferSize,
+      Progressable progress) throws IOException {
+    throw new UnsupportedOperationException("Append is not supported"
+        + " in pseudo local file system.");
+  }
+
+  @Override
+  public boolean mkdirs(Path f, FsPermission permission) throws IOException {
+    throw new UnsupportedOperationException("Mkdirs is not supported"
+        + " in pseudo local file system.");
+  }
+
+  @Override
+  public boolean rename(Path src, Path dst) throws IOException {
+    throw new UnsupportedOperationException("Rename is not supported"
+        + " in pseudo local file system.");
+  }
+
+  @Override
+  public boolean delete(Path path, boolean recursive) {
+    throw new UnsupportedOperationException("File deletion is not supported "
+        + "in pseudo local file system.");
+  }
+
+  @Override
+  public void setWorkingDirectory(Path newDir) {
+    throw new UnsupportedOperationException("SetWorkingDirectory "
+        + "is not supported in pseudo local file system.");
+  }
+
+  @Override
+  public boolean delete(Path f) throws IOException {//dummy implementation
+    return true;
+  }
+}

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RandomTextDataGenerator.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.lang.RandomStringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * A random text generator. The words are simply sequences of alphabets.
+ */
+class RandomTextDataGenerator {
+  static final Log LOG = LogFactory.getLog(RandomTextDataGenerator.class);
+  
+  /**
+   * Configuration key for random text data generator's list size.
+   */
+  static final String GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE = 
+    "gridmix.datagenerator.randomtext.listsize";
+  
+  /**
+   * Configuration key for random text data generator's word size.
+   */
+  static final String GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE = 
+    "gridmix.datagenerator.randomtext.wordsize";
+  
+  /**
+   * Default random text data generator's list size.
+   */
+  static final int DEFAULT_LIST_SIZE = 200;
+  
+  /**
+   * Default random text data generator's word size.
+   */
+  static final int DEFAULT_WORD_SIZE = 10;
+  
+  /**
+   * Default random text data generator's seed.
+   */
+  static final long DEFAULT_SEED = 0L;
+  
+  /**
+   * A list of random words
+   */
+  private String[] words;
+  private Random random;
+  
+  /**
+   * Constructor for {@link RandomTextDataGenerator} with default seed.
+   * @param size the total number of words to consider.
+   * @param wordSize Size of each word
+   */
+  RandomTextDataGenerator(int size, int wordSize) {
+    this(size, DEFAULT_SEED , wordSize);
+  }
+  
+  /**
+   * Constructor for {@link RandomTextDataGenerator}.
+   * @param size the total number of words to consider.
+   * @param seed Random number generator seed for repeatability
+   * @param wordSize Size of each word
+   */
+  RandomTextDataGenerator(int size, Long seed, int wordSize) {
+    random = new Random(seed);
+    words = new String[size];
+    
+    //TODO change the default with the actual stats
+    //TODO do u need varied sized words?
+    for (int i = 0; i < size; ++i) {
+      words[i] = 
+        RandomStringUtils.random(wordSize, 0, 0, true, false, null, random);
+    }
+  }
+  
+  /**
+   * Get the configured random text data generator's list size.
+   */
+  static int getRandomTextDataGeneratorListSize(Configuration conf) {
+    return conf.getInt(GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, DEFAULT_LIST_SIZE);
+  }
+  
+  /**
+   * Set the random text data generator's list size.
+   */
+  static void setRandomTextDataGeneratorListSize(Configuration conf, 
+                                                 int listSize) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Random text data generator is configured to use a dictionary " 
+                + " with " + listSize + " words");
+    }
+    conf.setInt(GRIDMIX_DATAGEN_RANDOMTEXT_LISTSIZE, listSize);
+  }
+  
+  /**
+   * Get the configured random text data generator word size.
+   */
+  static int getRandomTextDataGeneratorWordSize(Configuration conf) {
+    return conf.getInt(GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, DEFAULT_WORD_SIZE);
+  }
+  
+  /**
+   * Set the random text data generator word size.
+   */
+  static void setRandomTextDataGeneratorWordSize(Configuration conf, 
+                                                 int wordSize) {
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Random text data generator is configured to use a dictionary " 
+                + " with words of length " + wordSize);
+    }
+    conf.setInt(GRIDMIX_DATAGEN_RANDOMTEXT_WORDSIZE, wordSize);
+  }
+  
+  /**
+   * Returns a randomly selected word from a list of random words.
+   */
+  String getRandomWord() {
+    int index = random.nextInt(words.length);
+    return words[index];
+  }
+  
+  /**
+   * This is mainly for testing.
+   */
+  List<String> getRandomWords() {
+    return Arrays.asList(words);
+  }
+}

Modified: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java?rev=1185694&r1=1185693&r2=1185694&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/RoundRobinUserResolver.java Tue Oct 18 14:45:48 2011
@@ -38,12 +38,18 @@ public class RoundRobinUserResolver impl
 
   private int uidx = 0;
   private List<UserGroupInformation> users = Collections.emptyList();
-  private final HashMap<UserGroupInformation,UserGroupInformation> usercache =
-    new HashMap<UserGroupInformation,UserGroupInformation>();
+
+  /**
+   *  Mapping between user names of original cluster and UGIs of proxy users of
+   *  simulated cluster
+   */
+  private final HashMap<String,UserGroupInformation> usercache =
+      new HashMap<String,UserGroupInformation>();
   
   /**
-   * Userlist assumes one UGI per line, each UGI matching
-   * &lt;username&gt;,&lt;group&gt;[,group]*
+   * Userlist assumes one user per line.
+   * Each line in users-list-file is of the form &lt;username&gt;[,group]* 
+   * <br> Group names are ignored(they are not parsed at all).
    */
   private List<UserGroupInformation> parseUserList(
       URI userUri, Configuration conf) throws IOException {
@@ -54,64 +60,78 @@ public class RoundRobinUserResolver impl
     final Path userloc = new Path(userUri.toString());
     final Text rawUgi = new Text();
     final FileSystem fs = userloc.getFileSystem(conf);
-    final ArrayList<UserGroupInformation> ret = new ArrayList();
+    final ArrayList<UserGroupInformation> ugiList =
+        new ArrayList<UserGroupInformation>();
 
     LineReader in = null;
     try {
-      final ArrayList<String> groups = new ArrayList();
       in = new LineReader(fs.open(userloc));
-      while (in.readLine(rawUgi) > 0) {
+      while (in.readLine(rawUgi) > 0) {//line is of the form username[,group]*
+        // e is end position of user name in this line
         int e = rawUgi.find(",");
-        if (e <= 0) {
+        if (rawUgi.getLength() == 0 || e == 0) {
           throw new IOException("Missing username: " + rawUgi);
         }
+        if (e == -1) {
+          e = rawUgi.getLength();
+        }
         final String username = Text.decode(rawUgi.getBytes(), 0, e);
-        int s = e;
-        while ((e = rawUgi.find(",", ++s)) != -1) {
-          groups.add(Text.decode(rawUgi.getBytes(), s, e - s));
-          s = e;
+        UserGroupInformation ugi = null;
+        try {
+          ugi = UserGroupInformation.createProxyUser(username,
+                    UserGroupInformation.getLoginUser());
+        } catch (IOException ioe) {
+          LOG.error("Error while creating a proxy user " ,ioe);
         }
-        groups.add(Text.decode(rawUgi.getBytes(), s, rawUgi.getLength() - s));
-        if (groups.size() == 0) {
-          throw new IOException("Missing groups: " + rawUgi);
+        if (ugi != null) {
+          ugiList.add(ugi);
         }
-        ret.add(UserGroupInformation.createRemoteUser(username));
+        // No need to parse groups, even if they exist. Go to next line
       }
     } finally {
       if (in != null) {
         in.close();
       }
     }
-    return ret;
+    return ugiList;
   }
 
   @Override
   public synchronized boolean setTargetUsers(URI userloc, Configuration conf)
       throws IOException {
+    uidx = 0;
     users = parseUserList(userloc, conf);
     if (users.size() == 0) {
-      throw new IOException("Empty user list");
+      throw new IOException(buildEmptyUsersErrorMsg(userloc));
     }
-    usercache.keySet().retainAll(users);
+    usercache.clear();
     return true;
   }
 
+  static String buildEmptyUsersErrorMsg(URI userloc) {
+    return "Empty user list is not allowed for RoundRobinUserResolver. Provided"
+    + " user resource URI '" + userloc + "' resulted in an empty user list.";
+  }
+
   @Override
   public synchronized UserGroupInformation getTargetUgi(
       UserGroupInformation ugi) {
-    UserGroupInformation ret = usercache.get(ugi);
-    if (null == ret) {
-      ret = users.get(uidx++ % users.size());
-      usercache.put(ugi, ret);
+    // UGI of proxy user
+    UserGroupInformation targetUGI = usercache.get(ugi.getUserName());
+    if (targetUGI == null) {
+      targetUGI = users.get(uidx++ % users.size());
+      usercache.put(ugi.getUserName(), targetUGI);
     }
-    UserGroupInformation val = null;
-    try {
-      val = UserGroupInformation.createProxyUser(
-        ret.getUserName(), UserGroupInformation.getLoginUser());
-    } catch (IOException e) {
-      LOG.error("Error while creating the proxy user " ,e);
-    }
-    return val;
+    return targetUGI;
   }
 
+  /**
+   * {@inheritDoc}
+   * <p>
+   * {@link RoundRobinUserResolver} needs to map the users in the
+   * trace to the provided list of target users. So user list is needed.
+   */
+  public boolean needsTargetUsersList() {
+    return true;
+  }
 }

Modified: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SleepJob.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SleepJob.java?rev=1185694&r1=1185693&r2=1185694&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SleepJob.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SleepJob.java Tue Oct 18 14:45:48 2011
@@ -94,6 +94,11 @@ public class SleepJob extends GridmixJob
   }
 
   @Override
+  protected boolean canEmulateCompression() {
+    return false;
+  }
+  
+  @Override
   public Job call()
     throws IOException, InterruptedException, ClassNotFoundException {
     ugi.doAs(

Modified: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java?rev=1185694&r1=1185693&r2=1185694&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Statistics.java Tue Oct 18 14:45:48 2011
@@ -189,9 +189,10 @@ public class Statistics implements Compo
         try {
           jobCompleted.await(jtPollingInterval, TimeUnit.MILLISECONDS);
         } catch (InterruptedException ie) {
-          LOG.error(
-            "Statistics interrupt while waiting for polling " + ie.getCause(),
-            ie);
+          if (!shutdown) {
+            LOG.error("Statistics interrupt while waiting for completion of "
+                + "a job.", ie);
+          }
           return;
         } finally {
           lock.unlock();

Modified: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java?rev=1185694&r1=1185693&r2=1185694&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/SubmitterUserResolver.java Tue Oct 18 14:45:48 2011
@@ -32,13 +32,13 @@ public class SubmitterUserResolver imple
   
   private UserGroupInformation ugi = null;
 
-  public SubmitterUserResolver() {
+  public SubmitterUserResolver() throws IOException {
     LOG.info(" Current user resolver is SubmitterUserResolver ");
+    ugi = UserGroupInformation.getLoginUser();
   }
 
   public synchronized boolean setTargetUsers(URI userdesc, Configuration conf)
       throws IOException {
-    ugi = UserGroupInformation.getLoginUser();
     return false;
   }
 
@@ -47,4 +47,13 @@ public class SubmitterUserResolver imple
     return this.ugi;
   }
 
+  /**
+   * {@inheritDoc}
+   * <p>
+   * Since {@link SubmitterUserResolver} returns the user name who is running
+   * gridmix, it doesn't need a target list of users.
+   */
+  public boolean needsTargetUsersList() {
+    return false;
+  }
 }

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Summarizer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Summarizer.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Summarizer.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/Summarizer.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.gridmix.GenerateData.DataStatistics;
+
+/**
+ * Summarizes various aspects of a {@link Gridmix} run.
+ */
+class Summarizer {
+  private ExecutionSummarizer executionSummarizer;
+  private ClusterSummarizer clusterSummarizer;
+  protected static final String NA = "N/A";
+  
+  Summarizer() {
+    this(new String[]{NA});
+  }
+  
+  Summarizer(String[] args) {
+    executionSummarizer = new ExecutionSummarizer(args);
+    clusterSummarizer = new ClusterSummarizer();
+  }
+  
+  ExecutionSummarizer getExecutionSummarizer() {
+    return executionSummarizer;
+  }
+  
+  ClusterSummarizer getClusterSummarizer() {
+    return clusterSummarizer;
+  }
+  
+  void start(Configuration conf) {
+    executionSummarizer.start(conf);
+    clusterSummarizer.start(conf);
+  }
+  
+  /**
+   * This finalizes the summarizer.
+   */
+  @SuppressWarnings("unchecked")
+  void finalize(JobFactory factory, String path, long size, 
+                UserResolver resolver, DataStatistics stats, Configuration conf)
+  throws IOException {
+    executionSummarizer.finalize(factory, path, size, resolver, stats, conf);
+  }
+  
+  /**
+   * Summarizes the current {@link Gridmix} run and the cluster used. 
+   */
+  @Override
+  public String toString() {
+    StringBuilder builder = new StringBuilder();
+    builder.append(executionSummarizer.toString());
+    builder.append(clusterSummarizer.toString());
+    return builder.toString();
+  }
+}
\ No newline at end of file

Modified: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/UserResolver.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/UserResolver.java?rev=1185694&r1=1185693&r2=1185694&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/UserResolver.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/UserResolver.java Tue Oct 18 14:45:48 2011
@@ -19,29 +19,27 @@ package org.apache.hadoop.mapred.gridmix
 
 import java.io.IOException;
 import java.net.URI;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.util.LineReader;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 
 /**
  * Maps users in the trace to a set of valid target users on the test cluster.
  */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
 public interface UserResolver {
 
   /**
    * Configure the user map given the URI and configuration. The resolver's
    * contract will define how the resource will be interpreted, but the default
    * will typically interpret the URI as a {@link org.apache.hadoop.fs.Path}
-   * listing target users. 
-   * @param userdesc URI (possibly null) from which user information may be
-   * loaded per the subclass contract.
+   * listing target users.
+   * This method should be called only if {@link #needsTargetUsersList()}
+   * returns true.
+   * @param userdesc URI from which user information may be loaded per the
+   * subclass contract.
    * @param conf The tool configuration.
    * @return true if the resource provided was used in building the list of
    * target users
@@ -55,4 +53,13 @@ public interface UserResolver {
    */
   public UserGroupInformation getTargetUgi(UserGroupInformation ugi);
 
+  /**
+   * Indicates whether this user resolver needs a list of target users to be
+   * provided.
+   *
+   * @return true if a list of target users is to be provided for this
+   * user resolver
+   */
+  public boolean needsTargetUsersList();
+
 }

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/CumulativeCpuUsageEmulatorPlugin.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,315 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.emulators.resourceusage;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.gridmix.Progressive;
+import org.apache.hadoop.util.ResourceCalculatorPlugin;
+import org.apache.hadoop.tools.rumen.ResourceUsageMetrics;
+
+/**
+ * <p>A {@link ResourceUsageEmulatorPlugin} that emulates the cumulative CPU 
+ * usage by performing certain CPU intensive operations. Performing such CPU 
+ * intensive operations essentially uses up some CPU. Every 
+ * {@link ResourceUsageEmulatorPlugin} is configured with a feedback module i.e 
+ * a {@link ResourceCalculatorPlugin}, to monitor the resource usage.</p>
+ * 
+ * <p>{@link CumulativeCpuUsageEmulatorPlugin} emulates the CPU usage in steps. 
+ * The frequency of emulation can be configured via 
+ * {@link #CPU_EMULATION_PROGRESS_INTERVAL}.
+ * CPU usage values are matched via emulation only on the interval boundaries.
+ * </p>
+ *  
+ * {@link CumulativeCpuUsageEmulatorPlugin} is a wrapper program for managing 
+ * the CPU usage emulation feature. It internally uses an emulation algorithm 
+ * (called as core and described using {@link CpuUsageEmulatorCore}) for 
+ * performing the actual emulation. Multiple calls to this core engine should 
+ * use up some amount of CPU.<br>
+ * 
+ * <p>{@link CumulativeCpuUsageEmulatorPlugin} provides a calibration feature 
+ * via {@link #initialize(Configuration, ResourceUsageMetrics, 
+ *                        ResourceCalculatorPlugin, Progressive)} to calibrate 
+ *  the plugin and its core for the underlying hardware. As a result of 
+ *  calibration, every call to the emulation engine's core should roughly use up
+ *  1% of the total usage value to be emulated. This makes sure that the 
+ *  underlying hardware is profiled before use and that the plugin doesn't 
+ *  accidently overuse the CPU. With 1% as the unit emulation target value for 
+ *  the core engine, there will be roughly 100 calls to the engine resulting in 
+ *  roughly 100 calls to the feedback (resource usage monitor) module. 
+ *  Excessive usage of the feedback module is discouraged as 
+ *  it might result into excess CPU usage resulting into no real CPU emulation.
+ *  </p>
+ */
+public class CumulativeCpuUsageEmulatorPlugin 
+implements ResourceUsageEmulatorPlugin {
+  protected CpuUsageEmulatorCore emulatorCore;
+  private ResourceCalculatorPlugin monitor;
+  private Progressive progress;
+  private boolean enabled = true;
+  private float emulationInterval; // emulation interval
+  private long targetCpuUsage = 0;
+  private float lastSeenProgress = 0;
+  private long lastSeenCpuUsageCpuUsage = 0;
+  
+  // Configuration parameters
+  public static final String CPU_EMULATION_PROGRESS_INTERVAL = 
+    "gridmix.emulators.resource-usage.cpu.emulation-interval";
+  private static final float DEFAULT_EMULATION_FREQUENCY = 0.1F; // 10 times
+
+  /**
+   * This is the core CPU usage emulation algorithm. This is the core engine
+   * which actually performs some CPU intensive operations to consume some
+   * amount of CPU. Multiple calls of {@link #compute()} should help the 
+   * plugin emulate the desired level of CPU usage. This core engine can be
+   * calibrated using the {@link #calibrate(ResourceCalculatorPlugin, long)}
+   * API to suit the underlying hardware better. It also can be used to optimize
+   * the emulation cycle.
+   */
+  public interface CpuUsageEmulatorCore {
+    /**
+     * Performs some computation to use up some CPU.
+     */
+    public void compute();
+    
+    /**
+     * Allows the core to calibrate itself.
+     */
+    public void calibrate(ResourceCalculatorPlugin monitor, 
+                          long totalCpuUsage);
+  }
+  
+  /**
+   * This is the core engine to emulate the CPU usage. The only responsibility 
+   * of this class is to perform certain math intensive operations to make sure 
+   * that some desired value of CPU is used.
+   */
+  public static class DefaultCpuUsageEmulator implements CpuUsageEmulatorCore {
+    // number of times to loop for performing the basic unit computation
+    private int numIterations;
+    private final Random random;
+    
+    /**
+     * This is to fool the JVM and make it think that we need the value 
+     * stored in the unit computation i.e {@link #compute()}. This will prevent
+     * the JVM from optimizing the code.
+     */
+    protected double returnValue;
+    
+    /**
+     * Initialized the {@link DefaultCpuUsageEmulator} with default values. 
+     * Note that the {@link DefaultCpuUsageEmulator} should be calibrated 
+     * (see {@link #calibrate(ResourceCalculatorPlugin, long)}) when initialized
+     * using this constructor.
+     */
+    public DefaultCpuUsageEmulator() {
+      this(-1);
+    }
+    
+    DefaultCpuUsageEmulator(int numIterations) {
+      this.numIterations = numIterations;
+      random = new Random();
+    }
+    
+    /**
+     * This will consume some desired level of CPU. This API will try to use up
+     * 'X' percent of the target cumulative CPU usage. Currently X is set to 
+     * 10%.
+     */
+    public void compute() {
+      for (int i = 0; i < numIterations; ++i) {
+        performUnitComputation();
+      }
+    }
+    
+    // Perform unit computation. The complete CPU emulation will be based on 
+    // multiple invocations to this unit computation module.
+    protected void performUnitComputation() {
+      //TODO can this be configurable too. Users/emulators should be able to 
+      // pick and choose what MATH operations to run.
+      // Example :
+      //           BASIC : ADD, SUB, MUL, DIV
+      //           ADV   : SQRT, SIN, COSIN..
+      //           COMPO : (BASIC/ADV)*
+      // Also define input generator. For now we can use the random number 
+      // generator. Later this can be changed to accept multiple sources.
+      
+      int randomData = random.nextInt();
+      int randomDataCube = randomData * randomData * randomData;
+      double randomDataCubeRoot = Math.cbrt(randomData);
+      returnValue = Math.log(Math.tan(randomDataCubeRoot 
+                                      * Math.exp(randomDataCube)) 
+                             * Math.sqrt(randomData));
+    }
+    
+    /**
+     * This will calibrate the algorithm such that a single invocation of
+     * {@link #compute()} emulates roughly 1% of the total desired resource 
+     * usage value.
+     */
+    public void calibrate(ResourceCalculatorPlugin monitor, 
+                          long totalCpuUsage) {
+      long initTime = monitor.getProcResourceValues().getCumulativeCpuTime();
+      
+      long defaultLoopSize = 0;
+      long finalTime = initTime;
+      
+      //TODO Make this configurable
+      while (finalTime - initTime < 100) { // 100 ms
+        ++defaultLoopSize;
+        performUnitComputation(); //perform unit computation
+        finalTime = monitor.getProcResourceValues().getCumulativeCpuTime();
+      }
+      
+      long referenceRuntime = finalTime - initTime;
+      
+      // time for one loop = (final-time - init-time) / total-loops
+      float timePerLoop = ((float)referenceRuntime) / defaultLoopSize;
+      
+      // compute the 1% of the total CPU usage desired
+      //TODO Make this configurable
+      long onePercent = totalCpuUsage / 100;
+      
+      // num-iterations for 1% = (total-desired-usage / 100) / time-for-one-loop
+      numIterations = Math.max(1, (int)((float)onePercent/timePerLoop));
+      
+      System.out.println("Calibration done. Basic computation runtime : " 
+          + timePerLoop + " milliseconds. Optimal number of iterations (1%): " 
+          + numIterations);
+    }
+  }
+  
+  public CumulativeCpuUsageEmulatorPlugin() {
+    this(new DefaultCpuUsageEmulator());
+  }
+  
+  /**
+   * For testing.
+   */
+  public CumulativeCpuUsageEmulatorPlugin(CpuUsageEmulatorCore core) {
+    emulatorCore = core;
+  }
+  
+  // Note that this weighing function uses only the current progress. In future,
+  // this might depend on progress, emulation-interval and expected target.
+  private float getWeightForProgressInterval(float progress) {
+    // we want some kind of exponential growth function that gives less weight
+    // on lower progress boundaries but high (exact emulation) near progress 
+    // value of 1.
+    // so here is how the current growth function looks like
+    //    progress    weight
+    //      0.1       0.0001
+    //      0.2       0.0016
+    //      0.3       0.0081
+    //      0.4       0.0256
+    //      0.5       0.0625
+    //      0.6       0.1296
+    //      0.7       0.2401
+    //      0.8       0.4096
+    //      0.9       0.6561
+    //      1.0       1.000
+    
+    return progress * progress * progress * progress;
+  }
+  
+  @Override
+  //TODO Multi-threading for speedup?
+  public void emulate() throws IOException, InterruptedException {
+    if (enabled) {
+      float currentProgress = progress.getProgress();
+      if (lastSeenProgress < currentProgress 
+          && ((currentProgress - lastSeenProgress) >= emulationInterval
+              || currentProgress == 1)) {
+        // Estimate the final cpu usage
+        //
+        //   Consider the following
+        //     Cl/Cc/Cp : Last/Current/Projected Cpu usage
+        //     Pl/Pc/Pp : Last/Current/Projected progress
+        //   Then
+        //     (Cp-Cc)/(Pp-Pc) = (Cc-Cl)/(Pc-Pl)
+        //   Solving this for Cp, we get
+        //     Cp = Cc + (1-Pc)*(Cc-Cl)/Pc-Pl)
+        //   Note that (Cc-Cl)/(Pc-Pl) is termed as 'rate' in the following 
+        //   section
+        
+        long currentCpuUsage = 
+          monitor.getProcResourceValues().getCumulativeCpuTime();
+        // estimate the cpu usage rate
+        float rate = (currentCpuUsage - lastSeenCpuUsageCpuUsage)
+                     / (currentProgress - lastSeenProgress);
+        long projectedUsage = 
+          currentCpuUsage + (long)((1 - currentProgress) * rate);
+        
+        if (projectedUsage < targetCpuUsage) {
+          // determine the correction factor between the current usage and the
+          // expected usage and add some weight to the target
+          long currentWeighedTarget = 
+            (long)(targetCpuUsage 
+                   * getWeightForProgressInterval(currentProgress));
+          
+          while (monitor.getProcResourceValues().getCumulativeCpuTime() 
+                 < currentWeighedTarget) {
+            emulatorCore.compute();
+            // sleep for 100ms
+            try {
+              Thread.sleep(100);
+            } catch (InterruptedException ie) {
+              String message = 
+                "CumulativeCpuUsageEmulatorPlugin got interrupted. Exiting.";
+              throw new RuntimeException(message);
+            }
+          }
+        }
+        
+        // set the last seen progress
+        lastSeenProgress = progress.getProgress();
+        // set the last seen usage
+        lastSeenCpuUsageCpuUsage = 
+          monitor.getProcResourceValues().getCumulativeCpuTime();
+      }
+    }
+  }
+
+  @Override
+  public void initialize(Configuration conf, ResourceUsageMetrics metrics,
+                         ResourceCalculatorPlugin monitor,
+                         Progressive progress) {
+    // get the target CPU usage
+    targetCpuUsage = metrics.getCumulativeCpuUsage();
+    if (targetCpuUsage <= 0 ) {
+      enabled = false;
+      return;
+    } else {
+      enabled = true;
+    }
+    
+    this.monitor = monitor;
+    this.progress = progress;
+    emulationInterval =  conf.getFloat(CPU_EMULATION_PROGRESS_INTERVAL, 
+                                       DEFAULT_EMULATION_FREQUENCY);
+    
+    // calibrate the core cpu-usage utility
+    emulatorCore.calibrate(monitor, targetCpuUsage);
+    
+    // initialize the states
+    lastSeenProgress = 0;
+    lastSeenCpuUsageCpuUsage = 0;
+  }
+}
\ No newline at end of file

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.emulators.resourceusage;
+
+import java.io.IOException;
+
+import org.apache.hadoop.mapred.gridmix.Progressive;
+import org.apache.hadoop.util.ResourceCalculatorPlugin;
+import org.apache.hadoop.tools.rumen.ResourceUsageMetrics;
+import org.apache.hadoop.conf.Configuration;
+
+/**
+ * <p>Each resource to be emulated should have a corresponding implementation 
+ * class that implements {@link ResourceUsageEmulatorPlugin}.</p>
+ * <br><br>
+ * {@link ResourceUsageEmulatorPlugin} will be configured using the 
+ * {@link #initialize(Configuration, ResourceUsageMetrics, 
+ *                    ResourceCalculatorPlugin, Progressive)} call.
+ * Every 
+ * {@link ResourceUsageEmulatorPlugin} is also configured with a feedback module
+ * i.e a {@link ResourceCalculatorPlugin}, to monitor the current resource 
+ * usage. {@link ResourceUsageMetrics} decides the final resource usage value to
+ * emulate. {@link Progressive} keeps track of the task's progress.</p>
+ * 
+ * <br><br>
+ * 
+ * For configuring GridMix to load and and use a resource usage emulator, 
+ * see {@link ResourceUsageMatcher}. 
+ */
+public interface ResourceUsageEmulatorPlugin {
+  /**
+   * Initialize the plugin. This might involve
+   *   - initializing the variables
+   *   - calibrating the plugin
+   */
+  void initialize(Configuration conf, ResourceUsageMetrics metrics, 
+                  ResourceCalculatorPlugin monitor,
+                  Progressive progress);
+
+  /**
+   * Emulate the resource usage to match the usage target. The plugin can use
+   * the given {@link ResourceCalculatorPlugin} to query for the current 
+   * resource usage.
+   * @throws IOException
+   * @throws InterruptedException
+   */
+  void emulate() throws IOException, InterruptedException;
+}

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageMatcher.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.emulators.resourceusage;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.gridmix.Progressive;
+import org.apache.hadoop.util.ResourceCalculatorPlugin;
+import org.apache.hadoop.tools.rumen.ResourceUsageMetrics;
+import org.apache.hadoop.util.ReflectionUtils;
+
+/**
+ * <p>This is the driver class for managing all the resource usage emulators.
+ * {@link ResourceUsageMatcher} expects a comma separated list of 
+ * {@link ResourceUsageEmulatorPlugin} implementations specified using 
+ * {@link #RESOURCE_USAGE_EMULATION_PLUGINS} as the configuration parameter.</p>
+ * 
+ * <p>Note that the order in which the emulators are invoked is same as the 
+ * order in which they are configured.
+ */
+public class ResourceUsageMatcher {
+  /**
+   * Configuration key to set resource usage emulators.
+   */
+  public static final String RESOURCE_USAGE_EMULATION_PLUGINS =
+    "gridmix.emulators.resource-usage.plugins";
+  
+  private List<ResourceUsageEmulatorPlugin> emulationPlugins = 
+    new ArrayList<ResourceUsageEmulatorPlugin>();
+  
+  /**
+   * Configure the {@link ResourceUsageMatcher} to load the configured plugins
+   * and initialize them.
+   */
+  @SuppressWarnings("unchecked")
+  public void configure(Configuration conf, ResourceCalculatorPlugin monitor, 
+                        ResourceUsageMetrics metrics, Progressive progress) {
+    Class[] plugins = conf.getClasses(RESOURCE_USAGE_EMULATION_PLUGINS);
+//, null, ResourceUsageEmulatorPlugin.class);
+    if (plugins == null) {
+      System.out.println("No resource usage emulator plugins configured.");
+    } else {
+      for (Class<? extends ResourceUsageEmulatorPlugin> plugin : plugins) {
+        if (plugin != null) {
+          emulationPlugins.add(ReflectionUtils.newInstance(plugin, conf));
+        }
+      }
+    }
+
+    // initialize the emulators once all the configured emulator plugins are
+    // loaded
+    for (ResourceUsageEmulatorPlugin emulator : emulationPlugins) {
+      emulator.initialize(conf, metrics, monitor, progress);
+    }
+  }
+  
+  public void matchResourceUsage() throws Exception {
+    for (ResourceUsageEmulatorPlugin emulator : emulationPlugins) {
+      // match the resource usage
+      emulator.emulate();
+    }
+  }
+}

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java?rev=1185694&view=auto
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java (added)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/TotalHeapUsageEmulatorPlugin.java Tue Oct 18 14:45:48 2011
@@ -0,0 +1,258 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred.gridmix.emulators.resourceusage;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.gridmix.Progressive;
+import org.apache.hadoop.util.ResourceCalculatorPlugin;
+import org.apache.hadoop.tools.rumen.ResourceUsageMetrics;
+
+/**
+ * <p>A {@link ResourceUsageEmulatorPlugin} that emulates the total heap 
+ * usage by loading the JVM heap memory. Adding smaller chunks of data to the 
+ * heap will essentially use up some heap space thus forcing the JVM to expand 
+ * its heap and thus resulting into increase in the heap usage.</p>
+ * 
+ * <p>{@link TotalHeapUsageEmulatorPlugin} emulates the heap usage in steps. 
+ * The frequency of emulation can be configured via 
+ * {@link #HEAP_EMULATION_PROGRESS_INTERVAL}.
+ * Heap usage values are matched via emulation only at specific interval 
+ * boundaries.
+ * </p>
+ *  
+ * {@link TotalHeapUsageEmulatorPlugin} is a wrapper program for managing 
+ * the heap usage emulation feature. It internally uses an emulation algorithm 
+ * (called as core and described using {@link HeapUsageEmulatorCore}) for 
+ * performing the actual emulation. Multiple calls to this core engine should 
+ * use up some amount of heap.
+ */
+public class TotalHeapUsageEmulatorPlugin 
+implements ResourceUsageEmulatorPlugin {
+  // Configuration parameters
+  //  the core engine to emulate heap usage
+  protected HeapUsageEmulatorCore emulatorCore;
+  //  the progress bar
+  private Progressive progress;
+  //  decides if this plugin can emulate heap usage or not
+  private boolean enabled = true;
+  //  the progress boundaries/interval where emulation should be done
+  private float emulationInterval;
+  //  target heap usage to emulate
+  private long targetHeapUsageInMB = 0;
+  
+  /**
+   * The frequency (based on task progress) with which memory-emulation code is
+   * run. If the value is set to 0.1 then the emulation will happen at 10% of 
+   * the task's progress. The default value of this parameter is 
+   * {@link #DEFAULT_EMULATION_PROGRESS_INTERVAL}.
+   */
+  public static final String HEAP_EMULATION_PROGRESS_INTERVAL = 
+    "gridmix.emulators.resource-usage.heap.emulation-interval";
+  
+  // Default value for emulation interval
+  private static final float DEFAULT_EMULATION_PROGRESS_INTERVAL = 0.1F; // 10 %
+
+  private float prevEmulationProgress = 0F;
+  
+  /**
+   * The minimum buffer reserved for other non-emulation activities.
+   */
+  public static final String MIN_HEAP_FREE_RATIO = 
+    "gridmix.emulators.resource-usage.heap.min-free-ratio";
+  
+  private float minFreeHeapRatio;
+  
+  private static final float DEFAULT_MIN_FREE_HEAP_RATIO = 0.3F;
+  
+  /**
+   * Determines the unit increase per call to the core engine's load API. This
+   * is expressed as a percentage of the difference between the expected total 
+   * heap usage and the current usage. 
+   */
+  public static final String HEAP_LOAD_RATIO = 
+    "gridmix.emulators.resource-usage.heap.load-ratio";
+  
+  private float heapLoadRatio;
+  
+  private static final float DEFAULT_HEAP_LOAD_RATIO = 0.1F;
+  
+  public static int ONE_MB = 1024 * 1024;
+  
+  /**
+   * Defines the core heap usage emulation algorithm. This engine is expected
+   * to perform certain memory intensive operations to consume some
+   * amount of heap. {@link #load(long)} should load the current heap and 
+   * increase the heap usage by the specified value. This core engine can be 
+   * initialized using the {@link #initialize(ResourceCalculatorPlugin, long)} 
+   * API to suit the underlying hardware better.
+   */
+  public interface HeapUsageEmulatorCore {
+    /**
+     * Performs some memory intensive operations to use up some heap.
+     */
+    public void load(long sizeInMB);
+    
+    /**
+     * Initialize the core.
+     */
+    public void initialize(ResourceCalculatorPlugin monitor, 
+                           long totalHeapUsageInMB);
+    
+    /**
+     * Reset the resource usage
+     */
+    public void reset();
+  }
+  
+  /**
+   * This is the core engine to emulate the heap usage. The only responsibility 
+   * of this class is to perform certain memory intensive operations to make 
+   * sure that some desired value of heap is used.
+   */
+  public static class DefaultHeapUsageEmulator 
+  implements HeapUsageEmulatorCore {
+    // store the unit loads in a list
+    protected static ArrayList<Object> heapSpace = new ArrayList<Object>();
+    
+    /**
+     * Increase heap usage by current process by the given amount.
+     * This is done by creating objects each of size 1MB.
+     */
+    public void load(long sizeInMB) {
+      for (long i = 0; i < sizeInMB; ++i) {
+        // Create another String object of size 1MB
+        heapSpace.add((Object)new byte[ONE_MB]);
+      }
+    }
+    
+    /**
+     * This will initialize the core and check if the core can emulate the 
+     * desired target on the underlying hardware.
+     */
+    public void initialize(ResourceCalculatorPlugin monitor, 
+                           long totalHeapUsageInMB) {
+      long maxPhysicalMemoryInMB = monitor.getPhysicalMemorySize() / ONE_MB ;
+      if(maxPhysicalMemoryInMB < totalHeapUsageInMB) {
+        throw new RuntimeException("Total heap the can be used is " 
+            + maxPhysicalMemoryInMB 
+            + " bytes while the emulator is configured to emulate a total of " 
+            + totalHeapUsageInMB + " bytes");
+      }
+    }
+    
+    /**
+     * Clear references to all the GridMix-allocated special objects so that 
+     * heap usage is reduced.
+     */
+    @Override
+    public void reset() {
+      heapSpace.clear();
+    }
+  }
+  
+  public TotalHeapUsageEmulatorPlugin() {
+    this(new DefaultHeapUsageEmulator());
+  }
+  
+  /**
+   * For testing.
+   */
+  public TotalHeapUsageEmulatorPlugin(HeapUsageEmulatorCore core) {
+    emulatorCore = core;
+  }
+  
+  protected long getTotalHeapUsageInMB() {
+    return Runtime.getRuntime().totalMemory() / ONE_MB;
+  }
+  
+  protected long getMaxHeapUsageInMB() {
+    return Runtime.getRuntime().maxMemory() / ONE_MB;
+  }
+  
+  @Override
+  public void emulate() throws IOException, InterruptedException {
+    if (enabled) {
+      float currentProgress = progress.getProgress();
+      if (prevEmulationProgress < currentProgress 
+          && ((currentProgress - prevEmulationProgress) >= emulationInterval
+              || currentProgress == 1)) {
+
+        long maxHeapSizeInMB = getMaxHeapUsageInMB();
+        long committedHeapSizeInMB = getTotalHeapUsageInMB();
+        
+        // Increase committed heap usage, if needed
+        // Using a linear weighing function for computing the expected usage
+        long expectedHeapUsageInMB = 
+          Math.min(maxHeapSizeInMB,
+                   (long) (targetHeapUsageInMB * currentProgress));
+        if (expectedHeapUsageInMB < maxHeapSizeInMB
+            && committedHeapSizeInMB < expectedHeapUsageInMB) {
+          long bufferInMB = (long)(minFreeHeapRatio * expectedHeapUsageInMB);
+          long currentDifferenceInMB = 
+            expectedHeapUsageInMB - committedHeapSizeInMB;
+          long currentIncrementLoadSizeInMB = 
+                (long)(currentDifferenceInMB * heapLoadRatio);
+          // Make sure that at least 1 MB is incremented.
+          currentIncrementLoadSizeInMB = 
+            Math.max(1, currentIncrementLoadSizeInMB);
+          while (committedHeapSizeInMB + bufferInMB < expectedHeapUsageInMB) {
+            // add blocks in order of X% of the difference, X = 10% by default
+            emulatorCore.load(currentIncrementLoadSizeInMB);
+            committedHeapSizeInMB = getTotalHeapUsageInMB();
+          }
+        }
+        
+        // store the emulation progress boundary
+        prevEmulationProgress = currentProgress;
+      }
+      
+      // reset the core so that the garbage is reclaimed
+      emulatorCore.reset();
+    }
+  }
+
+  @Override
+  public void initialize(Configuration conf, ResourceUsageMetrics metrics,
+                         ResourceCalculatorPlugin monitor,
+                         Progressive progress) {
+    // get the target heap usage
+    targetHeapUsageInMB = metrics.getHeapUsage() / ONE_MB;
+    if (targetHeapUsageInMB <= 0 ) {
+      enabled = false;
+      return;
+    } else {
+      // calibrate the core heap-usage utility
+      emulatorCore.initialize(monitor, targetHeapUsageInMB);
+      enabled = true;
+    }
+    
+    this.progress = progress;
+    emulationInterval = 
+      conf.getFloat(HEAP_EMULATION_PROGRESS_INTERVAL, 
+                    DEFAULT_EMULATION_PROGRESS_INTERVAL);
+    
+    minFreeHeapRatio = conf.getFloat(MIN_HEAP_FREE_RATIO, 
+                                     DEFAULT_MIN_FREE_HEAP_RATIO);
+    
+    heapLoadRatio = conf.getFloat(HEAP_LOAD_RATIO, DEFAULT_HEAP_LOAD_RATIO);
+    
+    prevEmulationProgress = 0;
+  }
+}
\ No newline at end of file

Added: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/test/data/wordcount.json.gz
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/test/data/wordcount.json.gz?rev=1185694&view=auto
==============================================================================
Binary file - no diff available.

Propchange: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/test/data/wordcount.json.gz
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Modified: hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java?rev=1185694&r1=1185693&r2=1185694&view=diff
==============================================================================
--- hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java (original)
+++ hadoop/common/branches/branch-0.20-security/src/contrib/gridmix/src/test/org/apache/hadoop/mapred/gridmix/DebugJobProducer.java Tue Oct 18 14:45:48 2011
@@ -146,7 +146,7 @@ public class DebugJobProducer implements
       final long seed = r.nextLong();
       r.setSeed(seed);
       id = seq.getAndIncrement();
-      name = String.format("MOCKJOB%05d", id);
+      name = String.format("MOCKJOB%06d", id);
       this.conf = conf;
       LOG.info(name + " (" + seed + ")");
       submitTime = timestamp.addAndGet(
@@ -209,9 +209,14 @@ public class DebugJobProducer implements
 
    @Override
    public String getUser() {
-     String s = String.format("foobar%d", id);
-     GridmixTestUtils.createHomeAndStagingDirectory(s,(JobConf)conf);
-     return s;
+     // Obtain user name from job configuration, if available.
+     // Otherwise use dummy user names.
+     String user = conf.get("user.name");
+     if (user == null) {
+       user = String.format("foobar%d", id);
+     }
+     GridmixTestUtils.createHomeAndStagingDirectory(user, (JobConf)conf);
+     return user;
    }
 
    @Override
@@ -285,7 +290,7 @@ public class DebugJobProducer implements
 
     @Override
     public org.apache.hadoop.mapred.JobConf getJobConf() {
-      throw new UnsupportedOperationException();
+      return new JobConf(conf);
     }
 
     @Override



Mime
View raw message