hadoop-common-commits mailing list archives

From cutt...@apache.org
Subject svn commit: r588035 [1/3] - in /lucene/hadoop/branches/branch-0.15: ./ src/contrib/streaming/src/java/org/apache/hadoop/streaming/ src/java/org/apache/hadoop/conf/ src/java/org/apache/hadoop/filecache/ src/java/org/apache/hadoop/io/ src/java/org/apache...
Date Wed, 24 Oct 2007 21:24:53 GMT
Author: cutting
Date: Wed Oct 24 14:24:51 2007
New Revision: 588035

URL: http://svn.apache.org/viewvc?rev=588035&view=rev
Log:
Merge -r 588032:588033 from trunk to 0.15 branch.  Fixes: HADOOP-2046.

Added:
    lucene/hadoop/branches/branch-0.15/src/contrib/streaming/src/java/org/apache/hadoop/streaming/package.html
      - copied unchanged from r588033, lucene/hadoop/trunk/src/contrib/streaming/src/java/org/apache/hadoop/streaming/package.html
Modified:
    lucene/hadoop/branches/branch-0.15/CHANGES.txt
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/conf/Configuration.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/filecache/DistributedCache.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/SequenceFile.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/Writable.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/WritableComparable.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/ClusterStatus.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Counters.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/FileInputFormat.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputFormat.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputSplit.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/JobClient.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/JobConf.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/JobEndNotifier.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/MapReduceBase.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/MapRunnable.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Mapper.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/OutputCollector.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/OutputFormat.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Partitioner.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/RecordReader.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/RecordWriter.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Reducer.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Reporter.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/RunningJob.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/package.html
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/util/GenericOptionsParser.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/util/Progressable.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/util/Tool.java
    lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/util/ToolRunner.java
    lucene/hadoop/branches/branch-0.15/src/test/org/apache/hadoop/mapred/NotificationTestCase.java

Modified: lucene/hadoop/branches/branch-0.15/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/CHANGES.txt?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/CHANGES.txt (original)
+++ lucene/hadoop/branches/branch-0.15/CHANGES.txt Wed Oct 24 14:24:51 2007
@@ -448,7 +448,10 @@
     edits log. Reduce the number of syncs by double-buffering the changes
     to the transaction log. (Dhruba Borthakur)
 
-Release 0.14.3 - Unreleased
+    HADOOP-2046.  Improve mapred javadoc.  (Arun C. Murthy via cutting)
+
+
+Release 0.14.3 - 2007-10-19
 
   BUG FIXES
 

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/conf/Configuration.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/conf/Configuration.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/conf/Configuration.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/conf/Configuration.java Wed Oct 24 14:24:51 2007
@@ -38,43 +38,70 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 
-/** Provides access to configuration parameters.  Configurations are specified
- * by resources.  A resource contains a set of name/value pairs.
+/** 
+ * Provides access to configuration parameters.
  *
- * <p>Each resource is named by either a String or by a Path.  If named by a
- * String, then the classpath is examined for a file with that name.  If a
- * File, then the local filesystem is examined directly, without referring to
- * the CLASSPATH.
+ * <h4 id="Resources">Resources</h4>
  *
- * <p>Configuration parameters may be declared 'final'.  Once a resource
- * declares a value final, no subsequently-loaded resource may alter that
- * value.  For example, one might define a final parameter with:
- * <pre>
+ * <p>Configurations are specified by resources. A resource contains a set of
+ * name/value pairs as XML data. Each resource is named by either a 
+ * <code>String</code> or by a {@link Path}. If named by a <code>String</code>, 
+ * then the classpath is examined for a file with that name.  If named by a 
+ * <code>Path</code>, then the local filesystem is examined directly, without 
+ * referring to the classpath.
+ *
+ * <p>Hadoop by default specifies two resources, loaded in-order from the
+ * classpath: <ol>
+ * <li><tt><a href="{@docRoot}/../hadoop-default.html">hadoop-default.xml</a>
+ * </tt>: Read-only defaults for hadoop.</li>
+ * <li><tt>hadoop-site.xml</tt>: Site-specific configuration for a given hadoop
+ * installation.</li>
+ * </ol>
+ * Applications may add additional resources, which are loaded
+ * subsequent to these resources in the order they are added.
+ * 
+ * <h4 id="FinalParams">Final Parameters</h4>
+ *
+ * <p>Configuration parameters may be declared <i>final</i>. 
+ * Once a resource declares a value final, no subsequently-loaded 
+ * resource can alter that value.  
+ * For example, one might define a final parameter with:
+ * <tt><pre>
  *  &lt;property&gt;
  *    &lt;name&gt;dfs.client.buffer.dir&lt;/name&gt;
  *    &lt;value&gt;/tmp/hadoop/dfs/client&lt;/value&gt;
- *    &lt;final&gt;true&lt;/final&gt;
+ *    <b>&lt;final&gt;true&lt;/final&gt;</b>
+ *  &lt;/property&gt;</pre></tt>
+ *
+ * Administrators typically define parameters as final in 
+ * <tt>hadoop-site.xml</tt> for values that user applications may not alter.
+ *
+ * <h4 id="VariableExpansion">Variable Expansion</h4>
+ *
+ * <p>Value strings are first processed for <i>variable expansion</i>. The
+ * available properties are:<ol>
+ * <li>Other properties defined in this Configuration; and, if a name is
+ * undefined here,</li>
+ * <li>Properties in {@link System#getProperties()}.</li>
+ * </ol>
+ *
+ * <p>For example, if a configuration resource contains the following property
+ * definitions: 
+ * <tt><pre>
+ *  &lt;property&gt;
+ *    &lt;name&gt;basedir&lt;/name&gt;
+ *    &lt;value&gt;/user/${<i>user.name</i>}&lt;/value&gt;
  *  &lt;/property&gt;
- * </pre>
+ *  
+ *  &lt;property&gt;
+ *    &lt;name&gt;tempdir&lt;/name&gt;
+ *    &lt;value&gt;${<i>basedir</i>}/tmp&lt;/value&gt;
+ *  &lt;/property&gt;</pre></tt>
  *
- * <p>Hadoop by default specifies two resource strings: "hadoop-default.xml"
- * and "hadoop-site.xml".  Other tools built on Hadoop may specify additional
- * resources.
- * 
- * <p>The values returned by most <tt>get*</tt> methods are based on String representations. 
- * This String is processed for <b>variable expansion</b>. The available variables are the 
- * <em>System properties</em> and the <em>other properties</em> defined in this Configuration.
- * <p>The only <tt>get*</tt> method that is not processed for variable expansion is
- * {@link #getObject(String)} (as it cannot assume that the returned values are String). 
- * You can use <tt>getObject</tt> to obtain the raw value of a String property without 
- * variable expansion: if <tt>(String)conf.getObject("my.jdk")</tt> is <tt>"JDK ${java.version}"</tt>
- * then conf.get("my.jdk")</tt> is <tt>"JDK 1.5.0"</tt> 
- * <p> Example XML config using variables:<br><tt>
- * &lt;name>basedir&lt;/name>&lt;value>/user/${user.name}&lt;/value><br> 
- * &lt;name>tempdir&lt;/name>&lt;value>${basedir}/tmp&lt;/value><br>
- * </tt>When conf.get("tempdir") is called:<br>
- * <tt>${basedir}</tt> is resolved to another property in this Configuration.
- * Then <tt>${user.name}</tt> is resolved to a System property.
+ * When <tt>conf.get("tempdir")</tt> is called, then <tt>${<i>basedir</i>}</tt>
+ * will be resolved to another property in this Configuration, while
+ * <tt>${<i>user.name</i>}</tt> would then ordinarily be resolved to the value
+ * of the System property with that name.
  */
 public class Configuration implements Iterable<Map.Entry<String,String>> {
   private static final Log LOG =
@@ -120,7 +147,11 @@
     finalResources.add("hadoop-site.xml");
   }
 
-  /** A new configuration with the same settings cloned from another. */
+  /** 
+   * A new configuration with the same settings cloned from another.
+   * 
+   * @param other the configuration from which to clone settings.
+   */
   @SuppressWarnings("unchecked")
   public Configuration(Configuration other) {
     if (LOG.isDebugEnabled()) {
@@ -186,8 +217,13 @@
   }
 
   /**
-   * Add a configuration resource.
-   * @param name resource to be added
+   * Add a configuration resource. 
+   * 
+   * The properties of this resource will override properties of previously 
+   * added resources, unless they were marked <a href="#FinalParams">final</a>. 
+   * 
+   * @param name resource to be added, the classpath is examined for a file 
+   *             with that name.
    */
   public void addResource(String name) {
     addResource(resources, name);
@@ -195,7 +231,13 @@
 
   /**
    * Add a configuration resource. 
-   * @param url url of the resource to be added
+   * 
+   * The properties of this resource will override properties of previously 
+   * added resources, unless they were marked <a href="#FinalParams">final</a>. 
+   * 
+   * @param url url of the resource to be added, the local filesystem is 
+   *            examined directly to find the resource, without referring to 
+   *            the classpath.
    */
   public void addResource(URL url) {
     addResource(resources, url);
@@ -203,7 +245,13 @@
 
   /**
    * Add a configuration resource. 
-   * @param file file-path of resource to be added
+   * 
+   * The properties of this resource will override properties of previously 
+   * added resources, unless they were marked <a href="#FinalParams">final</a>. 
+   * 
+   * @param file file-path of resource to be added, the local filesystem is
+   *             examined directly to find the resource, without referring to 
+   *             the classpath.
    */
   public void addResource(Path file) {
     addResource(resources, file);
@@ -271,17 +319,28 @@
                                     + MAX_SUBST + " " + expr);
   }
   
-  /** Returns the value of the <code>name</code> property, or null if no
-   * such property exists. */
+  /**
+   * Get the value of the <code>name</code> property, <code>null</code> if
+   * no such property exists.
+   * 
+   * Values are processed for <a href="#VariableExpansion">variable expansion</a> 
+   * before being returned. 
+   * 
+   * @param name the property name.
+   * @return the value of the <code>name</code> property, 
+   *         or null if no such property exists.
+   */
   public String get(String name) {
     return substituteVars(getProps().getProperty(name));
   }
 
   /**
-   * Get the value of the <code>name</code> property, without doing variable
-   * expansion.
-   * @param name the property name
-   * @return the result or null if no such property exists
+   * Get the value of the <code>name</code> property, without doing
+   * <a href="#VariableExpansion">variable expansion</a>.
+   * 
+   * @param name the property name.
+   * @return the value of the <code>name</code> property, 
+   *         or null if no such property exists.
    */
   public String getRaw(String name) {
     return getProps().getProperty(name);
@@ -295,7 +354,11 @@
     getProps().setProperty(name, value.toString());
   }
   
-  /** Sets the value of the <code>name</code> property. 
+  /** 
+   * Set the <code>value</code> of the <code>name</code> property.
+   * 
+   * @param name property name.
+   * @param value property value.
    */
   public void set(String name, String value) {
     getOverlay().setProperty(name, value);
@@ -309,16 +372,29 @@
     return overlay;
   }
 
-  /** Returns the value of the <code>name</code> property.  If no such property
+  /** 
+   * Get the value of the <code>name</code> property. If no such property 
    * exists, then <code>defaultValue</code> is returned.
+   * 
+   * @param name property name.
+   * @param defaultValue default value.
+   * @return property value, or <code>defaultValue</code> if the property 
+   *         doesn't exist.                    
    */
   public String get(String name, String defaultValue) {
     return substituteVars(getProps().getProperty(name, defaultValue));
   }
     
-  /** Returns the value of the <code>name</code> property as an integer.  If no
-   * such property is specified, or if the specified value is not a valid
-   * integer, then <code>defaultValue</code> is returned.
+  /** 
+   * Get the value of the <code>name</code> property as an <code>int</code>.
+   *   
+   * If no such property exists, or if the specified value is not a valid
+   * <code>int</code>, then <code>defaultValue</code> is returned.
+   * 
+   * @param name property name.
+   * @param defaultValue default value.
+   * @return property value as an <code>int</code>, 
+   *         or <code>defaultValue</code>. 
    */
   public int getInt(String name, int defaultValue) {
     String valueString = get(name);
@@ -331,15 +407,26 @@
     }
   }
 
-  /** Sets the value of the <code>name</code> property to an integer. */
+  /** 
+   * Set the value of the <code>name</code> property to an <code>int</code>.
+   * 
+   * @param name property name.
+   * @param value <code>int</code> value of the property.
+   */
   public void setInt(String name, int value) {
     set(name, Integer.toString(value));
   }
 
 
-  /** Returns the value of the <code>name</code> property as a long.  If no
-   * such property is specified, or if the specified value is not a valid
-   * long, then <code>defaultValue</code> is returned.
+  /** 
+   * Get the value of the <code>name</code> property as a <code>long</code>.  
+   * If no such property is specified, or if the specified value is not a valid
+   * <code>long</code>, then <code>defaultValue</code> is returned.
+   * 
+   * @param name property name.
+   * @param defaultValue default value.
+   * @return property value as a <code>long</code>, 
+   *         or <code>defaultValue</code>. 
    */
   public long getLong(String name, long defaultValue) {
     String valueString = get(name);
@@ -352,14 +439,25 @@
     }
   }
 
-  /** Sets the value of the <code>name</code> property to a long. */
+  /** 
+   * Set the value of the <code>name</code> property to a <code>long</code>.
+   * 
+   * @param name property name.
+   * @param value <code>long</code> value of the property.
+   */
   public void setLong(String name, long value) {
     set(name, Long.toString(value));
   }
 
-  /** Returns the value of the <code>name</code> property as a float.  If no
-   * such property is specified, or if the specified value is not a valid
-   * float, then <code>defaultValue</code> is returned.
+  /** 
+   * Get the value of the <code>name</code> property as a <code>float</code>.  
+   * If no such property is specified, or if the specified value is not a valid
+   * <code>float</code>, then <code>defaultValue</code> is returned.
+   * 
+   * @param name property name.
+   * @param defaultValue default value.
+   * @return property value as a <code>float</code>, 
+   *         or <code>defaultValue</code>. 
    */
   public float getFloat(String name, float defaultValue) {
     String valueString = get(name);
@@ -372,10 +470,15 @@
     }
   }
 
-  /** Returns the value of the <code>name</code> property as an boolean.  If no
-   * such property is specified, or if the specified value is not a valid
-   * boolean, then <code>defaultValue</code> is returned.  Valid boolean values
-   * are "true" and "false".
+  /** 
+   * Get the value of the <code>name</code> property as a <code>boolean</code>.  
+   * If no such property is specified, or if the specified value is not a valid
+   * <code>boolean</code>, then <code>defaultValue</code> is returned.
+   * 
+   * @param name property name.
+   * @param defaultValue default value.
+   * @return property value as a <code>boolean</code>, 
+   *         or <code>defaultValue</code>. 
    */
   public boolean getBoolean(String name, boolean defaultValue) {
     String valueString = get(name);
@@ -386,14 +489,24 @@
     else return defaultValue;
   }
 
-  /** Sets the value of the <code>name</code> property to an integer. */
+  /** 
+   * Set the value of the <code>name</code> property to a <code>boolean</code>.
+   * 
+   * @param name property name.
+   * @param value <code>boolean</code> value of the property.
+   */
   public void setBoolean(String name, boolean value) {
     set(name, Boolean.toString(value));
   }
 
-  /** Returns the value of the <code>name</code> property as an array of
-   * strings.  If no such property is specified, then <code>null</code>
-   * is returned.  Values are comma delimited.
+  /** 
+   * Get the comma delimited values of the <code>name</code> property as 
+   * an array of <code>String</code>s.  
+   * If no such property is specified then <code>null</code> is returned.
+   * 
+   * @param name property name.
+   * @return property value as an array of <code>String</code>s, 
+   *         or <code>null</code>. 
    */
   public String[] getStrings(String name) {
     String valueString = get(name);
@@ -402,16 +515,24 @@
 
   /**
    * Load a class by name.
-   * @param name the class name
-   * @return the class object
-   * @throws ClassNotFoundException if the class is not found
+   * 
+   * @param name the class name.
+   * @return the class object.
+   * @throws ClassNotFoundException if the class is not found.
    */
   public Class<?> getClassByName(String name) throws ClassNotFoundException {
     return Class.forName(name, true, classLoader);
   }
-  
-  /** Returns the value of the <code>name</code> property as a Class.  If no
-   * such property is specified, then <code>defaultValue</code> is returned.
+
+  /** 
+   * Get the value of the <code>name</code> property as a <code>Class</code>.  
+   * If no such property is specified, then <code>defaultValue</code> is 
+   * returned.
+   * 
+   * @param name the class name.
+   * @param defaultValue default value.
+   * @return property value as a <code>Class</code>, 
+   *         or <code>defaultValue</code>. 
    */
   public Class<?> getClass(String name, Class<?> defaultValue) {
     String valueString = get(name);
@@ -424,15 +545,27 @@
     }
   }
 
-  /** Returns the value of the <code>name</code> property as a Class.  If no
-   * such property is specified, then <code>defaultValue</code> is returned.
-   * An error is thrown if the returned class does not implement the named
+  /** 
+   * Get the value of the <code>name</code> property as a <code>Class</code>
+   * implementing the interface specified by <code>xface</code>.
+   *   
+   * If no such property is specified, then <code>defaultValue</code> is 
+   * returned.
+   * 
+   * An exception is thrown if the returned class does not implement the named
    * interface. 
-   */
-  public <U> Class<? extends U> getClass(String propertyName, Class<? extends U> defaultValue,
-      Class<U> xface) {
+   * 
+   * @param name the class name.
+   * @param defaultValue default value.
+   * @param xface the interface implemented by the named class.
+   * @return property value as a <code>Class</code>, 
+   *         or <code>defaultValue</code>.
+   */
+  public <U> Class<? extends U> getClass(String name, 
+                                         Class<? extends U> defaultValue, 
+                                         Class<U> xface) {
     try {
-      Class<?> theClass = getClass(propertyName, defaultValue);
+      Class<?> theClass = getClass(name, defaultValue);
       if (theClass != null && !xface.isAssignableFrom(theClass))
         throw new RuntimeException(theClass+" not "+xface.getName());
       else if (theClass != null)
@@ -444,21 +577,32 @@
     }
   }
 
-  /** Sets the value of the <code>name</code> property to the name of a class.
-   * First checks that the class implements the named interface. 
+  /** 
+   * Set the value of the <code>name</code> property to the name of a 
+   * <code>theClass</code> implementing the given interface <code>xface</code>.
+   * 
+   * An exception is thrown if <code>theClass</code> does not implement the 
+   * interface <code>xface</code>. 
+   * 
+   * @param name property name.
+   * @param theClass property value.
+   * @param xface the interface implemented by the named class.
    */
-  public void setClass(String propertyName, Class<?> theClass,
-                       Class<?> xface) {
-    
+  public void setClass(String name, Class<?> theClass, Class<?> xface) {
     if (!xface.isAssignableFrom(theClass))
       throw new RuntimeException(theClass+" not "+xface.getName());
-    set(propertyName, theClass.getName());
+    set(name, theClass.getName());
   }
 
-  /** Returns a local file under a directory named in <i>dirsProp</i> with
+  /** 
+   * Get a local file under a directory named by <i>dirsProp</i> with
    * the given <i>path</i>.  If <i>dirsProp</i> contains multiple directories,
    * then one is chosen based on <i>path</i>'s hash code.  If the selected
    * directory does not exist, an attempt is made to create it.
+   * 
+   * @param dirsProp directory in which to locate the file.
+   * @param path file-path.
+   * @return local file under the directory with the given path.
    */
   public Path getLocalPath(String dirsProp, String path)
     throws IOException {
@@ -482,10 +626,15 @@
     throw new IOException("No valid local directories in property: "+dirsProp);
   }
 
-  /** Returns a local file name under a directory named in <i>dirsProp</i> with
+  /** 
+   * Get a local file name under a directory named in <i>dirsProp</i> with
    * the given <i>path</i>.  If <i>dirsProp</i> contains multiple directories,
    * then one is chosen based on <i>path</i>'s hash code.  If the selected
    * directory does not exist, an attempt is made to create it.
+   * 
+   * @param dirsProp directory in which to locate the file.
+   * @param path file-path.
+   * @return local file under the directory with the given path.
    */
   public File getFile(String dirsProp, String path)
     throws IOException {
@@ -502,14 +651,22 @@
     throw new IOException("No valid local directories in property: "+dirsProp);
   }
 
-
-
-  /** Returns the URL for the named resource. */
+  /** 
+   * Get the {@link URL} for the named resource.
+   * 
+   * @param name resource name.
+   * @return the url for the named resource.
+   */
   public URL getResource(String name) {
     return classLoader.getResource(name);
   }
-  /** Returns an input stream attached to the configuration resource with the
+  
+  /** 
+   * Get an input stream attached to the configuration resource with the
    * given <code>name</code>.
+   * 
+   * @param name configuration resource name.
+   * @return an input stream attached to the resource.
    */
   public InputStream getConfResourceAsInputStream(String name) {
     try {
@@ -528,8 +685,12 @@
     }
   }
 
-  /** Returns a reader attached to the configuration resource with the
+  /** 
+   * Get a {@link Reader} attached to the configuration resource with the
    * given <code>name</code>.
+   * 
+   * @param name configuration resource name.
+   * @return a reader attached to the resource.
    */
   public Reader getConfResourceAsReader(String name) {
     try {
@@ -561,15 +722,17 @@
   }
 
   /** @return Iterator&lt; Map.Entry&lt;String,String> >  
-   * @deprecated use <code>iterator()</code> instead 
+   * @deprecated Use {@link #iterator()} instead. 
    */
   public Iterator entries() {
     return iterator();
   }
 
   /**
-   * Go through the list of String key-value pairs in the configuration.
-   * @return an iterator over the entries
+   * Get an {@link Iterator} to go through the list of <code>String</code> 
+   * key-value pairs in the configuration.
+   * 
+   * @return an iterator over the entries.
    */
   public Iterator<Map.Entry<String, String>> iterator() {
     // Get a copy of just the string to string pairs. After the old object
@@ -689,7 +852,12 @@
     
   }
 
-  /** Writes non-default properties in this configuration.*/
+  /** 
+   * Write out the non-default properties in this configuration to the given
+   * {@link OutputStream}.
+   * 
+   * @param out the output stream to write to.
+   */
   public void write(OutputStream out) throws IOException {
     Properties properties = getProps();
     try {
@@ -732,8 +900,9 @@
   }
 
   /**
-   * Get the class loader for this job.
-   * @return the correct class loader
+   * Get the {@link ClassLoader} for this job.
+   * 
+   * @return the correct class loader.
    */
   public ClassLoader getClassLoader() {
     return classLoader;
@@ -741,7 +910,8 @@
   
   /**
    * Set the class loader that will be used to load the various objects.
-   * @param classLoader the new class loader
+   * 
+   * @param classLoader the new class loader.
    */
   public void setClassLoader(ClassLoader classLoader) {
     this.classLoader = classLoader;
@@ -767,11 +937,16 @@
     }
   }
 
-  /** Make this class quiet. Error and informational
-   *  messages might not be logged.
+  /** 
+   * Set the quietness-mode. 
+   * 
+   * In quiet-mode, error and informational messages might not be logged.
+   * 
+   * @param quietmode <code>true</code> to set quiet-mode on, <code>false</code>
+   *              to turn it off.
    */
-  public synchronized void setQuietMode(boolean value) {
-    quietmode = value;
+  public synchronized void setQuietMode(boolean quietmode) {
+    this.quietmode = quietmode;
   }
 
   /** For debugging.  List non-default properties to the terminal and exit. */
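
For illustration, here is a minimal sketch of the variable-expansion behaviour documented in the Configuration javadoc above (the property names and the expanded value are hypothetical):

    Configuration conf = new Configuration();
    conf.set("basedir", "/user/${user.name}");
    conf.set("tempdir", "${basedir}/tmp");

    // get() first resolves ${basedir} against this Configuration, then
    // ${user.name} against the System properties:
    String expanded = conf.get("tempdir");   // e.g. "/user/alice/tmp"

    // getRaw() skips variable expansion entirely:
    String raw = conf.getRaw("tempdir");     // "${basedir}/tmp"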

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/filecache/DistributedCache.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/filecache/DistributedCache.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/filecache/DistributedCache.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/filecache/DistributedCache.java Wed Oct 24 14:24:51 2007
@@ -24,14 +24,89 @@
 import org.apache.hadoop.conf.*;
 import org.apache.hadoop.util.*;
 import org.apache.hadoop.fs.*;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.Reducer;
 
 import java.net.URI;
 
-/*******************************************************************************
- * The DistributedCache maintains all the caching information of cached archives
- * and unarchives all the files as well and returns the path
+/**
+ * Distribute application-specific large, read-only files efficiently.
  * 
- ******************************************************************************/
+ * <p><code>DistributedCache</code> is a facility provided by the Map-Reduce
+ * framework to cache files (text, archives, jars etc.) needed by applications.
+ * </p>
+ * 
+ * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached 
+ * via the {@link JobConf}. The <code>DistributedCache</code> assumes that the
+ * files specified via hdfs:// urls are already present on the 
+ * {@link FileSystem} at the path specified by the url.</p>
+ * 
+ * <p>The framework will copy the necessary files on to the slave node before 
+ * any tasks for the job are executed on that node. Its efficiency stems from 
+ * the fact that the files are only copied once per job and the ability to 
+ * cache archives which are un-archived on the slaves.</p> 
+ *
+ * <p><code>DistributedCache</code> can be used to distribute simple, read-only
+ * data/text files and/or more complex types such as archives, jars etc. 
+ * Archives (zip files) are un-archived at the slave nodes. Jars may be 
+ * optionally added to the classpath of the tasks, a rudimentary software
+ * distribution mechanism. Optionally users can also direct it to symlink the 
+ * distributed cache file(s) into the working directory of the task.</p>
+ * 
+ * <p><code>DistributedCache</code> tracks modification timestamps of the cache 
+ * files. Clearly the cache files should not be modified by the application 
+ * or externally while the job is executing.</p>
+ * 
+ * <p>Here is an illustrative example on how to use the 
+ * <code>DistributedCache</code>:</p>
+ * <p><blockquote><pre>
+ *     // Setting up the cache for the application
+ *     
+ *     1. Copy the requisite files to the <code>FileSystem</code>:
+ *     
+ *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
+ *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+ *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
+ *     
+ *     2. Setup the application's <code>JobConf</code>:
+ *     
+ *     JobConf job = new JobConf();
+ *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+ *                                   job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
+ *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
+ *
+ *     3. Use the cached files in the {@link Mapper} or {@link Reducer}:
+ *     
+ *     public static class MapClass extends MapReduceBase  
+ *     implements Mapper&lt;K, V, K, V&gt; {
+ *     
+ *       private Path[] localArchives;
+ *       private Path[] localFiles;
+ *       
+ *       public void configure(JobConf job) {
+ *         // Get the cached archives/files
+ *         localArchives = DistributedCache.getLocalCacheArchives(job);
+ *         localFiles = DistributedCache.getLocalCacheFiles(job);
+ *       }
+ *       
+ *       public void map(K key, V value, 
+ *                       OutputCollector&lt;K, V&gt; output, Reporter reporter) 
+ *       throws IOException {
+ *         // Use data from the cached archives/files here
+ *         // ...
+ *         // ...
+ *         output.collect(k, v);
+ *       }
+ *     }
+ *     
+ * </pre></blockquote></p>
+ * 
+ * @see JobConf
+ * @see JobClient
+ */
 public class DistributedCache {
   // cacheID to cacheStatus mapping
   private static TreeMap<String, CacheStatus> cachedArchives = new TreeMap<String, CacheStatus>();
@@ -47,7 +122,7 @@
    * previously cached (and valid) or copy it from the {@link FileSystem} now.
    * 
    * @param cache the cache to be localized, this should be specified as 
-   * new URI(hdfs://hostname:port/absoulte_path_to_file#LINKNAME). If no schema 
+   * new URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema 
    * or hostname:port is provided the file is assumed to be in the filesystem
    * being used in the Configuration
    * @param conf The Confguration file which contains the filesystem

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/SequenceFile.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/SequenceFile.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/SequenceFile.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/SequenceFile.java Wed Oct 24 14:24:51 2007
@@ -40,7 +40,145 @@
 import org.apache.hadoop.util.MergeSort;
 import org.apache.hadoop.util.PriorityQueue;
 
-/** Support for flat files of binary key/value pairs. */
+/** 
+ * <code>SequenceFile</code>s are flat files consisting of binary key/value 
+ * pairs.
+ * 
+ * <p><code>SequenceFile</code> provides {@link Writer}, {@link Reader} and
+ * {@link Sorter} classes for writing, reading and sorting respectively.</p>
+ * 
+ * There are three <code>SequenceFile</code> <code>Writer</code>s based on the 
+ * {@link CompressionType} used to compress key/value pairs:
+ * <ol>
+ *   <li>
+ *   <code>Writer</code> : Uncompressed records.
+ *   </li>
+ *   <li>
+ *   <code>RecordCompressWriter</code> : Record-compressed files, only compress 
+ *                                       values.
+ *   </li>
+ *   <li>
+ *   <code>BlockCompressWriter</code> : Block-compressed files, both keys &amp; 
+ *                                      values are collected in 'blocks' 
+ *                                      separately and compressed. The size of 
+ *                                      the 'block' is configurable.
+ *   </li>
+ * </ol>
+ * 
+ * <p>The actual compression algorithm used to compress key and/or values can be
+ * specified by using the appropriate {@link CompressionCodec}.</p>
+ * 
+ * <p>The recommended way is to use the static <tt>createWriter</tt> methods
+ * provided by the <code>SequenceFile</code> to choose the preferred format.</p>
+ *
+ * <p>The {@link Reader} acts as the bridge and can read any of the above 
+ * <code>SequenceFile</code> formats.</p>
+ *
+ * <h4 id="Formats">SequenceFile Formats</h4>
+ * 
+ * <p>Essentially there are 3 different formats for <code>SequenceFile</code>s
+ * depending on the <code>CompressionType</code> specified. All of them share a
+ * <a href="#Header">common header</a> described below.
+ * 
+ * <h5 id="Header">SequenceFile Header</h5>
+ * <ul>
+ *   <li>
+ *   version - 3 bytes of magic header <b>SEQ</b>, followed by 1 byte of actual 
+ *             version number (e.g. SEQ4 or SEQ6)
+ *   </li>
+ *   <li>
+ *   keyClassName - key class
+ *   </li>
+ *   <li>
+ *   valueClassName - value class
+ *   </li>
+ *   <li>
+ *   compression - A boolean which specifies if compression is turned on for 
+ *                 keys/values in this file.
+ *   </li>
+ *   <li>
+ *   blockCompression - A boolean which specifies if block-compression is 
+ *                      turned on for keys/values in this file.
+ *   </li>
+ *   <li>
+ *   compression codec - <code>CompressionCodec</code> class which is used for  
+ *                       compression of keys and/or values (if compression is 
+ *                       enabled).
+ *   </li>
+ *   <li>
+ *   metadata - {@link Metadata} for this file.
+ *   </li>
+ *   <li>
+ *   sync - A sync marker to denote end of the header.
+ *   </li>
+ * </ul>
+ * 
+ * <h5 id="#UncompressedFormat">Uncompressed SequenceFile Format</h5>
+ * <ul>
+ * <li>
+ * <a href="#Header">Header</a>
+ * </li>
+ * <li>
+ * Record
+ *   <ul>
+ *     <li>Record length</li>
+ *     <li>Key length</li>
+ *     <li>Key</li>
+ *     <li>Value</li>
+ *   </ul>
+ * </li>
+ * <li>
+ * A sync-marker every few hundred bytes or so.
+ * </li>
+ * </ul>
+ *
+ * <h5 id="#RecordCompressedFormat">Record-Compressed SequenceFile Format</h5>
+ * <ul>
+ * <li>
+ * <a href="#Header">Header</a>
+ * </li>
+ * <li>
+ * Record
+ *   <ul>
+ *     <li>Record length</li>
+ *     <li>Key length</li>
+ *     <li>Key</li>
+ *     <li><i>Compressed</i> Value</li>
+ *   </ul>
+ * </li>
+ * <li>
+ * A sync-marker every few hundred bytes or so.
+ * </li>
+ * </ul>
+ * 
+ * <h5 id="#BlockCompressedFormat">Block-Compressed SequenceFile Format</h5>
+ * <ul>
+ * <li>
+ * <a href="#Header">Header</a>
+ * </li>
+ * <li>
+ * Record <i>Block</i>
+ *   <ul>
+ *     <li>Compressed key-lengths block-size</li>
+ *     <li>Compressed key-lengths block</li>
+ *     <li>Compressed keys block-size</li>
+ *     <li>Compressed keys block</li>
+ *     <li>Compressed value-lengths block-size</li>
+ *     <li>Compressed value-lengths block</li>
+ *     <li>Compressed values block-size</li>
+ *     <li>Compressed values block</li>
+ *   </ul>
+ * </li>
+ * <li>
+ * A sync-marker every few hundred bytes or so.
+ * </li>
+ * </ul>
+ * 
+ * <p>The compressed blocks of key lengths and value lengths consist of the 
+ * actual lengths of individual keys/values encoded in ZeroCompressedInteger 
+ * format.</p>
+ * 
+ * @see CompressionCodec
+ */
 public class SequenceFile {
   private static final Log LOG = LogFactory.getLog(SequenceFile.class);
 
@@ -60,7 +198,10 @@
   /** The number of bytes between sync points.*/
   public static final int SYNC_INTERVAL = 100*SYNC_SIZE; 
 
-  /** The type of compression.
+  /** 
+   * The compression type used to compress key/value pairs in the 
+   * {@link SequenceFile}.
+   * 
    * @see SequenceFile.Writer
    */
   public static enum CompressionType {
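
As a usage note, the following hedged sketch shows the recommended createWriter route and the format-bridging Reader described above (the file name is hypothetical, and the exact factory signatures should be checked against this release):

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path file = new Path("data.seq");

    // Pick the block-compressed format via CompressionType:
    SequenceFile.Writer writer = SequenceFile.createWriter(
        fs, conf, file, IntWritable.class, Text.class,
        SequenceFile.CompressionType.BLOCK);
    writer.append(new IntWritable(1), new Text("one"));
    writer.close();

    // The Reader reads any of the three formats transparently:
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (reader.next(key, value)) {
      System.out.println(key + "\t" + value);
    }
    reader.close();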

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/Writable.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/Writable.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/Writable.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/Writable.java Wed Oct 24 14:24:51 2007
@@ -22,20 +22,59 @@
 import java.io.DataInput;
 import java.io.IOException;
 
-/** A simple, efficient, serialization protocol, based on {@link DataInput} and
- * {@link DataOutput}.
+/**
+ * A serializable object which implements a simple, efficient, serialization 
+ * protocol, based on {@link DataInput} and {@link DataOutput}.
  *
+ * <p>Any <code>key</code> or <code>value</code> type in the Hadoop Map-Reduce
+ * framework implements this interface.</p>
+ * 
  * <p>Implementations typically implement a static <code>read(DataInput)</code>
- * method which constructs a new instance, calls {@link
- * #readFields(DataInput)}, and returns the instance.
+ * method which constructs a new instance, calls {@link #readFields(DataInput)} 
+ * and returns the instance.</p>
+ * 
+ * <p>Example:</p>
+ * <p><blockquote><pre>
+ *     public class MyWritable implements Writable {
+ *       // Some data     
+ *       private int counter;
+ *       private long timestamp;
+ *       
+ *       public void write(DataOutput out) throws IOException {
+ *         out.writeInt(counter);
+ *         out.writeLong(timestamp);
+ *       }
+ *       
+ *       public void readFields(DataInput in) throws IOException {
+ *         counter = in.readInt();
+ *         timestamp = in.readLong();
+ *       }
+ *       
+ *       public static MyWritable read(DataInput in) throws IOException {
+ *         MyWritable w = new MyWritable();
+ *         w.readFields(in);
+ *         return w;
+ *       }
+ *     }
+ * </pre></blockquote></p>
  */
 public interface Writable {
-  /** Writes the fields of this object to <code>out</code>. */
+  /** 
+   * Serialize the fields of this object to <code>out</code>.
+   * 
+   * @param out <code>DataOutput</code> to serialize this object into.
+   * @throws IOException
+   */
   void write(DataOutput out) throws IOException;
 
-  /** Reads the fields of this object from <code>in</code>.  For efficiency,
-   * implementations should attempt to re-use storage in the existing object
-   * where possible.
+  /** 
+   * Deserialize the fields of this object from <code>in</code>.  
+   * 
+   * <p>For efficiency, implementations should attempt to re-use storage in the 
+   * existing object where possible.</p>
+   * 
+   * @param in <code>DataInput</code> to deserialize this object from.
+   * @throws IOException
    */
   void readFields(DataInput in) throws IOException;
 }
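
To make the read(DataInput) idiom above concrete, here is a hedged round-trip sketch using the MyWritable example class from the javadoc (buffer classes are standard java.io):

    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(buffer);

    MyWritable before = new MyWritable();
    before.write(out);                      // serialize the fields

    DataInput in = new DataInputStream(
        new ByteArrayInputStream(buffer.toByteArray()));
    MyWritable after = MyWritable.read(in); // new instance + readFields()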

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/WritableComparable.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/WritableComparable.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/WritableComparable.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/io/WritableComparable.java Wed Oct 24 14:24:51 2007
@@ -18,7 +18,38 @@
 
 package org.apache.hadoop.io;
 
-/** An interface which extends both {@link Writable} and {@link Comparable}.
+/**
+ * A {@link Writable} which is also {@link Comparable}. 
+ *
+ * <p><code>WritableComparable</code>s can be compared to each other, typically 
+ * via <code>Comparator</code>s. Any type which is to be used as a 
+ * <code>key</code> in the Hadoop Map-Reduce framework should implement this
+ * interface.</p>
+ *  
+ * <p>Example:</p>
+ * <p><blockquote><pre>
+ *     public class MyWritableComparable implements WritableComparable {
+ *       // Some data
+ *       private int counter;
+ *       private long timestamp;
+ *       
+ *       public void write(DataOutput out) throws IOException {
+ *         out.writeInt(counter);
+ *         out.writeLong(timestamp);
+ *       }
+ *       
+ *       public void readFields(DataInput in) throws IOException {
+ *         counter = in.readInt();
+ *         timestamp = in.readLong();
+ *       }
+ *       
+ *       public int compareTo(MyWritableComparable w) {
+ *         int thisValue = this.counter;
+ *         int thatValue = w.counter;
+ *         return (thisValue<thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ *       }
+ *     }
+ * </pre></blockquote></p>
  */
 public interface WritableComparable extends Writable, Comparable {
 }

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/ClusterStatus.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/ClusterStatus.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/ClusterStatus.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/ClusterStatus.java Wed Oct 24 14:24:51 2007
@@ -26,7 +26,28 @@
 import org.apache.hadoop.io.WritableUtils;
 
 /**
- * Summarizes the size and current state of the cluster.
+ * Status information on the current state of the Map-Reduce cluster.
+ * 
+ * <p><code>ClusterStatus</code> provides clients with information such as:
+ * <ol>
+ *   <li>
+ *   Size of the cluster. 
+ *   </li>
+ *   <li>
+ *   Task capacity of the cluster. 
+ *   </li>
+ *   <li>
+ *   The number of currently running map &amp; reduce tasks.
+ *   </li>
+ *   <li>
+ *   State of the <code>JobTracker</code>.
+ *   </li>
+ * </ol></p>
+ * 
+ * <p>Clients can query for the latest <code>ClusterStatus</code>, via 
+ * {@link JobClient#getClusterStatus()}.</p>
+ * 
+ * @see JobClient
  */
 public class ClusterStatus implements Writable {
 
@@ -38,6 +59,15 @@
 
   ClusterStatus() {}
   
+  /**
+   * Construct a new cluster status.
+   * 
+   * @param trackers no. of tasktrackers in the cluster
+   * @param maps no. of currently running map-tasks in the cluster
+   * @param reduces no. of currently running reduce-tasks in the cluster
+   * @param max the maximum no. of tasks in the cluster
+   * @param state the {@link JobTracker.State} of the <code>JobTracker</code>
+   */
   ClusterStatus(int trackers, int maps, int reduces, int max,
                 JobTracker.State state) {
     task_trackers = trackers;
@@ -49,33 +79,47 @@
   
 
   /**
-   * The number of task trackers in the cluster.
+   * Get the number of task trackers in the cluster.
+   * 
+   * @return the number of task trackers in the cluster.
    */
   public int getTaskTrackers() {
     return task_trackers;
   }
   
   /**
-   * The number of currently running map tasks.
+   * Get the number of currently running map tasks in the cluster.
+   * 
+   * @return the number of currently running map tasks in the cluster.
    */
   public int getMapTasks() {
     return map_tasks;
   }
   
   /**
-   * The number of current running reduce tasks.
+   * Get the number of currently running reduce tasks in the cluster.
+   * 
+   * @return the number of currently running reduce tasks in the cluster.
    */
   public int getReduceTasks() {
     return reduce_tasks;
   }
   
   /**
-   * The maximum capacity for running tasks in the cluster.
+   * Get the maximum capacity for running tasks in the cluster.
+   * 
+   * @return the maximum capacity for running tasks in the cluster.
    */
   public int getMaxTasks() {
     return max_tasks;
   }
 
+  /**
+   * Get the current state of the <code>JobTracker</code>, 
+   * as {@link JobTracker.State}.
+   * 
+   * @return the current state of the <code>JobTracker</code>.
+   */
   public JobTracker.State getJobTrackerState() {
     return state;
   }
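
A short, hedged sketch of querying these values via JobClient.getClusterStatus(), as referenced in the javadoc above (JobClient construction details may vary by release):

    JobClient client = new JobClient(new JobConf());
    ClusterStatus status = client.getClusterStatus();
    System.out.println("trackers:        " + status.getTaskTrackers());
    System.out.println("running maps:    " + status.getMapTasks());
    System.out.println("running reduces: " + status.getReduceTasks());
    System.out.println("max tasks:       " + status.getMaxTasks());
    System.out.println("state:           " + status.getJobTrackerState());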

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Counters.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Counters.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Counters.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/Counters.java Wed Oct 24 14:24:51 2007
@@ -36,6 +36,13 @@
 
 /**
  * A set of named counters.
+ * 
+ * <p><code>Counters</code> represent global counters, defined either by the 
+ * Map-Reduce framework or applications. Each <code>Counter</code> can be of
+ * any {@link Enum} type.</p>
+ * 
+ * <p><code>Counters</code> are bunched into {@link Group}s, each comprising
+ * counters from a particular <code>Enum</code> class.</p>
  */
 public class Counters implements Writable {
   
@@ -57,10 +64,11 @@
   } // end class CounterRec
   
   /**
-   *  Represents a group of counters, comprising the counters from a particular 
-   *  counter enum class.  
+   *  A <code>Group</code> of counters, comprising counters from a particular 
+   *  counter {@link Enum} class.  
    *
-   *  This class handles localization of the class name and the counter names.
+   *  <p><code>Group</code> handles localization of the class name and the 
+   *  counter names.</p>
    */
   public static class Group {
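
For context, a hedged sketch of how an application typically defines and bumps an enum-based counter (the enum is hypothetical; the Reporter-based increment call reflects the era's API):

    // Hypothetical application-defined counter enum:
    public enum MyCounters { RECORDS_SKIPPED }

    // Inside a map() or reduce() method, given the Reporter handle:
    reporter.incrCounter(MyCounters.RECORDS_SKIPPED, 1);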
     

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/FileInputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/FileInputFormat.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/FileInputFormat.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/FileInputFormat.java Wed Oct 24 14:24:51 2007
@@ -32,8 +32,14 @@
 import org.apache.hadoop.io.WritableComparable;
 
 /** 
- * A base class for {@link InputFormat}. 
+ * A base class for file-based {@link InputFormat}.
  * 
+ * <p><code>FileInputFormat</code> is the base class for all file-based 
+ * <code>InputFormat</code>s. This provides generic implementations of
+ * {@link #validateInput(JobConf)} and {@link #getSplits(JobConf, int)}.
+ * Implementations of <code>FileInputFormat</code> can also override the 
+ * {@link #isSplitable(FileSystem, Path)} method to ensure input-files are
+ * not split-up and are processed as a whole by {@link Mapper}s.</p>
  */
 public abstract class FileInputFormat<K extends WritableComparable,
                                       V extends Writable>
@@ -58,6 +64,11 @@
   /**
    * Is the given filename splitable? Usually, true, but if the file is
    * stream compressed, it will not be.
+   * 
+   * <code>FileInputFormat</code> implementations can override this and return
+   * <code>false</code> to ensure that individual input files are never split-up
+   * so that {@link Mapper}s process entire files.
+   * 
    * @param fs the file system that the file is on
    * @param filename the file name to check
    * @return is this file splitable?
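
As the javadoc above suggests, a subclass can veto splitting. A minimal hedged sketch (the subclass name is hypothetical; TextInputFormat is used as a convenient file-based base):

    public class WholeFileTextInputFormat extends TextInputFormat {
      protected boolean isSplitable(FileSystem fs, Path file) {
        // Never split: each Mapper then consumes an entire file.
        return false;
      }
    }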

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputFormat.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputFormat.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputFormat.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputFormat.java Wed Oct 24 14:24:51 2007
@@ -21,36 +21,88 @@
 import java.io.IOException;
 
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 
-/** An input data format.  Input files are stored in a {@link FileSystem}.
- * The processing of an input file may be split across multiple machines.
- * Files are processed as sequences of records, implementing {@link
- * RecordReader}.  Files must thus be split on record boundaries. */
+/** 
+ * <code>InputFormat</code> describes the input-specification for a 
+ * Map-Reduce job. 
+ * 
+ * <p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
+ * job to:</p>
+ * <ol>
+ *   <li>
+ *   Validate the input-specification of the job. 
+ *   </li>
+ *   <li>
+ *   Split-up the input file(s) into logical {@link InputSplit}s, each of 
+ *   which is then assigned to an individual {@link Mapper}.
+ *   </li>
+ *   <li>
+ *   Provide the {@link RecordReader} implementation to be used to glean
+ *   input records from the logical <code>InputSplit</code> for processing by 
+ *   the {@link Mapper}.
+ *   </li>
+ * </ol>
+ * 
+ * <p>The default behavior of file-based {@link InputFormat}s, typically 
+ * sub-classes of {@link FileInputFormat}, is to split the 
+ * input into <i>logical</i> {@link InputSplit}s based on the total size, in 
+ * bytes, of the input files. However, the {@link FileSystem} blocksize of  
+ * the input files is treated as an upper bound for input splits. A lower bound 
+ * on the split size can be set via 
+ * <a href="{@docRoot}/../hadoop-default.html#mapred.min.split.size">
+ * mapred.min.split.size</a>.</p>
+ * 
+ * <p>Clearly, logical splits based on input-size are insufficient for many 
+ * applications, since record boundaries must be respected. In such cases, the
+ * application also has to implement a {@link RecordReader}, on which lies the
+ * responsibility to respect record boundaries and present a record-oriented
+ * view of the logical <code>InputSplit</code> to the individual task.</p>
+ *
+ * @see InputSplit
+ * @see RecordReader
+ * @see JobClient
+ * @see FileInputFormat
+ */
 public interface InputFormat<K extends WritableComparable,
                              V extends Writable> {
 
   /**
-   * Are the input directories valid? This method is used to test the input
-   * directories when a job is submitted so that the framework can fail early
-   * with a useful error message when the input directory does not exist.
-   * @param job the job to check
+   * Check for validity of the input-specification for the job. 
+   * 
+   * <p>This method is used to validate the input directories when a job is 
+   * submitted so that the {@link JobClient} can fail early, with a useful 
+   * error message, in case of errors (e.g. an input directory does not exist).
+   * </p>
+   * 
+   * @param job job configuration.
    * @throws InvalidInputException if the job does not have valid input
    */
   void validateInput(JobConf job) throws IOException;
   
-  /** Splits a set of input files.  One split is created per map task.
+  /** 
+   * Logically split the set of input files for the job.  
+   * 
+   * <p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
+   * for processing.</p>
    *
-   * @param job the job whose input files are to be split
-   * @param numSplits the desired number of splits
-   * @return the splits
+   * <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
+   * input files are not physically split into chunks. E.g. a split could
+   * be an <i>&lt;input-file-path, start, offset&gt;</i> tuple.</p>
+   * 
+   * @param job job configuration.
+   * @param numSplits the desired number of splits, a hint.
+   * @return an array of {@link InputSplit}s for the job.
    */
   InputSplit[] getSplits(JobConf job, int numSplits) throws IOException;
 
-  /** Construct a {@link RecordReader} for a {@link FileSplit}.
+  /** 
+   * Get the {@link RecordReader} for the given {@link InputSplit}.
    *
+   * <p>It is the responsibility of the <code>RecordReader</code> to respect
+   * record boundaries while processing the logical split to present a 
+   * record-oriented view to the individual task.</p>
+   * 
    * @param split the {@link InputSplit}
    * @param job the job that this split belongs to
    * @return a {@link RecordReader}
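
A hedged sketch of the split-size lower bound mentioned above, plus asking the job's InputFormat for splits (the 128 MB figure is an arbitrary example; numSplits is only a hint):

    JobConf job = new JobConf();
    // Raise the lower bound on split sizes:
    job.setLong("mapred.min.split.size", 128 * 1024 * 1024);

    InputFormat format = job.getInputFormat();
    format.validateInput(job);                        // fail early on bad input
    InputSplit[] splits = format.getSplits(job, 10);  // 10 is only a hint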

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputSplit.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputSplit.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputSplit.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/InputSplit.java Wed Oct 24 14:24:51 2007
@@ -22,20 +22,31 @@
 import org.apache.hadoop.io.Writable;
 
 /**
- * The description of the data for a single map task.
+ * <code>InputSplit</code> represents the data to be processed by an 
+ * individual {@link Mapper}. 
+ *
+ * <p>Typically, it presents a byte-oriented view of the input, and it is the 
+ * responsibility of the {@link RecordReader} of the job to process this and
+ * present a record-oriented view.</p>
+ * 
+ * @see InputFormat
+ * @see RecordReader
  */
 public interface InputSplit extends Writable {
 
   /**
-   * Get the number of input bytes in the split.
-   * @return the number of bytes in the input split
+   * Get the total number of bytes in the data of the <code>InputSplit</code>.
+   * 
+   * @return the number of bytes in the input split.
    * @throws IOException
    */
   long getLength() throws IOException;
   
   /**
    * Get the list of hostnames where the input split is located.
-   * @return A list of prefered hostnames
+   * 
+   * @return list of hostnames where data of the <code>InputSplit</code> is
+   *         located as an array of <code>String</code>s.
    * @throws IOException
    */
   String[] getLocations() throws IOException;

Modified: lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/JobClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/JobClient.java?rev=588035&r1=588034&r2=588035&view=diff
==============================================================================
--- lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/JobClient.java (original)
+++ lucene/hadoop/branches/branch-0.15/src/java/org/apache/hadoop/mapred/JobClient.java Wed Oct 24 14:24:51 2007
@@ -60,13 +60,89 @@
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
-/*******************************************************
- * JobClient interacts with the JobTracker network interface.
- * This object implements the job-control interface, and
- * should be the primary method by which user programs interact
- * with the networked job system.
+/**
+ * <code>JobClient</code> is the primary interface for the user-job to interact
+ * with the {@link JobTracker}.
+ * 
+ * <code>JobClient</code> provides facilities to submit jobs, track their 
+ * progress, access component-tasks' reports and logs, get the Map-Reduce 
+ * cluster's status information, etc.
+ * 
+ * <p>The job submission process involves:
+ * <ol>
+ *   <li>
+ *   Checking the input and output specifications of the job.
+ *   </li>
+ *   <li>
+ *   Computing the {@link InputSplit}s for the job.
+ *   </li>
+ *   <li>
+   *   Setting up the requisite accounting information for the {@link DistributedCache} 
+ *   of the job, if necessary.
+ *   </li>
+ *   <li>
+ *   Copying the job's jar and configuration to the map-reduce system directory 
+ *   on the distributed file-system. 
+ *   </li>
+ *   <li>
+   *   Submitting the job to the <code>JobTracker</code> and optionally monitoring
+   *   its status.
+ *   </li>
+ * </ol></p>
+ *  
+ * Normally the user creates the application, describes various facets of the
+ * job via {@link JobConf} and then uses the <code>JobClient</code> to submit 
+ * the job and monitor its progress.
+ * 
+ * <p>Here is an example on how to use <code>JobClient</code>:</p>
+ * <p><blockquote><pre>
+ *     // Create a new JobConf
+ *     JobConf job = new JobConf(new Configuration(), MyJob.class);
+ *     
+ *     // Specify various job-specific parameters     
+ *     job.setJobName("myjob");
+ *     
+ *     job.setInputPath(new Path("in"));
+ *     job.setOutputPath(new Path("out"));
+ *     
+ *     job.setMapperClass(MyJob.MyMapper.class);
+ *     job.setReducerClass(MyJob.MyReducer.class);
  *
- *******************************************************/
+ *     // Submit the job, then poll for progress until the job is complete
+ *     JobClient.runJob(job);
+ * </pre></blockquote></p>
+ * 
+ * <h4 id="JobControl">Job Control</h4>
+ * 
+ * <p>At times clients need to chain map-reduce jobs to accomplish complex tasks 
+ * which cannot be done via a single map-reduce job. This is fairly easy since 
+ * the output of a job typically goes to the distributed file-system, and that 
+ * output can be used as the input for the next job.</p>
+ * 
+ * <p>However, this also means that the onus of ensuring that jobs are complete 
+ * (success/failure) lies squarely on the clients. In such situations the 
+ * various job-control options are:
+ * <ol>
+ *   <li>
+ *   {@link #runJob(JobConf)} : submits the job and returns only after 
+ *   the job has completed.
+ *   </li>
+ *   <li>
+ *   {@link #submitJob(JobConf)} : only submits the job; the client can then 
+ *   poll the returned {@link RunningJob} handle to query status and make 
+ *   scheduling decisions.
+ *   </li>
+ *   <li>
+ *   {@link JobConf#setJobEndNotificationURI(String)} : sets up a notification
+ *   on job-completion, thus avoiding polling.
+ *   </li>
+ * </ol></p>
+ * 
+ * @see JobConf
+ * @see ClusterStatus
+ * @see Tool
+ * @see DistributedCache
+ */
 public class JobClient extends Configured implements MRConstants, Tool  {
   private static final Log LOG = LogFactory.getLog("org.apache.hadoop.mapred.JobClient");
   public static enum TaskStatusFilter { NONE, KILLED, FAILED, SUCCEEDED, ALL }
@@ -239,16 +315,28 @@
   static Random r = new Random();
 
   /**
-   * Build a job client, connect to the default job tracker
+   * Create a job client.
    */
   public JobClient() {
   }
     
+  /**
+   * Build a job client with the given {@link JobConf}, and connect to the 
+   * default {@link JobTracker}.
+   * 
+   * @param conf the job configuration.
+   * @throws IOException
+   */
   public JobClient(JobConf conf) throws IOException {
     setConf(conf);
     init(conf);
   }
     
+  /**
+   * Connect to the default {@link JobTracker}.
+   * @param conf the job configuration.
+   * @throws IOException
+   */
   public void init(JobConf conf) throws IOException {
     String tracker = conf.get("mapred.job.tracker", "local");
     if ("local".equals(tracker)) {
@@ -260,9 +348,10 @@
 
   /**
    * Create a proxy JobSubmissionProtocol that retries timeouts.
-   * @param addr the address to connect to
-   * @param conf the server's configuration
-   * @return a proxy object that will retry timeouts
+   * 
+   * @param addr the address to connect to.
+   * @param conf the server's configuration.
+   * @return a proxy object that will retry timeouts.
    * @throws IOException
    */
   private JobSubmissionProtocol createProxy(InetSocketAddress addr,
@@ -286,6 +375,9 @@
 
   /**
    * Build a job client, connect to the indicated job tracker.
+   * 
+   * @param jobTrackAddr the job tracker to connect to.
+   * @param conf configuration.
    */
   public JobClient(InetSocketAddress jobTrackAddr, 
                    Configuration conf) throws IOException {
@@ -293,6 +385,7 @@
   }
 
   /**
+   * Close the <code>JobClient</code>.
    */
   public synchronized void close() throws IOException {
   }
@@ -300,6 +393,8 @@
   /**
    * Get a filesystem handle.  We need this to prepare jobs
    * for submission to the MapReduce system.
+   * 
+   * @return the filesystem handle.
    */
   public synchronized FileSystem getFs() throws IOException {
     if (this.fs == null) {
@@ -310,10 +405,21 @@
   }
 
   /**
-   * Submit a job to the MR system
+   * Submit a job to the MR system.
+   * 
+   * This returns a handle to the {@link RunningJob} which can be used to track
+   * the running-job.
+   * 
+   * @param jobFile the job configuration.
+   * @return a handle to the {@link RunningJob} which can be used to track the
+   *         running-job.
+   * @throws FileNotFoundException
+   * @throws InvalidJobConfException
+   * @throws IOException
    */
   public RunningJob submitJob(String jobFile) throws FileNotFoundException, 
-                                                     InvalidJobConfException, IOException {
+                                                     InvalidJobConfException, 
+                                                     IOException {
     // Load in the submitted job details
     JobConf job = new JobConf(jobFile);
     return submitJob(job);
@@ -321,7 +427,16 @@
     
    
   /**
-   * Submit a job to the MR system
+   * Submit a job to the MR system.
+   * This returns a handle to the {@link RunningJob} which can be used to track
+   * the running-job.
+   * 
+   * @param job the job configuration.
+   * @return a handle to the {@link RunningJob} which can be used to track the
+   *         running-job.
+   * @throws FileNotFoundException
+   * @throws InvalidJobConfException
+   * @throws IOException
    */
   public RunningJob submitJob(JobConf job) throws FileNotFoundException, 
                                                   InvalidJobConfException, IOException {
@@ -551,8 +666,13 @@
   }
     
   /**
-   * Get an RunningJob object to track an ongoing job.  Returns
+   * Get a {@link RunningJob} object to track an ongoing job.  Returns
    * null if the id does not correspond to any known job.
+   * 
+   * @param jobid the jobid of the job.
+   * @return the {@link RunningJob} handle to track the job, null if the 
+   *         <code>jobid</code> doesn't correspond to any known job.
+   * @throws IOException
    */
   public RunningJob getJob(String jobid) throws IOException {
     JobStatus status = jobSubmitClient.getJobStatus(jobid);
@@ -565,8 +685,10 @@
 
   /**
    * Get the information of the current state of the map tasks of a job.
-   * @param jobId the job to query
-   * @return the list of all of the map tips
+   * 
+   * @param jobId the job to query.
+   * @return the list of all of the map tips.
+   * @throws IOException
    */
   public TaskReport[] getMapTaskReports(String jobId) throws IOException {
     return jobSubmitClient.getMapTaskReports(jobId);
@@ -574,23 +696,44 @@
     
   /**
    * Get the information of the current state of the reduce tasks of a job.
-   * @param jobId the job to query
-   * @return the list of all of the map tips
+   * 
+   * @param jobId the job to query.
+   * @return the list of all of the reduce tips.
+   * @throws IOException
    */    
   public TaskReport[] getReduceTaskReports(String jobId) throws IOException {
     return jobSubmitClient.getReduceTaskReports(jobId);
   }
-    
+   
+  /**
+   * Get status information about the Map-Reduce cluster.
+   *  
+   * @return the status information about the Map-Reduce cluster as an object
+   *         of {@link ClusterStatus}.
+   * @throws IOException
+   */
   public ClusterStatus getClusterStatus() throws IOException {
     return jobSubmitClient.getClusterStatus();
   }
     
+
+  /** 
+   * Get the jobs that are not completed and not failed.
+   * 
+   * @return array of {@link JobStatus} for the running/to-be-run jobs.
+   * @throws IOException
+   */
   public JobStatus[] jobsToComplete() throws IOException {
     return jobSubmitClient.jobsToComplete();
   }
     
-  /** Utility that submits a job, then polls for progress until the job is
-   * complete. */
+  /** 
+   * Utility that submits a job, then polls for progress until the job is
+   * complete.
+   * 
+   * @param job the job configuration.
+   * @return the {@link RunningJob}, after the job completes.
+   * @throws IOException
+   */
   public static RunningJob runJob(JobConf job) throws IOException {
     JobClient jc = new JobClient(job);
     boolean error = true;
@@ -764,9 +907,10 @@
   }
     
   /**
-   * Get the task output filter out of the JobConf
-   * @param job the JobConf to examine
-   * @return the filter level
+   * Get the task output filter out of the JobConf.
+   * 
+   * @param job the JobConf to examine.
+   * @return the filter level.
    */
   public static TaskStatusFilter getTaskOutputFilter(JobConf job) {
     return TaskStatusFilter.valueOf(job.get("jobclient.output.filter", 
@@ -774,9 +918,10 @@
   }
     
   /**
-   * Modify the JobConf to set the task output filter
-   * @param job the JobConf to modify
-   * @param newValue the value to set
+   * Modify the JobConf to set the task output filter.
+   * 
+   * @param job the JobConf to modify.
+   * @param newValue the value to set.
    */
   public static void setTaskOutputFilter(JobConf job, 
                                          TaskStatusFilter newValue) {
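
Tying the Job Control section above together, here is a hedged sketch of option 2:
submitting asynchronously and polling the RunningJob handle. ChainDriver, the job
names, and the five-second poll interval are invented for illustration; the
isComplete() and isSuccessful() calls are assumed from the javadoc's description
of polling the returned handle for status.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

/** Hypothetical driver chaining two jobs; illustrative only. */
public class ChainDriver {
  public static void main(String[] args) throws IOException, InterruptedException {
    JobConf first = new JobConf(new Configuration(), ChainDriver.class);
    first.setJobName("first");             // configure mapper/reducer/paths as in
                                           // the JobClient example above

    JobClient jc = new JobClient(first);
    RunningJob rj = jc.submitJob(first);   // returns immediately with a handle

    while (!rj.isComplete()) {             // poll, making scheduling decisions
      Thread.sleep(5000);                  // as needed
    }
    if (!rj.isSuccessful()) {
      throw new IOException("first job failed");
    }

    // The first job's output on the distributed file-system now serves as the
    // second job's input; runJob blocks until completion.
    JobConf second = new JobConf(new Configuration(), ChainDriver.class);
    second.setJobName("second");
    JobClient.runJob(second);
  }
}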


