hadoop-mapreduce-commits mailing list archives

From ste...@apache.org
Subject svn commit: r903227 [8/16] - in /hadoop/mapreduce/branches/MAPREDUCE-233: ./ .eclipse.templates/ conf/ ivy/ src/benchmarks/gridmix/ src/benchmarks/gridmix/javasort/ src/benchmarks/gridmix/maxent/ src/benchmarks/gridmix/monsterQuery/ src/benchmarks/grid...
Date Tue, 26 Jan 2010 14:03:09 GMT
Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/build.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/build.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/build.xml Tue Jan 26 14:02:53 2010
@@ -20,7 +20,6 @@
 <project name="vaidya" default="jar">
 
 	<import file="../build-contrib.xml" />
-        <import file="../../../build.xml" />
 
 	<target name="init">
 		<mkdir dir="${build.dir}" />

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy.xml Tue Jan 26 14:02:53 2010
@@ -1,4 +1,21 @@
 <?xml version="1.0" ?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
 <ivy-module version="1.0">
   <info organisation="org.apache.hadoop" module="${ant.project.name}">
     <license name="Apache 2.0"/>

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy/libraries.properties?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy/libraries.properties (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vaidya/ivy/libraries.properties Tue Jan 26 14:02:53 2010
@@ -1,3 +1,15 @@
+#   Licensed under the Apache License, Version 2.0 (the "License");
+#   you may not use this file except in compliance with the License.
+#   You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing, software
+#   distributed under the License is distributed on an "AS IS" BASIS,
+#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#   See the License for the specific language governing permissions and
+#   limitations under the License.
+
 #This properties file lists the versions of the various artifacts used by streaming.
 #It drives ivy and the generation of a maven POM
 

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaConfiguration.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaConfiguration.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaConfiguration.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaConfiguration.java Tue Jan 26 14:02:53 2010
@@ -55,6 +55,9 @@
  * @see VerticaOutputFormat#setOutput(Job, String, boolean, String...)
  */
 public class VerticaConfiguration {
+  /** Vertica Version Constants */
+  public static final Integer VERSION_3_5 = 305;
+  
   /** Class name for Vertica JDBC Driver */
   public static final String VERTICA_DRIVER_CLASS = "com.vertica.Driver";
 

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaOutputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaOutputFormat.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaOutputFormat.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaOutputFormat.java Tue Jan 26 14:02:53 2010
@@ -63,7 +63,7 @@
    * @param dropTable
    */
   public static void setOutput(Job job, String tableName, boolean dropTable) {
-    setOutput(job, tableName, dropTable);
+    setOutput(job, tableName, dropTable, (String[])null);
   }
 
   /**
@@ -193,39 +193,46 @@
     stmt.execute("select create_projection_design('" + designName + "', '', '"
         + designTables.toString() + "')");
 
-    rs = stmt.executeQuery("select get_design_script('" + designName + "', '"
-        + designName + "')");
-    rs.next();
-    String[] projSet = rs.getString(1).split(";");
-    for (String proj : projSet) {
-      stmt.execute(proj);
-    }
-    stmt.execute("select start_refresh()");
-
-    // pool for refresh complete
-    boolean refreshing = true;
-    Long timeout = vtconfig.getOptimizePollTimeout();
-    while (refreshing) {
-      refreshing = false;
-      rs = stmt
-          .executeQuery("select table_name, projection_name, status from vt_projection_refresh");
-      while (rs.next()) {
-        String table = rs.getString(1);
-        String stat = rs.getString(3);
-        if (stat.equals("refreshing") && tablesWithTemp.contains(table))
-          refreshing = true;
+    if(VerticaUtil.verticaVersion(conf, true) >= VerticaConfiguration.VERSION_3_5) {
+      stmt.execute("select deploy_design('" + designName + "', '" + designName + "')");
+    } else {
+      rs = stmt.executeQuery("select get_design_script('" + designName + "', '"
+          + designName + "')");
+      rs.next();
+      String[] projSet = rs.getString(1).split(";");
+      for (String proj : projSet) {
+        stmt.execute(proj);
       }
-
-      Thread.sleep(timeout);
-    }
-
-    // refresh done, move the ahm and drop the temp projections
-    stmt.execute("select make_ahm_now()");
-
-    for (String table : tablesWithTemp) {
-      for (String proj : tableProj.get(table)) {
-        stmt.execute("DROP PROJECTION " + proj);
+      stmt.execute("select start_refresh()");
+  
+      // poll for refresh complete
+      boolean refreshing = true;
+      Long timeout = vtconfig.getOptimizePollTimeout();
+      while (refreshing) {
+        refreshing = false;
+        rs = stmt
+            .executeQuery("select table_name, status from vt_projection_refresh");
+        while (rs.next()) {
+          String table = rs.getString(1);
+          String stat = rs.getString(2);
+          if (stat.equals("refreshing") && tablesWithTemp.contains(table))
+            refreshing = true;
+        }
+        rs.close();
+  
+        Thread.sleep(timeout);
       }
+  
+      // refresh done, move the ancient history mark (ahm) and drop the temp projections
+      stmt.execute("select make_ahm_now()");
+  
+      for (String table : tablesWithTemp) {
+        for (String proj : tableProj.get(table)) {
+          stmt.execute("DROP PROJECTION " + proj);
+        }
+      }
+
+      stmt.close();
     }
   }
 
@@ -235,4 +242,4 @@
     return new FileOutputCommitter(FileOutputFormat.getOutputPath(context),
         context);
   }
-}
+}
\ No newline at end of file
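
The one-argument difference in the setOutput() hunk above is easy to miss: the three-argument overload used to call itself and would recurse until the stack overflowed; the fix forwards to the varargs overload by passing an explicitly typed null array. A minimal, self-contained sketch of that delegation pattern (plain Java, not the actual Vertica classes):

public class VarargsDelegation {

  // Varargs overload that does the real work.
  static void setOutput(String tableName, boolean dropTable, String... tableDef) {
    int cols = (tableDef == null) ? 0 : tableDef.length;
    System.out.println("table=" + tableName + " drop=" + dropTable + " columns=" + cols);
  }

  // Convenience overload. Calling setOutput(tableName, dropTable) here would match
  // this method itself (an exact match beats varargs) and recurse forever; casting
  // the null to String[] forces the varargs overload and passes "no table definition".
  static void setOutput(String tableName, boolean dropTable) {
    setOutput(tableName, dropTable, (String[]) null);
  }

  public static void main(String[] args) {
    setOutput("allTypes", true);                          // no explicit table definition
    setOutput("allTypes", true, "a int", "b varchar");    // explicit definition
  }
}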

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecord.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecord.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecord.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecord.java Tue Jan 26 14:02:53 2010
@@ -21,6 +21,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.sql.Date;
 import java.sql.ResultSet;
@@ -178,7 +179,7 @@
     if (i >= values.size())
       throw new IndexOutOfBoundsException("Index " + i
           + " greater than input size " + values.size());
-    if (validate) {
+    if (validate && value != null) {
       Integer type = types.get(i);
       switch (type) {
       case Types.BIGINT:
@@ -204,6 +205,9 @@
       case Types.REAL:
       case Types.DECIMAL:
       case Types.NUMERIC:
+        if (!(value instanceof BigDecimal))
+          throw new ClassCastException("Cannot cast "
+              + value.getClass().getName() + " to BigDecimal");
       case Types.DOUBLE:
         if (!(value instanceof Double) && !(value instanceof Float)
             && !(value instanceof DoubleWritable)
@@ -278,7 +282,7 @@
   private void objectTypes() {
     for (Object obj : values) {
       if (obj == null) {
-        this.types.add(null);
+        this.types.add(Types.NULL);
       } else if (obj instanceof Long) {
         this.types.add(Types.BIGINT);
       } else if (obj instanceof LongWritable) {
@@ -291,6 +295,8 @@
         this.types.add(Types.INTEGER);
       } else if (obj instanceof Short) {
         this.types.add(Types.SMALLINT);
+      } else if (obj instanceof BigDecimal) {
+        this.types.add(Types.NUMERIC);
       } else if (obj instanceof DoubleWritable) {
         this.types.add(Types.DOUBLE);
       } else if (obj instanceof Double) {
@@ -343,20 +349,23 @@
       // switch statement uses fall through to handle type variations
       // e.g. type specified as BIGINT but passed in as Integer
       switch (type) {
+      case Types.NULL:
+        sb.append("");
+        break;
       case Types.BIGINT:
         if (obj instanceof Long) {
-          sb.append(((Long) obj).toString());
+          sb.append(obj.toString());
           break;
         }
       case Types.INTEGER:
         if (obj instanceof Integer) {
-          sb.append(((Integer) obj).toString());
+          sb.append(obj.toString());
           break;
         }
       case Types.TINYINT:
       case Types.SMALLINT:
         if (obj instanceof Short) {
-          sb.append(((Short) obj).toString());
+          sb.append(obj.toString());
           break;
         }
         if (obj instanceof LongWritable) {
@@ -374,18 +383,22 @@
       case Types.REAL:
       case Types.DECIMAL:
       case Types.NUMERIC:
+        if (obj instanceof BigDecimal) {
+          sb.append(obj.toString());
+          break;
+        }
       case Types.DOUBLE:
         if (obj instanceof Double) {
-          sb.append(((Double) obj).toString());
+          sb.append(obj.toString());
           break;
         }
         if (obj instanceof DoubleWritable) {
-          sb.append(((DoubleWritable) obj).toString());
+          sb.append(((DoubleWritable) obj).get());
           break;
         }
       case Types.FLOAT:
         if (obj instanceof Float) {
-          sb.append(((Float) obj).toString());
+          sb.append(obj.toString());
           break;
         }
         if (obj instanceof FloatWritable) {
@@ -395,7 +408,8 @@
       case Types.BINARY:
       case Types.LONGVARBINARY:
       case Types.VARBINARY:
-        sb.append(ByteBuffer.wrap((byte[]) obj).asCharBuffer());
+        if(obj == null) sb.append("");
+        else sb.append(ByteBuffer.wrap((byte[]) obj).asCharBuffer());
         break;
       case Types.BIT:
       case Types.BOOLEAN:
@@ -452,7 +466,8 @@
           sb.append(sqlfmt.format((Timestamp) obj));
         break;
       default:
-        throw new RuntimeException("Unknown type value " + types.get(i));
+        if(obj == null) sb.append("");
+        else throw new RuntimeException("Unknown type value " + types.get(i));
       }
       if (i < columns - 1)
         sb.append(delimiterArg);
@@ -473,6 +488,9 @@
     for (int i = 0; i < columns; i++) {
       int type = types.get(i);
       switch (type) {
+      case Types.NULL:
+        values.add(null);
+        break;
       case Types.BIGINT:
         values.add(in.readLong());
         break;
@@ -486,6 +504,8 @@
       case Types.REAL:
       case Types.DECIMAL:
       case Types.NUMERIC:
+        values.add(new BigDecimal(Text.readString(in)));
+        break;
       case Types.DOUBLE:
         values.add(in.readDouble());
         break;
@@ -551,13 +571,20 @@
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeInt(columns);
-    for (Integer type : types)
-      out.writeInt(type);
+    
+    for (int i = 0; i < columns; i++) {
+      Object obj = values.get(i);
+      Integer type = types.get(i);
+      if(obj == null) out.writeInt(Types.NULL);
+      else out.writeInt(type);
+    }
 
     for (int i = 0; i < columns; i++) {
       Object obj = values.get(i);
       Integer type = types.get(i);
 
+      if(obj == null) continue;
+      
       switch (type) {
       case Types.BIGINT:
         out.writeLong((Long) obj);
@@ -572,6 +599,8 @@
       case Types.REAL:
       case Types.DECIMAL:
       case Types.NUMERIC:
+        Text.writeString(out, obj.toString());
+        break;
       case Types.DOUBLE:
         out.writeDouble((Double) obj);
         break;
@@ -628,4 +657,4 @@
     }
   }
 
-}
+}
\ No newline at end of file
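
Several of the hunks above make VerticaRecord null-safe by writing java.sql.Types.NULL as the per-column type code whenever a value is null, and by restoring null in readFields() for that code. A standalone, simplified sketch of that write/read symmetry (only a few assumed column types; not the VerticaRecord code itself):

import java.io.*;
import java.sql.Types;
import java.util.*;

public class NullableColumnsDemo {

  static void write(DataOutput out, List<Object> values, List<Integer> types) throws IOException {
    out.writeInt(values.size());
    // First pass: one type code per column; null values are marked with Types.NULL.
    for (int i = 0; i < values.size(); i++)
      out.writeInt(values.get(i) == null ? Types.NULL : types.get(i));
    // Second pass: write the non-null values only.
    for (int i = 0; i < values.size(); i++) {
      Object obj = values.get(i);
      if (obj == null) continue;
      switch (types.get(i)) {
        case Types.BIGINT:  out.writeLong((Long) obj);     break;
        case Types.DOUBLE:  out.writeDouble((Double) obj); break;
        case Types.VARCHAR: out.writeUTF((String) obj);    break;
        default: throw new IOException("unsupported type " + types.get(i));
      }
    }
  }

  static List<Object> read(DataInput in) throws IOException {
    int columns = in.readInt();
    List<Integer> types = new ArrayList<Integer>();
    for (int i = 0; i < columns; i++) types.add(in.readInt());
    List<Object> values = new ArrayList<Object>();
    for (int type : types) {
      switch (type) {
        case Types.NULL:    values.add(null);            break; // restore the null column
        case Types.BIGINT:  values.add(in.readLong());   break;
        case Types.DOUBLE:  values.add(in.readDouble()); break;
        case Types.VARCHAR: values.add(in.readUTF());    break;
        default: throw new IOException("unsupported type " + type);
      }
    }
    return values;
  }

  public static void main(String[] args) throws IOException {
    List<Integer> types = Arrays.asList(Types.BIGINT, Types.VARCHAR, Types.DOUBLE);
    List<Object> values = Arrays.asList((Object) 42L, null, 3.14);
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    write(new DataOutputStream(buf), values, types);
    DataInput back = new DataInputStream(new ByteArrayInputStream(buf.toByteArray()));
    System.out.println(read(back)); // prints [42, null, 3.14]
  }
}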

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecordWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecordWriter.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecordWriter.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaRecordWriter.java Tue Jan 26 14:02:53 2010
@@ -31,6 +31,7 @@
 import java.util.List;
 
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
@@ -99,8 +100,9 @@
   @Override
   public void close(TaskAttemptContext context) throws IOException {
     try {
-      if (statement != null)
+      if (statement != null) {
         finishCopyIn.invoke(statement); // statement.finishCopyIn();
+      }
     } catch (Exception e) {
       throw new IOException(e);
     }

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaStreamingRecordWriter.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaStreamingRecordWriter.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaStreamingRecordWriter.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaStreamingRecordWriter.java Tue Jan 26 14:02:53 2010
@@ -27,6 +27,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordWriter;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaUtil.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaUtil.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaUtil.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/java/org/apache/hadoop/vertica/VerticaUtil.java Tue Jan 26 14:02:53 2010
@@ -31,13 +31,29 @@
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.conf.Configuration;
 
 public class VerticaUtil {
   private static final Log LOG = LogFactory.getLog(VerticaUtil.class);
 
+  public static int verticaVersion(Configuration conf, boolean output) throws IOException {
+    int ver = -1;
+    try {
+      VerticaConfiguration vtconfig = new VerticaConfiguration(conf);
+      Connection conn = vtconfig.getConnection(output);
+      DatabaseMetaData dbmd = conn.getMetaData();
+      ver = dbmd.getDatabaseMajorVersion() * 100;
+      ver += dbmd.getDatabaseMinorVersion();
+    } catch (ClassNotFoundException e) {
+      throw new IOException("Vertica Driver required to use Vertica Input or Output Formatters");
+    } catch (SQLException e) { throw new IOException(e); }
+    return ver;
+  }
+  
   public static void checkOutputSpecs(Configuration conf) throws IOException {
     VerticaConfiguration vtconfig = new VerticaConfiguration(conf);
 
@@ -67,20 +83,24 @@
       stmt = conn.createStatement();
 
       if (tableExists && dropTable) {
-        // TODO: need truncate support
-        // for now drop the table if it exists
-        // if def is empty, grab the columns first
-        if (def == null) {
-          rs = dbmd.getColumns(null, schema, table, null);
-          ArrayList<String> defs = new ArrayList<String>();
-          while (rs.next())
-            defs.add(rs.getString(4) + " " + rs.getString(5));
-          def = defs.toArray(new String[0]);
+        if(verticaVersion(conf, true) >= 305) {
+          stmt = conn.createStatement();
+          stmt.execute("TRUNCATE TABLE " + writerTable);
+        } else {
+          // for versions older than 3.5 drop the table if it exists
+          // if def is empty, grab the columns first to redefine the table
+          if (def == null) {
+            rs = dbmd.getColumns(null, schema, table, null);
+            ArrayList<String> defs = new ArrayList<String>();
+            while (rs.next())
+              defs.add(rs.getString(4) + " " + rs.getString(5));
+            def = defs.toArray(new String[0]);
+          }
+  
+          stmt = conn.createStatement();
+          stmt.execute("DROP TABLE " + writerTable + " CASCADE");
+          tableExists = false; // force create
         }
-
-        stmt = conn.createStatement();
-        stmt.execute("DROP TABLE " + writerTable + " CASCADE");
-        tableExists = false; // force create
       }
 
       // create table if it doesn't exist
@@ -120,7 +140,7 @@
   public static List<InputSplit> getSplits(JobContext context)
       throws IOException {
     Configuration conf = context.getConfiguration();
-    int numSplits = conf.getInt("mapred.map.tasks", 1);
+    int numSplits = conf.getInt("mapreduce.job.maps", 1);
     LOG.debug("creating splits up to " + numSplits);
     List<InputSplit> splits = new ArrayList<InputSplit>();
     int i = 0;
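
For reference, the version check added above encodes the server version as major * 100 + minor, which is why the new VerticaConfiguration.VERSION_3_5 constant is 305. A tiny illustration with made-up version numbers (not actual driver output):

public class VersionEncodingDemo {
  static int encode(int major, int minor) {
    return major * 100 + minor;           // same arithmetic as verticaVersion()
  }

  public static void main(String[] args) {
    final int VERSION_3_5 = 305;          // the constant the patch adds
    System.out.println(encode(3, 5));                 // 305: deploy_design / TRUNCATE TABLE path
    System.out.println(encode(3, 0) >= VERSION_3_5);  // false: legacy refresh / DROP TABLE path
    System.out.println(encode(4, 0) >= VERSION_3_5);  // true
  }
}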

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/AllTests.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/AllTests.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/AllTests.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/AllTests.java Tue Jan 26 14:02:53 2010
@@ -18,7 +18,6 @@
 
 package org.apache.hadoop.vertica;
 
-import java.io.FileNotFoundException;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestExample.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestExample.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestExample.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestExample.java Tue Jan 26 14:02:53 2010
@@ -29,6 +29,7 @@
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Cluster;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
@@ -87,13 +88,16 @@
   }
 
   public Job getJob() throws IOException {
-    Job job = new Job();
+    Configuration conf = new Configuration(true);
+    Cluster cluster = new Cluster(conf);
+    Job job = Job.getInstance(cluster);
+    
+    conf = job.getConfiguration();
+    conf.set("mapreduce.job.tracker", "local");
+
     job.setJarByClass(TestExample.class);
     job.setJobName("vertica test");
 
-    Configuration conf = job.getConfiguration();
-    conf.set("mapred.job.tracker", "local");
-
     job.setOutputKeyClass(Text.class);
     job.setOutputValueClass(VerticaRecord.class);
     job.setInputFormatClass(VerticaInputFormat.class);
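
The test changes above replace the deprecated new Job() constructor with a Cluster-based factory call and switch to the new-style mapreduce.job.tracker key. Roughly the same pattern in isolation (class names and configuration keys are taken from the diff; the surrounding scaffolding is assumed):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Cluster;
import org.apache.hadoop.mapreduce.Job;

public class LocalJobFactory {
  public static Job newLocalJob() throws IOException {
    Configuration conf = new Configuration(true);
    Cluster cluster = new Cluster(conf);        // resolves the cluster from the configuration
    Job job = Job.getInstance(cluster);         // replaces the deprecated new Job()

    conf = job.getConfiguration();
    conf.set("mapreduce.job.tracker", "local"); // run against the local runner, as in the test
    job.setJobName("vertica test");
    job.setOutputKeyClass(Text.class);
    return job;
  }
}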

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestVertica.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestVertica.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestVertica.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/contrib/vertica/src/test/org/apache/hadoop/vertica/TestVertica.java Tue Jan 26 14:02:53 2010
@@ -20,6 +20,7 @@
 
 import java.io.IOException;
 import java.lang.reflect.Array;
+import java.math.BigDecimal;
 import java.sql.Date;
 import java.sql.Time;
 import java.sql.Timestamp;
@@ -37,6 +38,7 @@
 import org.apache.hadoop.io.DataOutputBuffer;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Cluster;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobContext;
@@ -71,7 +73,8 @@
 
   public Job getVerticaJob() throws IOException {
     Configuration conf = new Configuration(true);
-    Job job = new Job(conf, "TestVertica");
+    Cluster cluster = new Cluster(conf);
+    Job job = Job.getInstance(cluster);
     job.setJarByClass(VerticaTestMR.class);
 
     VerticaConfiguration.configureVertica(job.getConfiguration(),
@@ -123,11 +126,11 @@
     types.add(Types.SMALLINT);
     values.add((short) 4); // SMALLINT
     types.add(Types.REAL);
-    values.add(15234342345.532637); // REAL
+    values.add(new BigDecimal(15234342345.532637)); // REAL
     types.add(Types.DECIMAL);
-    values.add(346223093.4256); // DECIMAL
+    values.add(new BigDecimal(346223093.4256)); // DECIMAL
     types.add(Types.NUMERIC);
-    values.add(209232301132.4203); // NUMERIC
+    values.add(new BigDecimal(209232301132.4203)); // NUMERIC
     types.add(Types.DOUBLE);
     values.add(934029342.234); // DOUBLE
     types.add(Types.FLOAT);
@@ -158,6 +161,51 @@
     values
         .add(new Timestamp(tmstmpfmt.parse("2007-08-09 6:07:05.06").getTime())); // TIMESTAMP
 
+    types.add(Types.BIGINT);
+    values.add(null); // BIGINT
+    types.add(Types.INTEGER);
+    values.add(null); // INTEGER
+    types.add(Types.TINYINT);
+    values.add(null); // TINYINT
+    types.add(Types.SMALLINT);
+    values.add(null); // SMALLINT
+    types.add(Types.REAL);
+    values.add(null); // REAL
+    types.add(Types.DECIMAL);
+    values.add(null); // DECIMAL
+    types.add(Types.NUMERIC);
+    values.add(null); // NUMERIC
+    types.add(Types.DOUBLE);
+    values.add(null); // DOUBLE
+    types.add(Types.FLOAT);
+    values.add(null); // FLOAT
+    types.add(Types.BINARY);
+    values.add(null); // BINARY
+    types.add(Types.LONGVARBINARY);
+    values.add(null); // LONGVARBINARY
+    types.add(Types.VARBINARY);
+    values.add(null); // VARBINARY
+    types.add(Types.BOOLEAN);
+    values.add(null); // BOOLEAN
+    types.add(Types.CHAR);
+    values.add(null); // CHAR
+    types.add(Types.LONGNVARCHAR);
+    values.add(null); // LONGNVARCHAR
+    types.add(Types.LONGVARCHAR);
+    values.add(null); // LONGVARCHAR
+    types.add(Types.NCHAR);
+    values.add(null); // NCHAR
+    types.add(Types.VARCHAR);
+    values.add(null); // VARCHAR
+    types.add(Types.DATE);
+    values.add(null); // DATE
+    types.add(Types.TIME);
+    values.add(null); // TIME
+    types.add(Types.TIMESTAMP);
+    values
+        .add(null); // TIMESTAMP
+    
+    
     String sql1 = null;
     sql1 = recordTest(types, values, out, in, true);
     
@@ -191,7 +239,8 @@
 
     // compare values
     for(int i = 0; i < values.size(); i++)
-      if(values.get(i).getClass().isArray()) {
+      if(values.get(i) == null) assertSame("Vertica Record serialized value " + i + " is null", values.get(i), new_values.get(i));
+      else if(values.get(i).getClass().isArray()) {
         Object a = values.get(i);
         Object b = new_values.get(i);
         for(int j = 0; j < Array.getLength(a); j++)
@@ -255,17 +304,17 @@
     List<InputSplit> splits = null;
 
     Configuration conf = job.getConfiguration();
-    conf.setInt("mapred.map.tasks", 1);
+    conf.setInt("mapreduce.job.maps", 1);
     JobContext context = new JobContextImpl(conf, new JobID());
 
     splits = input.getSplits(context);
     assert splits.size() == 1;
 
-    conf.setInt("mapred.map.tasks", 3);
+    conf.setInt("mapreduce.job.maps", 3);
     splits = input.getSplits(context);
     assert splits.size() == 3;
 
-    conf.setInt("mapred.map.tasks", 10);
+    conf.setInt("mapreduce.job.maps", 10);
     splits = input.getSplits(context);
     assert splits.size() == 10;
   }

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml Tue Jan 26 14:02:53 2010
@@ -40,7 +40,8 @@
       <p>The Capacity Scheduler supports the following features:</p> 
       <ul>
         <li>
-          Multiple queues, where a job is submitted to a queue.
+          Multiple queues, possibly hierarchical/recursive, where a job is
+          submitted to a queue.
         </li>
         <li>
           Queues are allocated a fraction of the capacity of the grid in the 
@@ -72,11 +73,23 @@
           competition for them.  
         </li>
         <li>
+          Queues can use idle resources of other queues. To prevent particular
+          queues from monopolizing resources, each queue can be given a cap on
+          the maximum amount of resources it can expand to when other queues in
+          the cluster have idle resources.
+        </li>
+        <li>
           Support for memory-intensive jobs, wherein a job can optionally 
           specify higher memory-requirements than the default, and the tasks 
           of the job will only be run on TaskTrackers that have enough memory 
           to spare.
         </li>
+        <li>
+          Support for refreshing/reloading some of the queue-properties
+          without restarting the JobTracker, taking advantage of the
+          <a href="cluster_setup.html#Refreshing+queue+configuration">
+          queue-refresh</a> feature in the framework.
+        </li>
       </ul>
     </section>
     
@@ -144,54 +157,113 @@
       <section>
         <title>Setting Up Queues</title>
         <p>
-          You can define multiple queues to which users can submit jobs with
-          the Capacity Scheduler. To define multiple queues, you should edit
-          the site configuration for Hadoop and modify the
-          <em>mapreduce.jobtracker.taskscheduler.queue.names</em> property.
-        </p>
-        <p>
-          You can also configure ACLs for controlling which users or groups
-          have access to the queues.
-        </p>
-        <p>
-          For more details, see
-          <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Configuring+the+Hadoop+Daemons">Configuring the Hadoop Daemons</a>.
-        </p>
+          You can define multiple, possibly hierarchical queues to which users
+          can submit jobs with the Capacity Scheduler. To define queues,
+          various properties should be set in two configuration files -
+          <a href="cluster_setup.html#mapred-queues.xml">mapred-queues.xml</a>
+          and
+          <a href="ext:capacity-scheduler-conf">conf/capacity-scheduler.xml</a>
+          .</p>
+          <p><em>conf/capacity-scheduler.xml</em> can be used to configure (1)
+          job-initialization-poller related properties and (2) the
+          default values for various properties in the queues.</p>
+          <p><em>conf/mapred-queues.xml</em> contains the actual queue
+          configuration including (1) framework specific properties like ACLs
+          for controlling which users or groups have access to the queues and
+          state of the queues and (2) the scheduler specific properties for
+          each queue. If any of these scheduler specific properties are
+          missing and not configured for a queue, then the properties in
+          <em>conf/capacity-scheduler.xml</em> are used to set default values.
+          More details about the properties that can be configured, and their
+          semantics, are described below. Also, a default template for
+          mapred-queues.xml tailored for use with the
+          Capacity Scheduler can be found
+          <a href="ext:mapred-queues-capacity-scheduler">here</a>.</p>
       </section>
   
       <section>
         <title>Configuring Properties for Queues</title>
 
         <p>The Capacity Scheduler can be configured with several properties
-        for each queue that control the behavior of the Scheduler. This
-        configuration is in the <em>conf/capacity-scheduler.xml</em>. By
+        for each queue that control the behavior of the Scheduler. As
+        described above, this scheduler specific configuration has to be in
+        the <em>conf/mapred-queues.xml</em> along with the rest of the
+        framework specific configuration. By
         default, the configuration is set up for one queue, named 
         <em>default</em>.</p>
-        <p>To specify a property for a queue that is defined in the site
-        configuration, you should use the property name as
-        <em>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.&lt;property-name&gt;</em>.
-        </p>
-        <p>For example, to define the property <em>capacity</em>
-        for queue named <em>research</em>, you should specify the property
-        name as 
-        <em>mapred.capacity-scheduler.queue.research.capacity</em>.
+        <p>To specify a property for a specific queue that is defined in the
+        mapred-queues.xml, you should set the corresponding property in a
+        &lt;property&gt; tag explained
+        <a href="cluster_setup.html#property_tag">here</a>.
         </p>
 
         <p>The properties defined for queues and their descriptions are
         listed in the table below:</p>
 
         <table>
-          <tr><th>Name</th><th>Description</th></tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.capacity</td>
-          	<td>Percentage of the number of slots in the cluster that are made 
-            to be available for jobs in this queue. The sum of capacities 
-            for all queues should be less than or equal 100.</td>
+          <tr>
+          	<th>Name</th>
+            <th>
+            	<a href="commands_manual.html#RefreshQueues">
+            	Refresh-able?</a>
+           	</th>
+            <th>Applicable to?</th>
+            <th>Description</th>
+          </tr>
+          <tr>
+          	<td>capacity</td>
+          	<td>Yes</td>
+          	<td>Container queues as well as leaf queues</td>
+          	<td>For a root-level container queue, this is the percentage of the
+          	number of slots in the cluster that will be available for all its
+          	immediate children together. For a root-level leaf-queue, this is
+          	the percentage of the number of slots in the cluster that will be
+          	available for all its jobs.	For a non-root level container queue,
+          	this is the percentage of the number of slots in its parent queue
+          	that will be available for all its	children together. For a
+          	non-root-level leaf queue, this	is the percentage of the number of
+          	slots in its parent queue that will be available for jobs in this
+          	queue. The sum of capacities for all children of a container queue
+          	should be less than or equal 100. The sum of capacities of all the
+          	root-level queues should be less than or equal to 100.
+            </td>
+          </tr>
+          <tr>
+            <td>maximum-capacity</td>
+            <td>Yes</td>
+          	<td>Container queues as well as leaf queues</td>
+            <td>
+	          A limit in percentage beyond which a non-root-level queue cannot use
+	          the capacity of its parent queue; for a root-level queue, this is
+	          the limit in percentage beyond which it cannot use the
+	          cluster-capacity. This property provides a means to limit how much
+	          excess capacity a queue can use.  It can be used to prevent queues
+	          with long running jobs from occupying more than a certain percentage
+	          of the parent-queue or the cluster, which, in the absence of
+	          pre-emption, can lead to capacity guarantees of other queues getting
+	          affected.
+	
+	          The maximum-capacity of a queue can only be greater than or equal to
+	          its capacity. By default, there is no limit for a queue. For a
+	          non-root-level queue this means it can occupy till the
+	          maximum-capacity of its parent, for a root-level queue, it means that
+	          it can occupy the whole cluster. A value of 100 implies that a queue
+	          can use the complete capacity of its parent, or the complete
+	          cluster-capacity in case of root-level-queues.
+            </td>
           </tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.supports-priority</td>
+          <tr>
+          	<td>supports-priority</td>
+          	<td>No</td>
+          	<td>Leaf queues only</td>
           	<td>If true, priorities of jobs will be taken into account in scheduling 
-          	decisions.</td>
+          	decisions.
+          	</td>
           </tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.minimum-user-limit-percent</td>
+          <tr>
+          	<td>minimum-user-limit-percent</td>
+          	<td>Yes</td>
+          	<td>Leaf queues only</td>
           	<td>Each queue enforces a limit on the percentage of resources 
           	allocated to a user at any given time, if there is competition 
           	for them. This user limit can vary between a minimum and maximum 
@@ -202,9 +274,25 @@
           	of the queue resources. If a third user submits a job, no single 
           	user can use more than 33% of the queue resources. With 4 or more 
           	users, no user can use more than 25% of the queue's resources. A 
-          	value of 100 implies no user limits are imposed.</td>
+          	value of 100 implies no user limits are imposed.
+	        </td>
           </tr>
+          <tr>
+            <td>maximum-initialized-jobs-per-user</td>
+            <td>Yes</td>
+          	<td>Leaf queues only</td>
+            <td>
+              Maximum number of jobs which are allowed to be pre-initialized for
+              a particular user in the queue. Once a job is scheduled, i.e.
+              it starts running, then that job is not considered
+              when the scheduler computes the maximum number of jobs a user is
+              allowed to initialize.
+            </td>
+          </tr>  
         </table>
+        <p>See  <a href="ext:mapred-queues-capacity-scheduler">
+        this configuration file</a> for a default configuration of queues in
+        capacity-scheduler.</p>
       </section>
       
       <section>
@@ -296,18 +384,6 @@
           <tr><th>Name</th><th>Description</th></tr>
           <tr>
             <td>
-              mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.maximum-initialized-jobs-per-user
-            </td>
-            <td>
-              Maximum number of jobs which are allowed to be pre-initialized for
-              a particular user in the queue. Once a job is scheduled, i.e. 
-              it starts running, then that job is not considered
-              while scheduler computes the maximum job a user is allowed to
-              initialize. 
-            </td>
-          </tr>
-          <tr>
-            <td>
               mapred.capacity-scheduler.init-poll-interval
             </td>
             <td>
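
As a quick sanity check on the hierarchical capacity rows documented above: a child queue's capacity is a percentage of its parent's slots, not of the whole cluster. The queue names and percentages below are invented purely for illustration:

public class CapacityMath {
  public static void main(String[] args) {
    int clusterSlots = 100;
    double engCapacityPct = 60.0;        // hypothetical root-level container queue "eng"
    double researchCapacityPct = 50.0;   // hypothetical leaf queue "research" under "eng"

    double engSlots = clusterSlots * engCapacityPct / 100.0;        // 60.0 slots for "eng" and its children
    double researchSlots = engSlots * researchCapacityPct / 100.0;  // 30.0 slots for jobs in "research"
    System.out.println("eng=" + engSlots + " research=" + researchSlots);
  }
}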

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/cluster_setup.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/cluster_setup.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/cluster_setup.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/cluster_setup.xml Tue Jan 26 14:02:53 2010
@@ -33,7 +33,7 @@
       Hadoop clusters ranging from a few nodes to extremely large clusters with 
       thousands of nodes.</p>
       <p>
-      To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="quickstart.html"> Hadoop Quick Start</a>).
+      To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="ext:single-node-setup"> Hadoop Quick Start</a>).
       </p>
     </section>
     
@@ -42,11 +42,11 @@
       
       <ol>
         <li>
-          Make sure all <a href="quickstart.html#PreReqs">requisite</a> software 
+          Make sure all <a href="ext:single-node-setup/PreReqs">requisite</a> software 
           is installed on all nodes in your cluster.
         </li>
         <li>
-          <a href="quickstart.html#Download">Get</a> the Hadoop software.
+          <a href="ext:single-node-setup/Download">Get</a> the Hadoop software.
         </li>
       </ol>
     </section>
@@ -81,15 +81,17 @@
         <ol>
           <li>
             Read-only default configuration - 
-            <a href="ext:core-default">src/core/core-default.xml</a>, 
-            <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a> and 
-            <a href="ext:mapred-default">src/mapred/mapred-default.xml</a>.
+            <a href="ext:common-default">src/core/core-default.xml</a>, 
+            <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a>, 
+            <a href="ext:mapred-default">src/mapred/mapred-default.xml</a> and
+            <a href="ext:mapred-queues">conf/mapred-queues.xml.template</a>.
           </li>
           <li>
             Site-specific configuration - 
-            <em>conf/core-site.xml</em>, 
-            <em>conf/hdfs-site.xml</em> and 
-            <em>conf/mapred-site.xml</em>.
+            <a href="#core-site.xml">conf/core-site.xml</a>, 
+            <a href="#hdfs-site.xml">conf/hdfs-site.xml</a>, 
+            <a href="#mapred-site.xml">conf/mapred-site.xml</a> and
+            <a href="#mapred-queues.xml">conf/mapred-queues.xml</a>.
           </li>
         </ol>
       
@@ -163,9 +165,8 @@
           <title>Configuring the Hadoop Daemons</title>
           
           <p>This section deals with important parameters to be specified in the
-          following:
-          <br/>
-          <code>conf/core-site.xml</code>:</p>
+          following:</p>
+          <anchor id="core-site.xml"/><p><code>conf/core-site.xml</code>:</p>
 
 		  <table>
   		    <tr>
@@ -180,7 +181,7 @@
             </tr>
           </table>
 
-      <p><br/><code>conf/hdfs-site.xml</code>:</p>
+      <anchor id="hdfs-site.xml"/><p><code>conf/hdfs-site.xml</code>:</p>
           
       <table>   
         <tr>
@@ -212,7 +213,7 @@
 		    </tr>
       </table>
 
-      <p><br/><code>conf/mapred-site.xml</code>:</p>
+      <anchor id="mapred-site.xml"/><p><code>conf/mapred-site.xml</code>:</p>
 
       <table>
           <tr>
@@ -271,83 +272,321 @@
 		        TaskTrackers.
 		      </td>
   		    </tr>
-        <tr>
-          <td>mapred.queue.names</td>
-          <td>Comma separated list of queues to which jobs can be submitted.</td>
-          <td>
-            The Map/Reduce system always supports atleast one queue
-            with the name as <em>default</em>. Hence, this parameter's
-            value should always contain the string <em>default</em>.
-            Some job schedulers supported in Hadoop, like the 
-            <a href="capacity_scheduler.html">Capacity 
-            Scheduler</a>, support multiple queues. If such a scheduler is
-            being used, the list of configured queue names must be
-            specified here. Once queues are defined, users can submit
-            jobs to a queue using the property name 
-            <em>mapreduce.job.queuename</em> in the job configuration.
-            There could be a separate 
-            configuration file for configuring properties of these 
-            queues that is managed by the scheduler. 
-            Refer to the documentation of the scheduler for information on 
-            the same.
-          </td>
-        </tr>
-        <tr>
-          <td>mapred.acls.enabled</td>
-          <td>Specifies whether ACLs are supported for controlling job
-              submission and administration</td>
-          <td>
-            If <em>true</em>, ACLs would be checked while submitting
-            and administering jobs. ACLs can be specified using the
-            configuration parameters of the form
-            <em>mapred.queue.queue-name.acl-name</em>, defined below.
-          </td>
-        </tr>
-		  </table>
-      
-      <p><br/><code> conf/mapred-queue-acls.xml</code></p>
-      
-      <table>
-       <tr>
-          <th>Parameter</th>
-          <th>Value</th> 
-          <th>Notes</th>
-       </tr>
-        <tr>
-          <td>mapred.queue.<em>queue-name</em>.acl-submit-job</td>
-          <td>List of users and groups that can submit jobs to the
-              specified <em>queue-name</em>.</td>
-          <td>
-            The list of users and groups are both comma separated
-            list of names. The two lists are separated by a blank.
-            Example: <em>user1,user2 group1,group2</em>.
-            If you wish to define only a list of groups, provide
-            a blank at the beginning of the value.
-          </td>
-        </tr>
-        <tr>
-          <td>mapred.queue.<em>queue-name</em>.acl-administer-job</td>
-          <td>List of users and groups that can change the priority
-              or kill jobs that have been submitted to the
-              specified <em>queue-name</em>.</td>
-          <td>
-            The list of users and groups are both comma separated
-            list of names. The two lists are separated by a blank.
-            Example: <em>user1,user2 group1,group2</em>.
-            If you wish to define only a list of groups, provide
-            a blank at the beginning of the value. Note that an
-            owner of a job can always change the priority or kill
-            his/her own job, irrespective of the ACLs.
-          </td>
-        </tr>
-      </table>
-      
+		  </table>      
 
           <p>Typically all the above parameters are marked as 
           <a href="ext:api/org/apache/hadoop/conf/configuration/final_parameters">
           final</a> to ensure that they cannot be overriden by user-applications.
           </p>
 
+          <anchor id="mapred-queues.xml"/><p><code>conf/mapred-queues.xml
+          </code>:</p>
+          <p>This file is used to configure the queues in the Map/Reduce
+          system. Queues are abstract entities in the JobTracker that can be
+          used to manage collections of jobs. They provide a way for 
+          administrators to organize jobs in specific ways and to enforce 
+          certain policies on such collections, thus providing varying
+          levels of administrative control and management functions on jobs.
+          </p> 
+          <p>One can imagine the following sample scenarios:</p>
+          <ul>
+            <li> Jobs submitted by a particular group of users can all be 
+            submitted to one queue. </li> 
+            <li> Long running jobs in an organization can be submitted to a
+            queue. </li>
+            <li> Short running jobs can be submitted to a queue and the number
+            of jobs that can run concurrently can be restricted. </li> 
+          </ul> 
+          <p>The usage of queues is closely tied to the scheduler configured
+          at the JobTracker via <em>mapreduce.jobtracker.taskscheduler</em>.
+          The degree of support of queues depends on the scheduler used. Some
+          schedulers support a single queue, while others support more complex
+          configurations. Schedulers also implement the policies that apply 
+          to jobs in a queue. Some schedulers, such as the Fairshare scheduler,
+          implement their own mechanisms for collections of jobs and do not rely
+          on queues provided by the framework. The administrators are 
+          encouraged to refer to the documentation of the scheduler they are
+          interested in for determining the level of support for queues.</p>
+          <p>The Map/Reduce framework supports some basic operations on queues
+          such as job submission to a specific queue, access control for queues,
+          queue states, viewing configured queues and their properties
+          and refresh of queue properties. In order to fully implement some of
+          these operations, the framework takes the help of the configured
+          scheduler.</p>
+          <p>The following types of queue configurations are possible:</p>
+          <ul>
+            <li> Single queue: The default configuration in Map/Reduce consists
+            of a single queue, as supported by the default scheduler. All jobs
+            are submitted to this default queue which maintains jobs in a priority
+            based FIFO order.</li>
+            <li> Multiple single level queues: Multiple queues are defined, and
+            jobs can be submitted to any of these queues. Different policies
+            can be applied to these queues by schedulers that support this 
+            configuration to provide a better level of support. For example,
+            the <a href="capacity_scheduler.html">capacity scheduler</a>
+            provides ways of configuring different 
+            capacity and fairness guarantees on these queues.</li>
+            <li> Hierarchical queues: Hierarchical queues are a configuration in
+            which queues can contain other queues within them recursively. The
+            queues that contain other queues are referred to as 
+            container queues. Queues that do not contain other queues are 
+            referred as leaf or job queues. Jobs can only be submitted to leaf
+            queues. Hierarchical queues can potentially offer a higher level 
+            of control to administrators, as schedulers can now build a
+            hierarchy of policies where policies applicable to a container
+            queue can provide context for policies applicable to queues it
+            contains. It also opens up possibilities for delegating queue
+            administration where administration of queues in a container queue
+            can be turned over to a different set of administrators, within
+            the context provided by the container queue. For example, the
+            <a href="capacity_scheduler.html">capacity scheduler</a>
+            uses hierarchical queues to partition the capacity of a cluster
+            among container queues, allowing the queues they contain to divide
+            that capacity further.</li>
+          </ul>
+
+          <p>Most of the configuration of the queues can be refreshed/reloaded
+          without restarting the Map/Reduce sub-system by editing this
+          configuration file as described in the section on
+          <a href="commands_manual.html#RefreshQueues">reloading queue 
+          configuration</a>.
+          Not all configuration properties can be reloaded, of course,
+          as the description of each property below explains.</p>
+
+          <p>The format of conf/mapred-queues.xml is different from the other 
+          configuration files, supporting nested configuration
+          elements to support hierarchical queues. The format is as follows:
+          </p>
+
+          <source>
+          &lt;queues aclsEnabled="$aclsEnabled"&gt;
+            &lt;queue&gt;
+              &lt;name&gt;$queue-name&lt;/name&gt;
+              &lt;state&gt;$state&lt;/state&gt;
+              &lt;queue&gt;
+                &lt;name&gt;$child-queue1&lt;/name&gt;
+                &lt;properties&gt;
+                   &lt;property key="$key" value="$value"/&gt;
+                   ...
+                &lt;/properties&gt;
+                &lt;queue&gt;
+                  &lt;name&gt;$grand-child-queue1&lt;/name&gt;
+                  ...
+                &lt;/queue&gt;
+              &lt;/queue&gt;
+              &lt;queue&gt;
+                &lt;name&gt;$child-queue2&lt;/name&gt;
+                ...
+              &lt;/queue&gt;
+              ...
+              ...
+              ...
+              &lt;queue&gt;
+                &lt;name&gt;$leaf-queue&lt;/name&gt;
+                &lt;acl-submit-job&gt;$acls&lt;/acl-submit-job&gt;
+                &lt;acl-administer-jobs&gt;$acls&lt;/acl-administer-jobs&gt;
+                &lt;properties&gt;
+                   &lt;property key="$key" value="$value"/&gt;
+                   ...
+                &lt;/properties&gt;
+              &lt;/queue&gt;
+            &lt;/queue&gt;
+          &lt;/queues&gt;
+          </source>
+          <table>
+            <tr>
+              <th>Tag/Attribute</th>
+              <th>Value</th>
+              <th>
+              	<a href="commands_manual.html#RefreshQueues">Refresh-able?</a>
+              </th>
+              <th>Notes</th>
+            </tr>
+
+            <tr>
+              <td><anchor id="queues_tag"/>queues</td>
+              <td>Root element of the configuration file.</td>
+              <td>Not-applicable</td>
+              <td>All the queues are nested inside this root element of the
+              file. There can be only one root queues element in the file.</td>
+            </tr>
+
+            <tr>
+              <td>aclsEnabled</td>
+              <td>Boolean attribute to the
+              <a href="#queues_tag"><em>&lt;queues&gt;</em></a> tag
+              specifying whether ACLs are supported for controlling job
+              submission and administration for <em>all</em> the queues
+              configured.
+              </td>
+              <td>Yes</td>
+              <td>If <em>false</em>, ACLs are ignored for <em>all</em> the
+              configured queues. <br/><br/>
+              If <em>true</em>, the user and group details of the user
+              are checked against the configured ACLs of the corresponding
+              job-queue while submitting and administering jobs. ACLs can be
+              specified for each queue using the queue-specific tags
+              "acl-$acl_name", defined below. ACLs are checked only against
+              the job-queues, i.e. the leaf-level queues; ACLs configured
+              for the rest of the queues in the hierarchy are ignored.
+              </td>
+            </tr>
+
+            <tr>
+              <td><anchor id="queue_tag"/>queue</td>
+              <td>A child element of the
+              <a href="#queues_tag"><em>&lt;queues&gt;</em></a> tag or another
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a>. Denotes a queue
+              in the system.
+              </td>
+              <td>Not applicable</td>
+              <td>Queues can be hierarchical and so this element can contain
+              children of this same type.</td>
+            </tr>
+
+            <tr>
+              <td>name</td>
+              <td>Child element of a 
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              name of the queue.</td>
+              <td>No</td>
+              <td>Name of the queue cannot contain the character <em>":"</em>
+              which is reserved as the queue-name delimiter when addressing a
+              queue in a hierarchy.</td>
+            </tr>
+
+            <tr>
+              <td>state</td>
+              <td>Child element of a
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              state of the queue.
+              </td>
+              <td>Yes</td>
+              <td>Each queue has a corresponding state. A queue in
+              <em>'running'</em> state can accept new jobs, while a queue in
+              <em>'stopped'</em> state will stop accepting any new jobs. State
+              is defined and respected by the framework only for the
+              leaf-level queues and is ignored for all other queues.
+              <br/><br/>
+              The state of the queue can be viewed from the command line using
+              the <code>'bin/mapred queue'</code> command and also on the Web
+              UI.<br/><br/>
+              Administrators can stop and start queues at runtime using the
+              feature of <a href="commands_manual.html#RefreshQueues">reloading
+              queue configuration</a>. If a queue is stopped at runtime, it
+              will complete all the existing running jobs and will stop
+              accepting any new jobs.
+              </td>
+            </tr>
+
+            <tr>
+              <td>acl-submit-job</td>
+              <td>Child element of a
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              list of users and groups that can submit jobs to the specified
+              queue.</td>
+              <td>Yes</td>
+              <td>
+              Applicable only to leaf-queues.<br/><br/>
+              The lists of users and groups are both comma-separated
+              lists of names. The two lists are separated by a blank.
+              Example: <em>user1,user2 group1,group2</em>.
+              If you wish to define only a list of groups, provide
+              a blank at the beginning of the value.
+              </td>
+            </tr>
+
+            <tr>
+              <td>acl-administer-job</td>
+              <td>Child element of a
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              list of users and groups that can change the priority of a job
+              or kill a job that has been submitted to the specified queue.
+              </td>
+              <td>Yes</td>
+              <td>
+              Applicable only to leaf-queues.<br/><br/>
+              The lists of users and groups are both comma-separated
+              lists of names. The two lists are separated by a blank.
+              Example: <em>user1,user2 group1,group2</em>.
+              If you wish to define only a list of groups, provide
+              a blank at the beginning of the value. Note that an
+              owner of a job can always change the priority or kill
+              his/her own job, irrespective of the ACLs.
+              </td>
+            </tr>
+
+            <tr>
+              <td><anchor id="properties_tag"/>properties</td>
+              <td>Child element of a 
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              scheduler specific properties.</td>
+              <td>Not applicable</td>
+              <td>The scheduler specific properties are the children of this
+              element specified as a group of &lt;property&gt; tags described
+              below. The JobTracker completely ignores these properties. These
+              can be used as per-queue properties needed by the scheduler
+              being configured. Please look at the scheduler specific
+              documentation as to how these properties are used by that
+              particular scheduler.
+              </td>
+            </tr>
+
+            <tr>
+              <td><anchor id="property_tag"/>property</td>
+              <td>Child element of
+              <a href="#properties_tag"><em>&lt;properties&gt;</em></a> for a
+              specific queue.</td>
+              <td>Not applicable</td>
+              <td>A single scheduler specific queue-property. Ignored by
+              the JobTracker and used by the scheduler that is configured.</td>
+            </tr>
+
+            <tr>
+              <td>key</td>
+              <td>Attribute of a
+              <a href="#property_tag"><em>&lt;property&gt;</em></a> for a
+              specific queue.</td>
+              <td>Scheduler-specific</td>
+              <td>The name of a single scheduler specific queue-property.</td>
+            </tr>
+
+            <tr>
+              <td>value</td>
+              <td>Attribute of a
+              <a href="#property_tag"><em>&lt;property&gt;</em></a> for a
+              specific queue.</td>
+              <td>Scheduler-specific</td>
+              <td>The value of a single scheduler specific queue-property.
+              The value can be anything; its interpretation is left to the
+              scheduler that is configured.</td>
+            </tr>
+
+         </table>
+
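+          <p>For illustration only, the following is a minimal sketch of a
+          <em>conf/mapred-queues.xml</em> file using the tags and attributes
+          described above, with the same hypothetical 'Queue-A' &gt;
+          'Queue-B' &gt; 'Queue-C' hierarchy that the job-submission example
+          below refers to. The users, groups and the scheduler property shown
+          are placeholders, not defaults. The state and ACLs are set on the
+          leaf queue 'Queue-C' since, as noted above, they are respected only
+          for leaf-level queues.</p>
+<source>
+&lt;queues aclsEnabled="true"&gt;
+  &lt;queue&gt;
+    &lt;name&gt;Queue-A&lt;/name&gt;
+    &lt;queue&gt;
+      &lt;name&gt;Queue-B&lt;/name&gt;
+      &lt;queue&gt;
+        &lt;name&gt;Queue-C&lt;/name&gt;
+        &lt;state&gt;running&lt;/state&gt;
+        &lt;acl-submit-job&gt;user1,user2 group1,group2&lt;/acl-submit-job&gt;
+        &lt;acl-administer-job&gt; group1&lt;/acl-administer-job&gt;
+        &lt;properties&gt;
+          &lt;property key="some-scheduler-key" value="some-value"/&gt;
+        &lt;/properties&gt;
+      &lt;/queue&gt;
+    &lt;/queue&gt;
+  &lt;/queue&gt;
+&lt;/queues&gt;
+</source>
+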
+          <p>Once the queues are configured properly and the Map/Reduce
+          system is up and running, from the command line one can
+          <a href="commands_manual.html#QueuesList">get the list
+          of queues</a> and
+          <a href="commands_manual.html#QueuesInfo">obtain
+          information specific to each queue</a>. This information is also
+          available from the web UI, where queue information can be seen by
+          going to queueinfo.jsp, which is linked from the queues cell of the
+          cluster-summary table. queueinfo.jsp shows the hierarchy of queues
+          as well as the specific information for each queue.
+          </p>
+
+          <p> Users can submit jobs only to a
+          leaf-level queue by specifying the fully-qualified queue-name for
+          the property name <em>mapreduce.job.queuename</em> in the job
+          configuration. The character ':' is the queue-name delimiter. For
+          example, to submit to a configured job-queue 'Queue-C' that is a
+          sub-queue of 'Queue-B', which in turn is a sub-queue of 'Queue-A',
+          the job configuration should set the property
+          <em>mapreduce.job.queuename</em> to the value
+          <em>Queue-A:Queue-B:Queue-C</em>.</p>
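+          <p>For example, using the hierarchy sketched above, a minimal
+          snippet in the job configuration file would look like the following
+          (the same property can equally be set programmatically on the job's
+          configuration before submission):</p>
+<source>
+&lt;property&gt;
+  &lt;name&gt;mapreduce.job.queuename&lt;/name&gt;
+  &lt;value&gt;Queue-A:Queue-B:Queue-C&lt;/value&gt;
+&lt;/property&gt;
+</source>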
+         </section>
           <section>
             <title>Real-World Cluster Configurations</title>
             
@@ -881,7 +1120,6 @@
             <code>$ bin/hadoop job -history all output-dir</code><br/></p> 
           </section>
         </section>
-      </section>
       
       <p>Once all the necessary configuration is complete, distribute the files
       to the <code>HADOOP_CONF_DIR</code> directory on all the machines, 
@@ -952,7 +1190,7 @@
       and starts the <code>TaskTracker</code> daemon on all the listed slaves.
       </p>
     </section>
-    
+
     <section>
       <title>Hadoop Shutdown</title>
       

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/commands_manual.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/commands_manual.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/commands_manual.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/commands_manual.xml Tue Jan 26 14:02:53 2010
@@ -369,13 +369,13 @@
           <th> COMMAND_OPTION </th><th> Description </th>
         </tr>
         <tr>
-          <td><code>-list</code> </td>
+          <td><anchor id="QueuesList"/><code>-list</code> </td>
           <td>Gets list of Job Queues configured in the system. Along with scheduling information
           associated with the job queues.
           </td>
         </tr>
         <tr>
-          <td><code>-info &lt;job-queue-name&gt; [-showJobs]</code></td>
+          <td><anchor id="QueuesInfo"/><code>-info &lt;job-queue-name&gt; [-showJobs]</code></td>
           <td>
            Displays the job queue information and associated scheduling information of particular
            job queue. If -showJobs options is present a list of jobs submitted to the particular job
@@ -581,16 +581,61 @@
         <p>Runs MR admin client</p>
         <p><code>Usage: hadoop mradmin  [</code>
         <a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a>
-        <code>] [-refreshQueueAcls] </code></p>
+        <code>] [-refreshServiceAcl] [-refreshQueues] [-refreshNodes] [-help [cmd]] </code></p>
         <table>
         <tr>
         <th> COMMAND_OPTION </th><th> Description </th>
         </tr>
         <tr>
-        <td><code>-refreshQueueAcls</code></td>
-        <td> Refresh the queue acls used by Hadoop, to check access during submissions
-        and administration of the job by the user. The properties present in
-        <code>mapred-queue-acls.xml</code> is reloaded by the queue manager.</td>
+        <td><code>-refreshServiceAcl</code></td>
+        <td> Reload the service-level authorization policies. The JobTracker
+         will reload the authorization policy file.</td>
+        </tr>
+        <tr>
+        <td><anchor id="RefreshQueues"/><code>-refreshQueues</code></td>
+        <td><p> Reload the queues' configuration at the JobTracker.
+          Most of the configuration of the queues can be refreshed/reloaded
+          without restarting the Map/Reduce sub-system. Administrators
+          typically own the
+          <a href="cluster_setup.html#mapred-queues.xml">
+          <em>conf/mapred-queues.xml</em></a>
+          file, can edit it while the JobTracker is still running, and can do
+          a reload by running this command.</p>
+          <p>Note that a refresh of the queues' configuration cannot change
+          the hierarchy of queues itself. No operation that involves a change
+          in either the hierarchy structure or the queues' names is allowed;
+          only selected properties of queues can be changed during refresh.
+          For example, new queues cannot be added dynamically, nor can an
+          existing queue be deleted.</p>
+          <p>If, during a reload of the queue configuration,
+          a syntactic or semantic error is introduced while editing the
+          configuration file, the refresh command fails with an exception that
+          is printed on the standard output of this command, informing the
+          requester of what went wrong during
+          the edit/reload. Importantly, the existing queue configuration is
+          untouched and the system is left in a consistent state.
+          </p>
+          <p>As described in the
+          <a href="cluster_setup.html#mapred-queues.xml"><em>
+          conf/mapred-queues.xml</em></a> section, the
+          <a href="cluster_setup.html#properties_tag"><em>
+          &lt;properties&gt;</em></a> tag in the queue configuration file can
+          also be used to specify per-queue properties needed by the scheduler.
+          When the framework's queue configuration is reloaded using this
+          command, this scheduler-specific configuration will also be reloaded,
+          provided the scheduler being configured supports this reload.
+          Please see the documentation of the particular scheduler in use.</p>
+          </td>
+        </tr>
+        <tr>
+        <td><code>-refreshNodes</code></td>
+        <td> Refresh the hosts information at the jobtracker.</td>
+        </tr>
+        <tr>
+        <td><code>-help [cmd]</code></td>
+        <td>Displays help for the given command or all commands if none
+                is specified.</td>
         </tr>
         </table>
       </section>

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/fair_scheduler.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/fair_scheduler.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/fair_scheduler.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/fair_scheduler.xml Tue Jan 26 14:02:53 2010
@@ -79,7 +79,7 @@
         jobs tolerate losing tasks; it only makes them take longer to finish.
       </p>
       <p>
-        Finally, the Fair Scheduler can limit the number of concurrent
+        The Fair Scheduler can limit the number of concurrent
         running jobs per user and per pool. This can be useful when a 
         user must submit hundreds of jobs at once, or for ensuring that
         intermediate data does not fill up disk space on a cluster when too many
@@ -89,6 +89,13 @@
         Jobs to run from each user/pool are chosen in order of priority and then
         submit time.
       </p>
+      <p>
+        Finally, the Fair Scheduler can limit the number of concurrent
+        running tasks per pool. This can be useful when jobs have a
+        dependency on an external service like a database or web
+        service that could be overloaded if too many map or reduce
+        tasks are run at once.
+      </p>
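+      <p>As a sketch, such a pool could be capped in the scheduler's
+      allocation file with the <em>maxMaps</em> and <em>maxReduces</em>
+      properties described later in this document; the pool name and the
+      limits below are placeholders only:</p>
+<source>
+&lt;pool name="external_service_pool"&gt;
+  &lt;maxMaps&gt;10&lt;/maxMaps&gt;
+  &lt;maxReduces&gt;5&lt;/maxReduces&gt;
+&lt;/pool&gt;
+</source>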
     </section>
 
     <section>
@@ -176,7 +183,7 @@
           </td>
           <td>
             Specify which jobconf property is used to determine the pool that a
-            job belongs in. String, default: <em>mapreduce.job.mapreduce.job.user.name</em>
+            job belongs in. String, default: <em>mapreduce.job.user.name</em>
             (i.e. one pool for each user). 
             Another useful value is <em>group.name</em> to create a
             pool per Unix group.
@@ -351,6 +358,8 @@
           <ul>
           <li><em>minMaps</em> and <em>minReduces</em>,
             to set the pool's minimum share of task slots.</li>
+          <li><em>maxMaps</em> and <em>maxReduces</em>, to set the
+            pool's maximum concurrent task slots.</li>
           <li><em>schedulingMode</em>, the pool's internal scheduling mode,
           which can be <em>fair</em> for fair sharing or <em>fifo</em> for
           first-in-first-out.</li>
@@ -398,9 +407,11 @@
   &lt;pool name="sample_pool"&gt;
     &lt;minMaps&gt;5&lt;/minMaps&gt;
     &lt;minReduces&gt;5&lt;/minReduces&gt;
+    &lt;maxMaps&gt;25&lt;/maxMaps&gt;
+    &lt;maxReduces&gt;25&lt;/maxReduces&gt;
     &lt;minSharePreemptionTimeout&gt;300&lt;/minSharePreemptionTimeout&gt;
   &lt;/pool&gt;
-  &lt;mapreduce.job.mapreduce.job.user.name="sample_user"&gt;
+  &lt;mapreduce.job.user.name="sample_user"&gt;
     &lt;maxRunningJobs&gt;6&lt;/maxRunningJobs&gt;
   &lt;/user&gt;
   &lt;userMaxJobsDefault&gt;3&lt;/userMaxJobsDefault&gt;
@@ -412,7 +423,9 @@
         slots and 5 reduce slots. The pool also has a minimum share preemption
         timeout of 300 seconds (5 minutes), meaning that if it does not get its
         guaranteed share within this time, it is allowed to kill tasks from
-        other pools to achieve its share.
+        other pools to achieve its share. The pool also has a cap of 25 map
+        slots and 25 reduce slots, which means that once 25 map tasks (or 25
+        reduce tasks) are running, no more tasks of that type will be
+        scheduled even if the pool's fair share is higher.
         The example also limits the number of running jobs 
         per user to 3, except for sample_user, who can run 6 jobs concurrently. 
         Finally, the example sets a fair share preemption timeout of 600 seconds

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml Tue Jan 26 14:02:53 2010
@@ -41,10 +41,10 @@
       </p> 
       <ul>
         <li>
-          <a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html">Single Node Setup</a> for first-time users.
+          <a href="ext:single-node-setup">Single Node Setup</a> for first-time users.
         </li>
         <li>
-          <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html">Cluster Setup</a> for large, distributed clusters.
+          <a href="cluster_setup.html">Cluster Setup</a> for large, distributed clusters.
         </li>
       </ul>
     </section>
@@ -152,8 +152,8 @@
       occurrences of each word in a given input set.</p>
       
       <p>This example works with a 
-      pseudo-distributed (<a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html#SingleNodeSetup">Single Node Setup</a>) 
-     or fully-distributed (<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html">Cluster Setup</a>) 
+      pseudo-distributed (<a href="ext:single-node-setup">Single Node Setup</a>) 
+     or fully-distributed (<a href="cluster_setup.html">Cluster Setup</a>) 
       Hadoop installation.</p>   
       
       <section>
@@ -947,6 +947,177 @@
             map-outputs before writing them out to the <code>FileSystem</code>.
             </p>
           </section>
+
+          <section>
+            <title>Mark-Reset</title>
+
+            <p>While applications iterate through the values for a given key, it is
+            possible to mark the current position and later reset the iterator to
+            this position and continue the iteration process. The corresponding
+            methods are <code>mark()</code> and <code>reset()</code>. 
+            </p>
+
+            <p><code>mark()</code> and <code>reset()</code> can be called any
+            number of times during the iteration cycle. The <code>reset()</code>
+            method rewinds the iterator to the point at which <code>mark()</code>
+            was last called, so the values read after the mark are returned again.
+            </p>
+
+            <p>This functionality is available only with the new context-based
+               reduce iterator.
+            </p>
+
+            <p> The following code snippet demonstrates the use of this 
+                functionality.
+            </p>
+           
+            <section>
+            <title>Source Code</title>
+
+            <table>
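+            <tr><td>
+            <code>
+              // Assumed import: org.apache.hadoop.mapreduce.MarkableIterator
+            </code>
+            </td></tr>
+
+            <tr><td></td></tr>
+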
+            <tr><td>
+            <code>
+              public void reduce(IntWritable key, 
+                Iterable&lt;IntWritable&gt; values,
+                Context context) throws IOException, InterruptedException {
+            </code>
+            </td></tr>
+
+            <tr><td></td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                MarkableIterator&lt;IntWritable&gt; mitr = 
+                  new MarkableIterator&lt;IntWritable&gt;(values.iterator());
+            </code>
+            </td></tr>
+
+            <tr><td></td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                // Mark the position
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                mitr.mark();
+            </code>
+            </td></tr>
+
+            <tr><td></td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                while (mitr.hasNext()) {
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                  &nbsp;&nbsp;&nbsp;&nbsp;
+                  IntWritable i = mitr.next();
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                  &nbsp;&nbsp;&nbsp;&nbsp;
+                  // Do the necessary processing
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                }
+            </code>
+            </td></tr>
+
+            <tr><td></td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                // Reset
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                mitr.reset();
+            </code>
+            </td></tr>
+
+            <tr><td></td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                // Iterate all over again. Since mark was called before the first
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                // call to mitr.next() in this example, we will iterate over all
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                // the values now
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                while (mitr.hasNext()) {
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                  &nbsp;&nbsp;&nbsp;&nbsp;
+                  IntWritable i = mitr.next();
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                  &nbsp;&nbsp;&nbsp;&nbsp;
+                  // Do the necessary processing
+            </code>
+            </td></tr>
+
+            <tr><td>
+            <code>
+                &nbsp;&nbsp;
+                }
+            </code>
+            </td></tr>
+
+            <tr><td></td></tr>
+
+            <tr><td>
+            <code>
+              }
+            </code>
+            </td></tr>
+
+            </table>
+          </section>
+
+          </section>
         </section>
         
         <section>
@@ -1130,7 +1301,7 @@
         <p>Note: <code>mapred.{map|reduce}.child.java.opts</code> are used only 
         for configuring the launched child tasks from task tracker. Configuring 
         the memory options for daemons is documented under
-        <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Configuring+the+Environment+of+the+Hadoop+Daemons">
+        <a href="cluster_setup.html#Configuring+the+Environment+of+the+Hadoop+Daemons">
         Configuring the Environment of the Hadoop Daemons</a> (Cluster Setup).</p>
         
         <p>The memory available to some parts of the framework is also
@@ -1336,9 +1507,11 @@
         <li><code>${mapreduce.cluster.local.dir}/taskTracker/jobcache/$jobid/jars/</code>
         : The jars directory, which has the job jar file and expanded jar.
         The <code>job.jar</code> is the application's jar file that is
-        automatically distributed to each machine. It is expanded in jars
-        directory before the tasks for the job start. The job.jar location
-        is accessible to the application through the api
+        automatically distributed to each machine. Any library jars that are dependencies
+        of the application code may be packaged inside this jar in a <code>lib/</code> directory.
+        This directory is extracted from <code>job.jar</code> and its contents are
+        automatically added to the classpath for each task.
+        The job.jar location is accessible to the application through the api
         <a href="ext:api/org/apache/hadoop/mapred/jobconf/getjar"> 
         JobConf.getJar() </a>. To access the unjarred directory,
         JobConf.getJar().getParent() can be called.</li>
@@ -2239,8 +2412,8 @@
       
       <p>This example needs the HDFS to be up and running, especially for the 
       <code>DistributedCache</code>-related features. Hence it only works with a 
-      pseudo-distributed (<a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html#SingleNodeSetup">Single Node Setup</a>) 
-     or fully-distributed (<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Fully-Distributed+Operation">Cluster Setup</a>) 
+      pseudo-distributed (<a href="ext:single-node-setup">Single Node Setup</a>) 
+     or fully-distributed (<a href="cluster_setup.html#Fully-Distributed+Operation">Cluster Setup</a>) 
       Hadoop installation.</p>     
       
       <section>

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/site.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/site.xml Tue Jan 26 14:02:53 2010
@@ -34,6 +34,7 @@
   
    <docs label="Getting Started"> 
 		<overview   				label="Overview" 					href="index.html" />
+		<setup label="Cluster Setup" href="cluster_setup.html"/>
 		<mapred    				label="MapReduce Tutorial" 	href="mapred_tutorial.html" />
 		 <streaming 				label="Hadoop Streaming"  href="streaming.html" />
    </docs>	
@@ -71,11 +72,18 @@
     <jira      href="http://hadoop.apache.org/mapreduce/issue_tracking.html"/>
     <wiki      href="http://wiki.apache.org/hadoop/MapReduce" />
     <faq       href="http://wiki.apache.org/hadoop/MapReduce/FAQ" />
-    
     <common-default href="http://hadoop.apache.org/common/docs/current/common-default.html" />
     <hdfs-default href="http://hadoop.apache.org/hdfs/docs/current/hdfs-default.html" />
     <mapred-default href="http://hadoop.apache.org/mapreduce/docs/current/mapred-default.html" />
-    
+    <mapred-queues href="http://hadoop.apache.org/mapreduce/docs/current/mapred-queues.xml" />
+    <mapred-queues-capacity-scheduler href="http://hadoop.apache.org/mapreduce/docs/current/mapred-queues-capacity-scheduler.xml" />
+    <capacity-scheduler-conf href="http://hadoop.apache.org/mapreduce/docs/current/capacity-scheduler-conf.html" />
+
+    <single-node-setup href="http://hadoop.apache.org/common/docs/current/single_node_setup.html">
+      <PreReqs href="#PreReqs" />
+      <Download href="#Download" />
+    </single-node-setup>
+
     <zlib      href="http://www.zlib.net/" />
     <gzip      href="http://www.gzip.org/" />
     <bzip      href="http://www.bzip.org/" />

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/streaming.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/streaming.xml?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/streaming.xml (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/docs/src/documentation/content/xdocs/streaming.xml Tue Jan 26 14:02:53 2010
@@ -580,8 +580,8 @@
     -D map.output.key.field.separa=. \
     -D mapreduce.partition.keypartitioner.options=-k1,2 \
     -D mapreduce.fieldsel.data.field.separator=. \
-    -D mapreduce.fieldsel.mapreduce.fieldsel.map.output.key.value.fields.spec=6,5,1-3:0- \
-    -D mapreduce.fieldsel.mapreduce.fieldsel.reduce.output.key.value.fields.spec=0-2:5- \
+    -D mapreduce.fieldsel.map.output.key.value.fields.spec=6,5,1-3:0- \
+    -D mapreduce.fieldsel.reduce.output.key.value.fields.spec=0-2:5- \
     -D mapreduce.job.reduces=12 \
     -input myInputDirs \
     -output myOutputDir \
@@ -591,13 +591,13 @@
 </source>
 
 <p>
-The option "-D mapreduce.fieldsel.mapreduce.fieldsel.map.output.key.value.fields.spec=6,5,1-3:0-" specifies key/value selection for the map outputs. 
+The option "-D mapreduce.fieldsel.map.output.key.value.fields.spec=6,5,1-3:0-" specifies key/value selection for the map outputs. 
 Key selection spec and value selection spec are separated by ":". 
 In this case, the map output key will consist of fields 6, 5, 1, 2, and 3. 
 The map output value will consist of all fields (0- means field 0 and all the subsequent fields). 
 </p>
 <p>
-The option "-D mapreduce.fieldsel.mapreduce.fieldsel.reduce.output.key.value.fields.spec=0-2:5-" specifies 
+The option "-D mapreduce.fieldsel.reduce.output.key.value.fields.spec=0-2:5-" specifies 
 key/value selection for the reduce outputs. In this case, the reduce 
 output key will consist of fields 0, 1, 2 (corresponding to the original 
 fields 6, 5, 1). The reduce output value will consist of all fields starting

Propchange: hadoop/mapreduce/branches/MAPREDUCE-233/src/examples/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Jan 26 14:02:53 2010
@@ -1,3 +1,3 @@
 /hadoop/core/branches/branch-0.19/mapred/src/examples:713112
 /hadoop/core/trunk/src/examples:776175-784663
-/hadoop/mapreduce/trunk/src/examples:804974-885774
+/hadoop/mapreduce/trunk/src/examples:804974-903221

Modified: hadoop/mapreduce/branches/MAPREDUCE-233/src/examples/org/apache/hadoop/examples/terasort/TeraChecksum.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/MAPREDUCE-233/src/examples/org/apache/hadoop/examples/terasort/TeraChecksum.java?rev=903227&r1=903226&r2=903227&view=diff
==============================================================================
--- hadoop/mapreduce/branches/MAPREDUCE-233/src/examples/org/apache/hadoop/examples/terasort/TeraChecksum.java (original)
+++ hadoop/mapreduce/branches/MAPREDUCE-233/src/examples/org/apache/hadoop/examples/terasort/TeraChecksum.java Tue Jan 26 14:02:53 2010
@@ -18,40 +18,31 @@
 package org.apache.hadoop.examples.terasort;
 
 import java.io.IOException;
-import java.util.Iterator;
 import java.util.zip.Checksum;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.Cluster;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
 import org.apache.hadoop.util.PureJavaCrc32;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 
 public class TeraChecksum extends Configured implements Tool {
-  static class ChecksumMapper extends MapReduceBase 
-         implements Mapper<Text,Text,NullWritable,Unsigned16> {
-    private OutputCollector<NullWritable,Unsigned16> output;
+  static class ChecksumMapper 
+      extends Mapper<Text, Text, NullWritable, Unsigned16> {
     private Unsigned16 checksum = new Unsigned16();
     private Unsigned16 sum = new Unsigned16();
     private Checksum crc32 = new PureJavaCrc32();
 
     public void map(Text key, Text value, 
-                    OutputCollector<NullWritable,Unsigned16> output,
-                    Reporter reporter) throws IOException {
-      if (this.output == null) {
-        this.output = output;
-      }
+                    Context context) throws IOException {
       crc32.reset();
       crc32.update(key.getBytes(), 0, key.getLength());
       crc32.update(value.getBytes(), 0, value.getLength());
@@ -59,23 +50,22 @@
       sum.add(checksum);
     }
 
-    public void close() throws IOException {
-      if (output != null) {
-        output.collect(NullWritable.get(), sum);
-      }
+    public void cleanup(Context context) 
+        throws IOException, InterruptedException {
+      context.write(NullWritable.get(), sum);
     }
   }
 
-  static class ChecksumReducer extends MapReduceBase 
-         implements Reducer<NullWritable,Unsigned16,NullWritable,Unsigned16> {
-    public void reduce(NullWritable key, Iterator<Unsigned16> values,
-                       OutputCollector<NullWritable, Unsigned16> output, 
-                       Reporter reporter) throws IOException {
+  static class ChecksumReducer 
+      extends Reducer<NullWritable, Unsigned16, NullWritable, Unsigned16> {
+
+    public void reduce(NullWritable key, Iterable<Unsigned16> values,
+        Context context) throws IOException, InterruptedException  {
       Unsigned16 sum = new Unsigned16();
-      while (values.hasNext()) {
-        sum.add(values.next());
+      for (Unsigned16 val : values) {
+        sum.add(val);
       }
-      output.collect(key, sum);
+      context.write(key, sum);
     }
   }
 
@@ -84,10 +74,10 @@
   }
 
   public int run(String[] args) throws Exception {
-    JobConf job = (JobConf) getConf();
+    Job job = Job.getInstance(new Cluster(getConf()), getConf());
     if (args.length != 2) {
       usage();
-      return 1;
+      return 2;
     }
     TeraInputFormat.setInputPaths(job, new Path(args[0]));
     FileOutputFormat.setOutputPath(job, new Path(args[1]));
@@ -99,16 +89,15 @@
     job.setOutputValueClass(Unsigned16.class);
     // force a single reducer
     job.setNumReduceTasks(1);
-    job.setInputFormat(TeraInputFormat.class);
-    JobClient.runJob(job);
-    return 0;
+    job.setInputFormatClass(TeraInputFormat.class);
+    return job.waitForCompletion(true) ? 0 : 1;
   }
 
   /**
    * @param args
    */
   public static void main(String[] args) throws Exception {
-    int res = ToolRunner.run(new JobConf(), new TeraChecksum(), args);
+    int res = ToolRunner.run(new Configuration(), new TeraChecksum(), args);
     System.exit(res);
   }
 


