hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From yhema...@apache.org
Subject svn commit: r893469 - in /hadoop/mapreduce/trunk: ./ conf/ src/contrib/ src/contrib/capacity-scheduler/ src/contrib/capacity-scheduler/src/java/ src/docs/src/documentation/content/xdocs/ src/java/org/apache/hadoop/mapred/tools/
Date Wed, 23 Dec 2009 10:53:57 GMT
Author: yhemanth
Date: Wed Dec 23 10:53:56 2009
New Revision: 893469

URL: http://svn.apache.org/viewvc?rev=893469&view=rev
Log:
MAPREDUCE-1009. Update forrest documentation describing hierarchical queues. Contributed by Vinod Kumar Vavilapalli.

Added:
    hadoop/mapreduce/trunk/build-utils.xml
    hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/mapred-queues.xml.template
Modified:
    hadoop/mapreduce/trunk/.gitignore
    hadoop/mapreduce/trunk/CHANGES.txt
    hadoop/mapreduce/trunk/build.xml
    hadoop/mapreduce/trunk/conf/capacity-scheduler.xml.template
    hadoop/mapreduce/trunk/conf/mapred-queues.xml.template
    hadoop/mapreduce/trunk/src/contrib/build-contrib.xml
    hadoop/mapreduce/trunk/src/contrib/build.xml
    hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/build.xml
    hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/   (props changed)
    hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml
    hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml
    hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/commands_manual.xml
    hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml
    hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/site.xml
    hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java

Modified: hadoop/mapreduce/trunk/.gitignore
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/.gitignore?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/.gitignore (original)
+++ hadoop/mapreduce/trunk/.gitignore Wed Dec 23 10:53:56 2009
@@ -36,6 +36,7 @@
 conf/mapred-queues.xml
 docs/api/
 logs/
+src/contrib/capacity-scheduler/src/java/mapred-queues.xml
 src/contrib/index/conf/index-config.xml
 src/docs/build
 src/docs/cn/build

Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Wed Dec 23 10:53:56 2009
@@ -1073,3 +1073,7 @@
 
     MAPREDUCE-1059. Use distcp.bytes.per.map when adding sync markers in
     distcp. (Aaron Kimball via cdouglas)
+
+    MAPREDUCE-1009. Update forrest documentation describing hierarchical
+    queues. (Vinod Kumar Vavilapalli via yhemanth)
+

Added: hadoop/mapreduce/trunk/build-utils.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/build-utils.xml?rev=893469&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/build-utils.xml (added)
+++ hadoop/mapreduce/trunk/build-utils.xml Wed Dec 23 10:53:56 2009
@@ -0,0 +1,33 @@
+<?xml version="1.0"?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<!--
+Contains utilities that are common for the main and contrib builds.
+-->
+<project name="build-utils">
+
+  <target name="java5.check" unless="java5.home">
+    <fail message="'java5.home' is not defined.  Forrest requires Java 5.  Please pass -Djava5.home=&lt;base of Java 5 distribution&gt; to Ant on the command-line." />
+  </target>
+	
+  <target name="forrest.check" unless="forrest.home" depends="java5.check">
+    <fail message="'forrest.home' is not defined. Please pass -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line." />
+  </target>
+  
+</project>
\ No newline at end of file

Modified: hadoop/mapreduce/trunk/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/build.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/build.xml (original)
+++ hadoop/mapreduce/trunk/build.xml Wed Dec 23 10:53:56 2009
@@ -21,6 +21,8 @@
    xmlns:artifact="urn:maven-artifact-ant"
    xmlns:ivy="antlib:org.apache.ivy.ant"> 
 
+  <import file="build-utils.xml" />
+
   <!-- Load all the default properties, and any the user wants    -->
   <!-- to contribute (without having to type -D or edit this file -->
   <property file="${user.home}/build.properties" />
@@ -787,6 +789,8 @@
   <!-- ================================================================== -->
   
   <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+  	<copy file="${conf.dir}/mapred-queues.xml.template"
+  	      tofile="${build.docs}/mapred-queues.xml"/>
     <exec dir="${docs.src}" executable="${forrest.home}/bin/forrest"
 	  failonerror="true">
       <env key="JAVA_HOME" value="${java5.home}"/>
@@ -798,16 +802,12 @@
     <style basedir="${mapred.src.dir}" destdir="${build.docs}"
            includes="mapred-default.xml" style="conf/configuration.xsl"/>
     <antcall target="changes-to-html"/>
+    <subant target="docs">
+       <property name="build.docs" value="${build.docs}"/>
+       <fileset file="${contrib.dir}/build.xml"/>
+    </subant> 
   </target>
 
-  <target name="forrest.check" unless="forrest.home" depends="java5.check">
-    <fail message="'forrest.home' is not defined. Please pass -Dforrest.home=&lt;base of Apache Forrest installation&gt; to Ant on the command-line." />
-  </target>
-
-  <target name="java5.check" unless="java5.home">
-    <fail message="'java5.home' is not defined.  Forrest requires Java 5.  Please pass -Djava5.home=&lt;base of Java 5 distribution&gt; to Ant on the command-line." />
-  </target>
-	
   <target name="javadoc-dev" depends="compile, ivy-retrieve-javadoc" description="Generate javadoc for hadoop developers">
     <mkdir dir="${build.javadoc.dev}"/>
     <javadoc

Modified: hadoop/mapreduce/trunk/conf/capacity-scheduler.xml.template
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/conf/capacity-scheduler.xml.template?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/conf/capacity-scheduler.xml.template (original)
+++ hadoop/mapreduce/trunk/conf/capacity-scheduler.xml.template Wed Dec 23 10:53:56 2009
@@ -1,77 +1,38 @@
 <?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<!-- This is one of the configuration files for capacity-scheduler
+     (org.apache.hadoop.mapred.CapacityTaskScheduler), a TaskScheduler
+     for the Map/Reduce system. The other configuration file is
+     conf/mapred-queues.xml which it shares with the framework for
+     configuring queues in the system. -->
+
+<!-- This file can be used to configure (1) job-initialization-poller
+     related properties and (2) the default values for various properties
+     for all the queues.-->
+
+<configuration>   
+  <!-- The default configuration settings for the capacity task scheduler --> 
+  <!-- The default values would be applied to all the queues which don't have -->    
+  <!-- the appropriate property for the particular queue configured in the -->       
+  <!-- queue-configuration file conf/mapred-queues.xml -->                           
 
-<!-- This is the configuration file for the resource manager in Hadoop. -->
-<!-- You can configure various scheduling parameters related to queues. -->
-<!-- The properties for a queue follow a naming convention,such as, -->
-<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->
-
-<configuration>
-
-  <property>
-    <name>mapred.capacity-scheduler.queue.default.capacity</name>
-    <value>100</value>
-    <description>Percentage of the number of slots in the cluster that are
-      to be available for jobs in this queue.
-    </description>    
-  </property>
-
-  <property>
-    <name>mapred.capacity-scheduler.queue.default.subQueues</name>
-    <value></value>
-    <description>Sub-queues are queues configured within queues. 
-       They provide a mechanism for administrators to link logically related queues
-       Sub-queues can be nested. So there can be queues within a sub-queue.
-    </description>    
-  </property>
-
-  <property>
-    <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
-    <value>-1</value>
-    <description>
-	maximum-capacity-stretch defines a limit beyond which a sub-queue cannot use the capacity of its parent queue.
-	This provides a means to limit how much excess capacity a sub-queue can use. By default, there is no limit.
-	The maximum-capacity-stretch of a queue can only be greater than or equal to its minimum capacity.
-        Default value of 100 implies , sub-queue can use complete capacity of its parent.
-        This property could be to curtail certain jobs which are long running in nature from occupying more than a 
-        certain percentage of the cluster, which in the absence of pre-emption, could lead to capacity guarantees of 
-        other queues being affected.
-    </description>    
-  </property>
-  
-  <property>
-    <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
-    <value>false</value>
-    <description>If true, priorities of jobs will be taken into 
-      account in scheduling decisions.
-    </description>
-  </property>
-
-  <property>
-    <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
-    <value>100</value>
-    <description> Each queue enforces a limit on the percentage of resources 
-    allocated to a user at any given time, if there is competition for them. 
-    This user limit can vary between a minimum and maximum value. The former
-    depends on the number of users who have submitted jobs, and the latter is
-    set to this property value. For example, suppose the value of this 
-    property is 25. If two users have submitted jobs to a queue, no single 
-    user can use more than 50% of the queue resources. If a third user submits
-    a job, no single user can use more than 33% of the queue resources. With 4 
-    or more users, no user can use more than 25% of the queue's resources. A 
-    value of 100 implies no user limits are imposed. 
-    </description>
-  </property>
-  <property>
-    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
-    <value>2</value>
-    <description>The maximum number of jobs to be pre-initialized for a user
-    of the job queue.
-    </description>
-  </property>
-
-  <!-- The default configuration settings for the capacity task scheduler -->
-  <!-- The default values would be applied to all the queues which don't have -->
-  <!-- the appropriate property for the particular queue -->
   <property>
     <name>mapred.capacity-scheduler.default-supports-priority</name>
     <value>false</value>

Modified: hadoop/mapreduce/trunk/conf/mapred-queues.xml.template
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/conf/mapred-queues.xml.template?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/conf/mapred-queues.xml.template (original)
+++ hadoop/mapreduce/trunk/conf/mapred-queues.xml.template Wed Dec 23 10:53:56 2009
@@ -1,4 +1,20 @@
 <?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
 <!-- This is the template for queue configuration. The format supports nesting of
      queues within queues - a feature called hierarchical queues. All queues are
      defined within the 'queues' tag which is the top level element for this

Modified: hadoop/mapreduce/trunk/src/contrib/build-contrib.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/build-contrib.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/build-contrib.xml (original)
+++ hadoop/mapreduce/trunk/src/contrib/build-contrib.xml Wed Dec 23 10:53:56 2009
@@ -21,6 +21,8 @@
 
 <project name="hadoopbuildcontrib" xmlns:ivy="antlib:org.apache.ivy.ant">
 
+  <import file="../../build-utils.xml" />
+
   <property name="name" value="${ant.project.name}"/>
   <property name="root" value="${basedir}"/>
 
@@ -259,6 +261,10 @@
     <antcall target="checkfailure"/>
   </target>
 
+  <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+	<!-- Nothing by default -->
+  </target>
+
   <target name="checkfailure" if="tests.failed">
     <touch file="${build.contrib.dir}/testsfailed"/>
     <fail unless="continueOnFailure">Contrib Tests failed!</fail>

Modified: hadoop/mapreduce/trunk/src/contrib/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/build.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/build.xml (original)
+++ hadoop/mapreduce/trunk/src/contrib/build.xml Wed Dec 23 10:53:56 2009
@@ -64,6 +64,12 @@
     <fail if="testsfailed">Tests failed!</fail>
   </target>
 
+  <target name="docs">
+    <subant target="docs">
+      <fileset dir="." includes="capacity-scheduler/build.xml"/> 
+    </subant>
+  </target>
+
   <!-- ====================================================== -->
   <!-- Clean all the contribs.                              -->
   <!-- ====================================================== -->

Modified: hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/build.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/build.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/build.xml (original)
+++ hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/build.xml Wed Dec 23 10:53:56 2009
@@ -25,4 +25,12 @@
 
   <import file="../build-contrib.xml"/>
 
+  <target name="docs" depends="forrest.check" description="Generate forrest-based documentation. To use, specify -Dforrest.home=&lt;base of Apache Forrest installation&gt; on the command line." if="forrest.home">
+  	<copy file="src/java/mapred-queues.xml.template"
+  	      tofile="${build.docs}/mapred-queues-capacity-scheduler.xml"/>
+    <xslt in="${conf.dir}/capacity-scheduler.xml.template"
+    	out="${build.docs}/capacity-scheduler-conf.html"
+    	style="${conf.dir}/configuration.xsl"/>
+  </target>
+
 </project>

Propchange: hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Wed Dec 23 10:53:56 2009
@@ -0,0 +1 @@
+mapred-queues.xml

Added: hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/mapred-queues.xml.template
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/mapred-queues.xml.template?rev=893469&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/mapred-queues.xml.template (added)
+++ hadoop/mapreduce/trunk/src/contrib/capacity-scheduler/src/java/mapred-queues.xml.template Wed Dec 23 10:53:56 2009
@@ -0,0 +1,164 @@
+<?xml version="1.0"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<!--
+     This is the template for queue configuration when the configured
+     scheduler to use is capacity-scheduler
+     (org.apache.hadoop.mapred.CapacityTaskScheduler). To use this,
+     copy this file into the conf directory, renaming it to mapred-queues.xml.
+-->
+<queues aclsEnabled="false">
+  <queue>
+
+    <name>default</name>
+
+    <state>running</state>
+
+    <acl-submit-job>*</acl-submit-job>
+    <acl-administer-jobs>*</acl-administer-jobs>
+
+    <properties>
+
+      <property key="capacity" value="100">
+		<!--
+        <description>
+        For a root-level container queue, this is the percentage of the
+      	number of slots in the cluster that will be available for all its
+      	immediate children together. For a root-level leaf-queue, this is
+      	the percentage of the number of slots in the cluster that will be
+      	available for all its jobs.	For a non-root level container queue,
+      	this is the percentage of the number of slots in its parent queue
+      	that will be available for all its	children together. For a
+      	non-root-level leaf queue, this	is the percentage of the number of
+      	slots in its parent queue that will be available for jobs in this
+      	queue. The sum of capacities for all children of a container queue
+      	should be less than or equal to 100. The sum of capacities of all the
+      	root-level queues should be less than or equal to 100.
+
+          This property can be refreshed.
+        </description>
+        -->    
+      </property>
+
+      <property key="maximum-capacity" value="-1">
+        <!--
+        <description>
+          A limit in percentage beyond which a non-root-level queue cannot use
+          the capacity of its parent queue; for a root-level queue, this is
+          the limit in percentage beyond which it cannot use the
+          cluster-capacity. This property provides a means to limit how much
+          excess capacity a queue can use.  It can be used to prevent queues
+          with long running jobs from occupying more than a certain percentage
+          of the parent-queue or the cluster, which, in the absence of
+          pre-emption, can lead to capacity guarantees of other queues getting
+          affected.
+
+          The maximum-capacity of a queue can only be greater than or equal to
+          its capacity. By default, there is no limit for a queue. For a
+          non-root-level queue this means it can occupy up to the
+          maximum-capacity of its parent; for a root-level queue, it means that
+          it can occupy the whole cluster. A value of 100 implies that a queue
+          can use the complete capacity of its parent, or the complete
+          cluster-capacity in case of root-level-queues.
+
+          This property can be refreshed.
+        </description>
+        -->
+      </property>
+
+      <property key="supports-priority" value="false">
+        <!--
+        <description>This is only applicable to leaf queues. If true,
+          priorities of jobs will be taken into account in scheduling
+          decisions.
+
+          This property CANNOT be refreshed.
+        </description>
+        -->
+      </property>
+
+      <property key="minimum-user-limit-percent" value="100">
+        <!--
+        <description>This is only applicable to leaf queues. Each queue
+        enforces a limit on the percentage of resources allocated to a user at
+        any given time, if there is competition for them. This user limit can
+        vary between a minimum and maximum value. The former depends on the
+        number of users who have submitted jobs, and the latter is set to this
+        property value. For example, suppose the value of this property is 25.
+        If two users have submitted jobs to a queue, no single user can use
+        more than 50% of the queue resources. If a third user submits a job,
+        no single user can use more than 33% of the queue resources. With 4 
+        or more users, no user can use more than 25% of the queue's resources.
+        A value of 100 implies no user limits are imposed.
+
+        This property can be refreshed.
+        </description>
+        -->
+      </property>
+
+      <property key="maximum-initialized-jobs-per-user" value="2">
+      <!--
+        <description>This is only applicable to leaf queues. The maximum number
+        of jobs to be pre-initialized for a user of the job queue.
+
+        This property can be refreshed.
+        </description>
+      -->
+      </property>
+
+    </properties>
+  </queue>
+
+  <!-- Here is a sample of a hierarchical queue configuration
+       where q2 and q3 are children of q1 sharing the capacity
+       of q1. In this example, q2 and q3 are leaf level
+       queues as they have no queues configured within them. Currently, ACLs
+       and state are only supported for the leaf level queues.
+  <queue>
+    <name>q1</name>
+    <properties>
+      <property key="capacity" value="100"/>
+    </properties>
+    <queue>
+      <name>q2</name>
+      <state>stopped</state>
+      <acl-submit-job>*</acl-submit-job>
+      <acl-administer-jobs>*</acl-administer-jobs>
+      <properties>
+        <property key="capacity" value="50"/>
+        <property key="maximum-capacity" value="60"/>
+        <property key="supports-priority" value="false"/>
+        <property key="minimum-user-limit-percent" value="100"/>
+        <property key="maximum-initialized-jobs-per-user" value="2"/>
+      </properties>
+    </queue>
+    <queue>
+      <name>q3</name>
+      <state>stopped</state>
+      <acl-submit-job>*</acl-submit-job>
+      <acl-administer-jobs>*</acl-administer-jobs>
+      <properties>
+        <property key="capacity" value="50"/>
+        <property key="maximum-capacity" value="-1"/>
+        <property key="supports-priority" value="false"/>
+        <property key="minimum-user-limit-percent" value="100"/>
+        <property key="maximum-initialized-jobs-per-user" value="2"/>
+      </properties>
+    </queue>
+  </queue>
+ -->
+</queues>

Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/capacity_scheduler.xml Wed Dec 23 10:53:56 2009
@@ -40,7 +40,8 @@
       <p>The Capacity Scheduler supports the following features:</p> 
       <ul>
         <li>
-          Multiple queues, where a job is submitted to a queue.
+          Multiple queues, possibly hierarchical/recursive, where a job is
+          submitted to a queue.
         </li>
         <li>
           Queues are allocated a fraction of the capacity of the grid in the 
@@ -72,11 +73,23 @@
           competition for them.  
         </li>
         <li>
+          Queues can use idle resources of other queues. To prevent particular
+          queues from monopolizing resources, a cap can be set on each queue
+          limiting the maximum amount of resources it can expand to in
+          the presence of idle resources in other queues of the cluster.
+        </li>
+        <li>
           Support for memory-intensive jobs, wherein a job can optionally 
           specify higher memory-requirements than the default, and the tasks 
           of the job will only be run on TaskTrackers that have enough memory 
           to spare.
         </li>
+        <li>
+          Support for refreshing/reloading some of the queue-properties
+          without restarting the JobTracker, taking advantage of the
+          <a href="cluster_setup.html#Refreshing+queue+configuration">
+          queue-refresh</a> feature in the framework.
+        </li>
       </ul>
     </section>
     
@@ -144,54 +157,113 @@
       <section>
         <title>Setting Up Queues</title>
         <p>
-          You can define multiple queues to which users can submit jobs with
-          the Capacity Scheduler. To define multiple queues, you should edit
-          the site configuration for Hadoop and modify the
-          <em>mapreduce.jobtracker.taskscheduler.queue.names</em> property.
-        </p>
-        <p>
-          You can also configure ACLs for controlling which users or groups
-          have access to the queues.
-        </p>
-        <p>
-          For more details, see
-          <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Configuring+the+Hadoop+Daemons">Configuring the Hadoop Daemons</a>.
-        </p>
+          You can define multiple, possibly hierarchical queues to which users
+          can submit jobs with the Capacity Scheduler. To define queues,
+          various properties should be set in two configuration files -
+          <a href="cluster_setup.html#mapred-queues.xml">mapred-queues.xml</a>
+          and
+          <a href="ext:capacity-scheduler-conf">conf/capacity-scheduler.xml</a>
+          .</p>
+          <p><em>conf/capacity-scheduler.xml</em> can be used to configure (1)
+          job-initialization-poller related properties and (2) the
+          default values for various properties in the queues.</p>
+          <p><em>conf/mapred-queues.xml</em> contains the actual queue
+          configuration including (1) framework specific properties like ACLs
+          for controlling which users or groups have access to the queues and
+          state of the queues and (2) the scheduler specific properties for
+          each queue. If any of these scheduler specific properties are
+          missing and not configured for a queue, then the properties in
+          <em>conf/capacity-scheduler.xml</em> are used to set default values.
+          More details about the properties that can be configured, and their
+          semantics, are given below. Also, a default template for
+          mapred-queues.xml tailored for use with
+          Capacity-scheduler can be found
+          <a href="ext:mapred-queues-capacity-scheduler">here</a>.</p>
       </section>
   
       <section>
         <title>Configuring Properties for Queues</title>
 
         <p>The Capacity Scheduler can be configured with several properties
-        for each queue that control the behavior of the Scheduler. This
-        configuration is in the <em>conf/capacity-scheduler.xml</em>. By
+        for each queue that control the behavior of the Scheduler. As
+        described above, this scheduler specific configuration has to be in
+        the <em>conf/mapred-queues.xml</em> along with the rest of the
+        framework specific configuration. By
         default, the configuration is set up for one queue, named 
         <em>default</em>.</p>
-        <p>To specify a property for a queue that is defined in the site
-        configuration, you should use the property name as
-        <em>mapred.capacity-scheduler.queue.&lt;queue-name&gt;.&lt;property-name&gt;</em>.
-        </p>
-        <p>For example, to define the property <em>capacity</em>
-        for queue named <em>research</em>, you should specify the property
-        name as 
-        <em>mapred.capacity-scheduler.queue.research.capacity</em>.
+        <p>To specify a property for a specific queue that is defined in the
+        mapred-queues.xml, you should set the corresponding property in a
+        &lt;property&gt; tag explained
+        <a href="cluster_setup.html#property_tag">here</a>.
         </p>
 
         <p>The properties defined for queues and their descriptions are
         listed in the table below:</p>
 
         <table>
-          <tr><th>Name</th><th>Description</th></tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.capacity</td>
-          	<td>Percentage of the number of slots in the cluster that are made 
-            to be available for jobs in this queue. The sum of capacities 
-            for all queues should be less than or equal 100.</td>
+          <tr>
+          	<th>Name</th>
+            <th>
+            	<a href="commands_manual.html#RefreshQueues">
+            	Refresh-able?</a>
+           	</th>
+            <th>Applicable to?</th>
+            <th>Description</th>
+          </tr>
+          <tr>
+          	<td>capacity</td>
+          	<td>Yes</td>
+          	<td>Container queues as well as leaf queues</td>
+          	<td>For a root-level container queue, this is the percentage of the
+          	number of slots in the cluster that will be available for all its
+          	immediate children together. For a root-level leaf-queue, this is
+          	the percentage of the number of slots in the cluster that will be
+          	available for all its jobs.	For a non-root level container queue,
+          	this is the percentage of the number of slots in its parent queue
+          	that will be available for all its	children together. For a
+          	non-root-level leaf queue, this	is the percentage of the number of
+          	slots in its parent queue that will be available for jobs in this
+          	queue. The sum of capacities for all children of a container queue
+          	should be less than or equal to 100. The sum of capacities of all the
+          	root-level queues should be less than or equal to 100.
+            </td>
+          </tr>
+          <tr>
+            <td>maximum-capacity</td>
+            <td>Yes</td>
+          	<td>Container queues as well as leaf queues</td>
+            <td>
+	          A limit in percentage beyond which a non-root-level queue cannot use
+	          the capacity of its parent queue; for a root-level queue, this is
+	          the limit in percentage beyond which it cannot use the
+	          cluster-capacity. This property provides a means to limit how much
+	          excess capacity a queue can use.  It can be used to prevent queues
+	          with long running jobs from occupying more than a certain percentage
+	          of the parent-queue or the cluster, which, in the absence of
+	          pre-emption, can lead to capacity guarantees of other queues getting
+	          affected.
+	
+	          The maximum-capacity of a queue can only be greater than or equal to
+	          its capacity. By default, there is no limit for a queue. For a
+	          non-root-level queue this means it can occupy up to the
+	          maximum-capacity of its parent; for a root-level queue, it means that
+	          it can occupy the whole cluster. A value of 100 implies that a queue
+	          can use the complete capacity of its parent, or the complete
+	          cluster-capacity in case of root-level-queues.
+            </td>
           </tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.supports-priority</td>
+          <tr>
+          	<td>supports-priority</td>
+          	<td>No</td>
+          	<td>Leaf queues only</td>
           	<td>If true, priorities of jobs will be taken into account in scheduling 
-          	decisions.</td>
+          	decisions.
+          	</td>
           </tr>
-          <tr><td>mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.minimum-user-limit-percent</td>
+          <tr>
+          	<td>minimum-user-limit-percent</td>
+          	<td>Yes</td>
+          	<td>Leaf queues only</td>
           	<td>Each queue enforces a limit on the percentage of resources 
           	allocated to a user at any given time, if there is competition 
           	for them. This user limit can vary between a minimum and maximum 
@@ -202,9 +274,25 @@
           	of the queue resources. If a third user submits a job, no single 
           	user can use more than 33% of the queue resources. With 4 or more 
           	users, no user can use more than 25% of the queue's resources. A 
-          	value of 100 implies no user limits are imposed.</td>
+          	value of 100 implies no user limits are imposed.
+	        </td>
           </tr>
+          <tr>
+            <td>maximum-initialized-jobs-per-user</td>
+            <td>Yes</td>
+          	<td>Leaf queues only</td>
+            <td>
+              Maximum number of jobs which are allowed to be pre-initialized for
+              a particular user in the queue. Once a job is scheduled, i.e.
+              it starts running, then that job is not considered
+              while the scheduler computes the maximum number of jobs a user is
+              allowed to initialize.
+            </td>
+          </tr>  
         </table>
+        <p>See  <a href="ext:mapred-queues-capacity-scheduler">
+        this configuration file</a> for a default configuration of queues in
+        capacity-scheduler.</p>
       </section>
       
       <section>
@@ -296,18 +384,6 @@
           <tr><th>Name</th><th>Description</th></tr>
           <tr>
             <td>
-              mapred.capacity-scheduler.queue.&lt;queue-<br/>name&gt;.maximum-initialized-jobs-per-user
-            </td>
-            <td>
-              Maximum number of jobs which are allowed to be pre-initialized for
-              a particular user in the queue. Once a job is scheduled, i.e. 
-              it starts running, then that job is not considered
-              while scheduler computes the maximum job a user is allowed to
-              initialize. 
-            </td>
-          </tr>
-          <tr>
-            <td>
               mapred.capacity-scheduler.init-poll-interval
             </td>
             <td>

Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/cluster_setup.xml Wed Dec 23 10:53:56 2009
@@ -33,7 +33,7 @@
       Hadoop clusters ranging from a few nodes to extremely large clusters with 
       thousands of nodes.</p>
       <p>
-      To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="quickstart.html"> Hadoop Quick Start</a>).
+      To play with Hadoop, you may first want to install Hadoop on a single machine (see <a href="ext:single-node-setup"> Hadoop Quick Start</a>).
       </p>
     </section>
     
@@ -42,11 +42,11 @@
       
       <ol>
         <li>
-          Make sure all <a href="quickstart.html#PreReqs">requisite</a> software 
+          Make sure all <a href="ext:single-node-setup/PreReqs">requisite</a> software 
           is installed on all nodes in your cluster.
         </li>
         <li>
-          <a href="quickstart.html#Download">Get</a> the Hadoop software.
+          <a href="ext:single-node-setup/Download">Get</a> the Hadoop software.
         </li>
       </ol>
     </section>
@@ -81,15 +81,17 @@
         <ol>
           <li>
             Read-only default configuration - 
-            <a href="ext:core-default">src/core/core-default.xml</a>, 
-            <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a> and 
-            <a href="ext:mapred-default">src/mapred/mapred-default.xml</a>.
+            <a href="ext:common-default">src/core/core-default.xml</a>, 
+            <a href="ext:hdfs-default">src/hdfs/hdfs-default.xml</a>, 
+            <a href="ext:mapred-default">src/mapred/mapred-default.xml</a> and
+            <a href="ext:mapred-queues">conf/mapred-queues.xml.template</a>.
           </li>
           <li>
             Site-specific configuration - 
-            <em>conf/core-site.xml</em>, 
-            <em>conf/hdfs-site.xml</em> and 
-            <em>conf/mapred-site.xml</em>.
+            <a href="#core-site.xml">conf/core-site.xml</a>, 
+            <a href="#hdfs-site.xml">conf/hdfs-site.xml</a>, 
+            <a href="#mapred-site.xml">conf/mapred-site.xml</a> and
+            <a href="#mapred-queues.xml">conf/mapred-queues.xml</a>.
           </li>
         </ol>
       
@@ -163,9 +165,8 @@
           <title>Configuring the Hadoop Daemons</title>
           
           <p>This section deals with important parameters to be specified in the
-          following:
-          <br/>
-          <code>conf/core-site.xml</code>:</p>
+          following:</p>
+          <anchor id="core-site.xml"/><p><code>conf/core-site.xml</code>:</p>
 
 		  <table>
   		    <tr>
@@ -180,7 +181,7 @@
             </tr>
           </table>
 
-      <p><br/><code>conf/hdfs-site.xml</code>:</p>
+      <anchor id="hdfs-site.xml"/><p><code>conf/hdfs-site.xml</code>:</p>
           
       <table>   
         <tr>
@@ -212,7 +213,7 @@
 		    </tr>
       </table>
 
-      <p><br/><code>conf/mapred-site.xml</code>:</p>
+      <anchor id="mapred-site.xml"/><p><code>conf/mapred-site.xml</code>:</p>
 
       <table>
           <tr>
@@ -271,83 +272,321 @@
 		        TaskTrackers.
 		      </td>
   		    </tr>
-        <tr>
-          <td>mapred.queue.names</td>
-          <td>Comma separated list of queues to which jobs can be submitted.</td>
-          <td>
-            The Map/Reduce system always supports atleast one queue
-            with the name as <em>default</em>. Hence, this parameter's
-            value should always contain the string <em>default</em>.
-            Some job schedulers supported in Hadoop, like the 
-            <a href="capacity_scheduler.html">Capacity 
-            Scheduler</a>, support multiple queues. If such a scheduler is
-            being used, the list of configured queue names must be
-            specified here. Once queues are defined, users can submit
-            jobs to a queue using the property name 
-            <em>mapreduce.job.queuename</em> in the job configuration.
-            There could be a separate 
-            configuration file for configuring properties of these 
-            queues that is managed by the scheduler. 
-            Refer to the documentation of the scheduler for information on 
-            the same.
-          </td>
-        </tr>
-        <tr>
-          <td>mapred.acls.enabled</td>
-          <td>Specifies whether ACLs are supported for controlling job
-              submission and administration</td>
-          <td>
-            If <em>true</em>, ACLs would be checked while submitting
-            and administering jobs. ACLs can be specified using the
-            configuration parameters of the form
-            <em>mapred.queue.queue-name.acl-name</em>, defined below.
-          </td>
-        </tr>
-		  </table>
-      
-      <p><br/><code> conf/mapred-queue-acls.xml</code></p>
-      
-      <table>
-       <tr>
-          <th>Parameter</th>
-          <th>Value</th> 
-          <th>Notes</th>
-       </tr>
-        <tr>
-          <td>mapred.queue.<em>queue-name</em>.acl-submit-job</td>
-          <td>List of users and groups that can submit jobs to the
-              specified <em>queue-name</em>.</td>
-          <td>
-            The list of users and groups are both comma separated
-            list of names. The two lists are separated by a blank.
-            Example: <em>user1,user2 group1,group2</em>.
-            If you wish to define only a list of groups, provide
-            a blank at the beginning of the value.
-          </td>
-        </tr>
-        <tr>
-          <td>mapred.queue.<em>queue-name</em>.acl-administer-job</td>
-          <td>List of users and groups that can change the priority
-              or kill jobs that have been submitted to the
-              specified <em>queue-name</em>.</td>
-          <td>
-            The list of users and groups are both comma separated
-            list of names. The two lists are separated by a blank.
-            Example: <em>user1,user2 group1,group2</em>.
-            If you wish to define only a list of groups, provide
-            a blank at the beginning of the value. Note that an
-            owner of a job can always change the priority or kill
-            his/her own job, irrespective of the ACLs.
-          </td>
-        </tr>
-      </table>
-      
+		  </table>      
 
           <p>Typically all the above parameters are marked as 
           <a href="ext:api/org/apache/hadoop/conf/configuration/final_parameters">
           final</a> to ensure that they cannot be overridden by user-applications.
           </p>
 
+          <anchor id="mapred-queues.xml"/><p><code>conf/mapred-queues.xml
+          </code>:</p>
+          <p>This file is used to configure the queues in the Map/Reduce
+          system. Queues are abstract entities in the JobTracker that can be
+          used to manage collections of jobs. They provide a way for 
+          administrators to organize jobs in specific ways and to enforce 
+          certain policies on such collections, thus providing varying
+          levels of administrative control and management functions on jobs.
+          </p> 
+          <p>One can imagine the following sample scenarios:</p>
+          <ul>
+            <li> Jobs submitted by a particular group of users can all be 
+            submitted to one queue. </li> 
+            <li> Long running jobs in an organization can be submitted to a
+            queue. </li>
+            <li> Short running jobs can be submitted to a queue and the number
+            of jobs that can run concurrently can be restricted. </li> 
+          </ul> 
+          <p>The usage of queues is closely tied to the scheduler configured
+          at the JobTracker via <em>mapreduce.jobtracker.taskscheduler</em>.
+          The degree of support of queues depends on the scheduler used. Some
+          schedulers support a single queue, while others support more complex
+          configurations. Schedulers also implement the policies that apply 
+          to jobs in a queue. Some schedulers, such as the Fairshare scheduler,
+          implement their own mechanisms for collections of jobs and do not rely
+          on queues provided by the framework. The administrators are 
+          encouraged to refer to the documentation of the scheduler they are
+          interested in for determining the level of support for queues.</p>
+          <p>The Map/Reduce framework supports some basic operations on queues
+          such as job submission to a specific queue, access control for queues,
+          queue states, viewing configured queues and their properties
+          and refresh of queue properties. In order to fully implement some of
+          these operations, the framework takes the help of the configured
+          scheduler.</p>
+          <p>The following types of queue configurations are possible:</p>
+          <ul>
+            <li> Single queue: The default configuration in Map/Reduce comprises
+            a single queue, as supported by the default scheduler. All jobs
+            are submitted to this default queue which maintains jobs in a priority
+            based FIFO order.</li>
+            <li> Multiple single level queues: Multiple queues are defined, and
+            jobs can be submitted to any of these queues. Different policies
+            can be applied to these queues by schedulers that support this 
+            configuration to provide a better level of support. For example,
+            the <a href="capacity_scheduler.html">capacity scheduler</a>
+            provides ways of configuring different 
+            capacity and fairness guarantees on these queues.</li>
+            <li> Hierarchical queues: Hierarchical queues are a configuration in
+            which queues can contain other queues within them recursively. The
+            queues that contain other queues are referred to as 
+            container queues. Queues that do not contain other queues are 
+            referred to as leaf or job queues. Jobs can only be submitted to leaf
+            queues. Hierarchical queues can potentially offer a higher level 
+            of control to administrators, as schedulers can now build a
+            hierarchy of policies where policies applicable to a container
+            queue can provide context for policies applicable to queues it
+            contains. It also opens up possibilities for delegating queue
+            administration where administration of queues in a container queue
+            can be turned over to a different set of administrators, within
+            the context provided by the container queue. For example, the
+            <a href="capacity_scheduler.html">capacity scheduler</a>
+            uses hierarchical queues to partition capacity of a cluster
+            among container queues, and allows the queues they contain to divide
+            that capacity in more ways.</li> 
+          </ul>
+
+          <p>Most of the configuration of the queues can be refreshed/reloaded
+          without restarting the Map/Reduce sub-system by editing this
+          configuration file as described in the section on
+          <a href="commands_manual.html#RefreshQueues">reloading queue 
+          configuration</a>.
+          Not all configuration properties can be reloaded, of course,
+          as the description of each property below explains.</p>
+
+          <p>The format of conf/mapred-queues.xml is different from the other 
+          configuration files, supporting nested configuration
+          elements to support hierarchical queues. The format is as follows:
+          </p>
+
+          <source>
+          &lt;queues aclsEnabled="$aclsEnabled"&gt;
+            &lt;queue&gt;
+              &lt;name&gt;$queue-name&lt;/name&gt;
+              &lt;state&gt;$state&lt;/state&gt;
+              &lt;queue&gt;
+                &lt;name&gt;$child-queue1&lt;/name&gt;
+                &lt;properties&gt;
+                   &lt;property key="$key" value="$value"/&gt;
+                   ...
+                &lt;/properties&gt;
+                &lt;queue&gt;
+                  &lt;name&gt;$grand-child-queue1&lt;/name&gt;
+                  ...
+                &lt;/queue&gt;
+              &lt;/queue&gt;
+              &lt;queue&gt;
+                &lt;name&gt;$child-queue2&lt;/name&gt;
+                ...
+              &lt;/queue&gt;
+              ...
+              ...
+              ...
+              &lt;queue&gt;
+                &lt;name&gt;$leaf-queue&lt;/name&gt;
+                &lt;acl-submit-job&gt;$acls&lt;/acl-submit-job&gt;
+                &lt;acl-administer-jobs&gt;$acls&lt;/acl-administer-jobs&gt;
+                &lt;properties&gt;
+                   &lt;property key="$key" value="$value"/&gt;
+                   ...
+                &lt;/properties&gt;
+              &lt;/queue&gt;
+            &lt;/queue&gt;
+          &lt;/queues&gt;
+          </source>
+          <table>
+            <tr>
+              <th>Tag/Attribute</th>
+              <th>Value</th>
+              <th>
+              	<a href="commands_manual.html#RefreshQueues">Refresh-able?</a>
+              </th>
+              <th>Notes</th>
+            </tr>
+
+            <tr>
+              <td><anchor id="queues_tag"/>queues</td>
+              <td>Root element of the configuration file.</td>
+              <td>Not-applicable</td>
+              <td>All the queues are nested inside this root element of the
+              file. There can be only one root queues element in the file.</td>
+            </tr>
+
+            <tr>
+              <td>aclsEnabled</td>
+              <td>Boolean attribute to the
+              <a href="#queues_tag"><em>&lt;queues&gt;</em></a> tag
+              specifying whether ACLs are supported for controlling job
+              submission and administration for <em>all</em> the queues
+              configured.
+              </td>
+              <td>Yes</td>
+              <td>If <em>false</em>, ACLs are ignored for <em>all</em> the
+              configured queues. <br/><br/>
+              If <em>true</em>, the user and group details of the user
+              are checked against the configured ACLs of the corresponding
+              job-queue while submitting and administering jobs. ACLs can be
+              specified for each queue using the queue-specific tags
+              "acl-$acl_name", defined below. ACLs are checked only against
+              the job-queues, i.e. the leaf-level queues; ACLs configured
+              for the rest of the queues in the hierarchy are ignored.
+              </td>
+            </tr>
+
+            <tr>
+              <td><anchor id="queue_tag"/>queue</td>
+              <td>A child element of the
+              <a href="#queues_tag"><em>&lt;queues&gt;</em></a> tag or another
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a>. Denotes a queue
+              in the system.
+              </td>
+              <td>Not applicable</td>
+              <td>Queues can be hierarchical and so this element can contain
+              children of this same type.</td>
+            </tr>
+
+            <tr>
+              <td>name</td>
+              <td>Child element of a 
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              name of the queue.</td>
+              <td>No</td>
+              <td>Name of the queue cannot contain the character <em>":"</em>
+              which is reserved as the queue-name delimiter when addressing a
+              queue in a hierarchy.</td>
+            </tr>
+
+            <tr>
+              <td>state</td>
+              <td>Child element of a
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              state of the queue.
+              </td>
+              <td>Yes</td>
+              <td>Each queue has a corresponding state. A queue in
+              <em>'running'</em> state can accept new jobs, while a queue in
+              <em>'stopped'</em> state will stop accepting any new jobs. State
+              is defined and respected by the framework only for the
+              leaf-level queues and is ignored for all other queues.
+              <br/><br/>
+              The state of the queue can be viewed from the command line using
+              <code>'bin/mapred queue'</code> command and also on the Web
+              UI.<br/><br/>
+              Administrators can stop and start queues at runtime using the
+              feature of <a href="commands_manual.html#RefreshQueues">reloading
+              queue configuration</a>. If a queue is stopped at runtime, it
+              will complete all the existing running jobs and will stop
+              accepting any new jobs.
+              </td>
+            </tr>
+
+            <tr>
+              <td>acl-submit-job</td>
+              <td>Child element of a
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              list of users and groups that can submit jobs to the specified
+              queue.</td>
+              <td>Yes</td>
+              <td>
+              Applicable only to leaf-queues.<br/><br/>
+              The list of users and groups are both comma separated
+              list of names. The two lists are separated by a blank.
+              Example: <em>user1,user2 group1,group2</em>.
+              If you wish to define only a list of groups, provide
+              a blank at the beginning of the value.
+              <br/><br/>
+              </td>
+            </tr>
+
+            <tr>
+              <td>acl-administer-jobs</td>
+              <td>Child element of a
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              list of users and groups that can change the priority of a job
+              or kill a job that has been submitted to the specified queue.
+              </td>
+              <td>Yes</td>
+              <td>
+              Applicable only to leaf-queues.<br/><br/>
+              The list of users and groups are both comma separated
+              list of names. The two lists are separated by a blank.
+              Example: <em>user1,user2 group1,group2</em>.
+              If you wish to define only a list of groups, provide
+              a blank at the beginning of the value. Note that an
+              owner of a job can always change the priority or kill
+              his/her own job, irrespective of the ACLs.
+              </td>
+            </tr>
+
+            <tr>
+              <td><anchor id="properties_tag"/>properties</td>
+              <td>Child element of a 
+              <a href="#queue_tag"><em>&lt;queue&gt;</em></a> specifying the
+              scheduler specific properties.</td>
+              <td>Not applicable</td>
+              <td>The scheduler specific properties are the children of this
+              element specified as a group of &lt;property&gt; tags described
+              below. The JobTracker completely ignores these properties. These
+              can be used as per-queue properties needed by the scheduler
+              being configured. Please look at the scheduler specific
+              documentation as to how these properties are used by that
+              particular scheduler.
+              </td>
+            </tr>
+
+            <tr>
+              <td><anchor id="property_tag"/>property</td>
+              <td>Child element of
+              <a href="#properties_tag"><em>&lt;properties&gt;</em></a> for a
+              specific queue.</td>
+              <td>Not applicable</td>
+              <td>A single scheduler specific queue-property. Ignored by
+              the JobTracker and used by the scheduler that is configured.</td>
+            </tr>
+
+            <tr>
+              <td>key</td>
+              <td>Attribute of a
+              <a href="#property_tag"><em>&lt;property&gt;</em></a> for a
+              specific queue.</td>
+              <td>Scheduler-specific</td>
+              <td>The name of a single scheduler specific queue-property.</td>
+            </tr>
+
+            <tr>
+              <td>value</td>
+              <td>Attribute of a
+              <a href="#property_tag"><em>&lt;property&gt;</em></a> for a
+              specific queue.</td>
+              <td>Scheduler-specific</td>
+              <td>The value of a single scheduler specific queue-property.
+              The value can be anything that is left for the proper
+              interpretation by the scheduler that is configured.</td>
+            </tr>
+
+         </table>
+
+          <p>Once the queues are configured properly and the Map/Reduce
+          system is up and running, from the command line one can
+          <a href="commands_manual.html#QueuesList">get the list
+          of queues</a> and
+          <a href="commands_manual.html#QueuesInfo">obtain
+          information specific to each queue</a>. This information is also
+          available from the web UI. On the web UI, queue information can be
+          seen by going to queueinfo.jsp, linked to from the queues table-cell
+          in the cluster-summary table. The queueinfo.jsp prints the hierarchy
+          of queues as well as the specific information for each queue.
+          </p>
+
+          <p> Users can submit jobs only to a
+          leaf-level queue by specifying the fully-qualified queue-name for
+          the property name <em>mapreduce.job.queuename</em> in the job
+          configuration. The character ':' is the queue-name delimiter and so,
+          for example, if one wants to submit to a configured job-queue 'Queue-C'
+          which is one of the sub-queues of 'Queue-B' which in-turn is a
+          sub-queue of 'Queue-A', then the job configuration should contain
+          property <em>mapreduce.job.queuename</em> set to the <em>
+          &lt;value&gt;Queue-A:Queue-B:Queue-C&lt;/value&gt;</em></p>
+         </section>
           <section>
             <title>Real-World Cluster Configurations</title>
             
@@ -881,7 +1120,6 @@
             <code>$ bin/hadoop job -history all output-dir</code><br/></p> 
           </section>
         </section>
-      </section>
       
       <p>Once all the necessary configuration is complete, distribute the files
       to the <code>HADOOP_CONF_DIR</code> directory on all the machines, 
@@ -952,7 +1190,7 @@
       and starts the <code>TaskTracker</code> daemon on all the listed slaves.
       </p>
     </section>
-    
+
     <section>
       <title>Hadoop Shutdown</title>
       

Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/commands_manual.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/commands_manual.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/commands_manual.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/commands_manual.xml Wed Dec 23 10:53:56 2009
@@ -369,13 +369,13 @@
           <th> COMMAND_OPTION </th><th> Description </th>
         </tr>
         <tr>
-          <td><code>-list</code> </td>
+          <td><anchor id="QueuesList"/><code>-list</code> </td>
           <td>Gets the list of Job Queues configured in the system, along with the scheduling information
           associated with the job queues.
           </td>
         </tr>
         <tr>
-          <td><code>-info &lt;job-queue-name&gt; [-showJobs]</code></td>
+          <td><anchor id="QueuesInfo"/><code>-info &lt;job-queue-name&gt; [-showJobs]</code></td>
           <td>
            Displays the job queue information and associated scheduling information of particular
            job queue. If the -showJobs option is present, a list of jobs submitted to the particular job
@@ -581,16 +581,61 @@
         <p>Runs MR admin client</p>
         <p><code>Usage: hadoop mradmin  [</code>
         <a href="commands_manual.html#Generic+Options">GENERIC_OPTIONS</a>
-        <code>] [-refreshQueueAcls] </code></p>
+        <code>] [-refreshServiceAcl] [-refreshQueues] [-refreshNodes] [-help [cmd]] </code></p>
         <table>
         <tr>
         <th> COMMAND_OPTION </th><th> Description </th>
         </tr>
         <tr>
-        <td><code>-refreshQueueAcls</code></td>
-        <td> Refresh the queue acls used by Hadoop, to check access during submissions
-        and administration of the job by the user. The properties present in
-        <code>mapred-queue-acls.xml</code> is reloaded by the queue manager.</td>
+        <td><code>-refreshServiceAcl</code></td>
+        <td> Reload the service-level authorization policies. Jobtracker
+         will reload the authorization policy file.</td>
+        </tr>
+        <tr>
+        <td><anchor id="RefreshQueues"/><code>-refreshQueues</code></td>
+        <td><p> Reload the queues' configuration at the JobTracker.
+          Most of the configuration of the queues can be refreshed/reloaded
+          without restarting the Map/Reduce sub-system. Administrators
+          typically own the
+          <a href="cluster_setup.html#mapred-queues.xml">
+          <em>conf/mapred-queues.xml</em></a>
+          file, can edit it while the JobTracker is still running, and can do
+          a reload by running this command.</p>
+          <p>It should be noted that while trying to refresh queues'
+          configuration, one cannot change the hierarchy of queues itself.
+          This means no operation that involves a change in either the
+          hierarchy structure itself or the queues' names will be allowed.
+          Only selected properties of queues can be changed during refresh.
+          For example, new queues cannot be added dynamically, neither can an
+          existing queue be deleted.</p>
+          <p>If during a reload of queue configuration,
+          a syntactic or semantic error is made during the editing of the
+          configuration file, the refresh command fails with an exception that
+          is printed on the standard output of this command, thus informing the
+          requester with any helpful messages of what has gone wrong during
+          the edit/reload. Importantly, the existing queue configuration is
+          untouched and the system is left in a consistent state.
+          </p>
+          <p>As described in the
+          <a href="cluster_setup.html#mapred-queues.xml"><em>
+          conf/mapred-queues.xml</em></a> section, the
+          <a href="cluster_setup.html#properties_tag"><em>
+          &lt;properties&gt;</em></a> tag in the queue configuration file can
+          also be used to specify per-queue properties needed by the scheduler.
+           When the framework's queue configuration is reloaded using this
+          command, this scheduler specific configuration will also be reloaded,
+          provided the scheduler being configured supports this reload.
+          Please see the documentation of the particular scheduler in use.</p>
+          </td>
+        </tr>
+        <tr>
+        <td><code>-refreshNodes</code></td>
+        <td> Refresh the hosts information at the jobtracker.</td>
+        </tr>
+        <tr>
+        <td><code>-help [cmd]</code></td>
+        <td>Displays help for the given command or all commands if none
+                is specified.</td>
         </tr>
         </table>
       </section>

Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/mapred_tutorial.xml Wed Dec 23 10:53:56 2009
@@ -41,10 +41,10 @@
       </p> 
       <ul>
         <li>
-          <a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html">Single Node Setup</a> for first-time users.
+          <a href="ext:single-node-setup">Single Node Setup</a> for first-time users.
         </li>
         <li>
-          <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html">Cluster Setup</a> for large, distributed clusters.
+          <a href="cluster_setup.html">Cluster Setup</a> for large, distributed clusters.
         </li>
       </ul>
     </section>
@@ -152,8 +152,8 @@
       occurrences of each word in a given input set.</p>
       
       <p>This example works with a 
-      pseudo-distributed (<a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html#SingleNodeSetup">Single Node Setup</a>) 
-     or fully-distributed (<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html">Cluster Setup</a>) 
+      pseudo-distributed (<a href="ext:single-node-setup">Single Node Setup</a>) 
+     or fully-distributed (<a href="cluster_setup.html">Cluster Setup</a>) 
       Hadoop installation.</p>   
       
       <section>
@@ -1301,7 +1301,7 @@
         <p>Note: <code>mapred.{map|reduce}.child.java.opts</code> are used only 
         for configuring the launched child tasks from task tracker. Configuring 
         the memory options for daemons is documented under
-        <a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Configuring+the+Environment+of+the+Hadoop+Daemons">
+        <a href="cluster_setup.html#Configuring+the+Environment+of+the+Hadoop+Daemons">
         Configuring the Environment of the Hadoop Daemons</a> (Cluster Setup).</p>
         
         <p>The memory available to some parts of the framework is also
@@ -2412,8 +2412,8 @@
       
       <p>This example needs the HDFS to be up and running, especially for the 
       <code>DistributedCache</code>-related features. Hence it only works with a 
-      pseudo-distributed (<a href="http://hadoop.apache.org/common/docs/current/single_node_setup.html#SingleNodeSetup">Single Node Setup</a>) 
-     or fully-distributed (<a href="http://hadoop.apache.org/common/docs/current/cluster_setup.html#Fully-Distributed+Operation">Cluster Setup</a>) 
+      pseudo-distributed (<a href="ext:single-node-setup">Single Node Setup</a>) 
+     or fully-distributed (<a href="cluster_setup.html#Fully-Distributed+Operation">Cluster Setup</a>) 
       Hadoop installation.</p>     
       
       <section>

Modified: hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/site.xml
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/site.xml?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/site.xml (original)
+++ hadoop/mapreduce/trunk/src/docs/src/documentation/content/xdocs/site.xml Wed Dec 23 10:53:56 2009
@@ -34,6 +34,7 @@
   
    <docs label="Getting Started"> 
 		<overview   				label="Overview" 					href="index.html" />
+		<setup label="Cluster Setup" href="cluster_setup.html"/>
 		<mapred    				label="MapReduce Tutorial" 	href="mapred_tutorial.html" />
 		 <streaming 				label="Hadoop Streaming"  href="streaming.html" />
    </docs>	
@@ -71,11 +72,18 @@
     <jira      href="http://hadoop.apache.org/mapreduce/issue_tracking.html"/>
     <wiki      href="http://wiki.apache.org/hadoop/MapReduce" />
     <faq       href="http://wiki.apache.org/hadoop/MapReduce/FAQ" />
-    
     <common-default href="http://hadoop.apache.org/common/docs/current/common-default.html" />
     <hdfs-default href="http://hadoop.apache.org/hdfs/docs/current/hdfs-default.html" />
     <mapred-default href="http://hadoop.apache.org/mapreduce/docs/current/mapred-default.html" />
-    
+    <mapred-queues href="http://hadoop.apache.org/mapreduce/docs/current/mapred-queues.xml" />
+    <mapred-queues-capacity-scheduler href="http://hadoop.apache.org/mapreduce/docs/current/mapred-queues-capacity-scheduler.xml" />
+    <capacity-scheduler-conf href="http://hadoop.apache.org/mapreduce/docs/current/capacity-scheduler-conf.html" />
+
+    <single-node-setup href="http://hadoop.apache.org/common/docs/current/single_node_setup.html">
+      <PreReqs href="#PreReqs" />
+      <Download href="#Download" />
+    </single-node-setup>
+
     <zlib      href="http://www.zlib.net/" />
     <gzip      href="http://www.gzip.org/" />
     <bzip      href="http://www.bzip.org/" />

Modified: hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java?rev=893469&r1=893468&r2=893469&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java (original)
+++ hadoop/mapreduce/trunk/src/java/org/apache/hadoop/mapred/tools/MRAdmin.java Wed Dec 23 10:53:56 2009
@@ -61,8 +61,9 @@
     "\t\tJobtracker will reload the authorization policy file.\n";
 
   String refreshQueues =
-        "-refreshQueues: Reload the queue acls and state.\n"
-            + "\t\tJobTracker will reload the mapred-queues.xml file.\n";
+        "-refreshQueues: Reload the queues' acls, states and "
+            + "scheduler specific properties.\n"
+            + "\t\tJobTracker will reload the mapred-queues configuration file.\n";
 
   String refreshUserToGroupsMappings = 
     "-refreshUserToGroupsMappings: Refresh user-to-groups mappings\n";



Mime
View raw message