hadoop-common-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From cutt...@apache.org
Subject svn commit: r602270 - in /lucene/hadoop/trunk: CHANGES.txt build.xml docs/hadoop-default.html
Date Fri, 07 Dec 2007 23:29:42 GMT
Author: cutting
Date: Fri Dec  7 15:29:41 2007
New Revision: 602270

URL: http://svn.apache.org/viewvc?rev=602270&view=rev
Log:
HADOOP-2382.  Add hadoop-default.html to subversion.

Added:
    lucene/hadoop/trunk/docs/hadoop-default.html
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/build.xml

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=602270&r1=602269&r2=602270&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Fri Dec  7 15:29:41 2007
@@ -243,9 +243,11 @@
   IMPROVEMENTS
 
     HADOOP-2160.  Remove project-level, non-user documentation from
-    releases, since it's now maintained in a separate tree.
+    releases, since it's now maintained in a separate tree.  (cutting)
 
-    HADOOP-1327.  Add user documentation for streaming.
+    HADOOP-1327.  Add user documentation for streaming.  (cutting)
+
+    HADOOP-2382.  Add hadoop-default.html to subversion. (cutting)
 
 
 Release 0.15.1 - 2007-11-27

Modified: lucene/hadoop/trunk/build.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/build.xml?rev=602270&r1=602269&r2=602270&view=diff
==============================================================================
--- lucene/hadoop/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Fri Dec  7 15:29:41 2007
@@ -22,7 +22,7 @@
   <property name="conf.dir" value="${basedir}/conf"/>
   <property name="docs.dir" value="${basedir}/docs"/>
   <property name="contrib.dir" value="${basedir}/src/contrib"/>
-  <property name="docs.src" value="${basedir}/src/web"/>
+  <property name="docs.src" value="${basedir}/src/docs"/>
   <property name="c++.src" value="${basedir}/src/c++"/>
   <property name="c++.utils.src" value="${c++.src}/utils"/>
   <property name="c++.pipes.src" value="${c++.src}/pipes"/>
@@ -608,14 +608,16 @@
   <!-- Documentation                                                      -->
   <!-- ================================================================== -->
   
-  <target name="docs">
-    <exec dir="src/docs" executable="forrest" failonerror="true" />
-    <copy todir="docs/">
-      <fileset dir="src/docs/build/site/" />
+  <target name="docs" description="Generate documentation">
+    <exec dir="${docs.src}" executable="forrest" failonerror="true" />
+    <copy todir="${docs.dir}">
+      <fileset dir="${docs.src}/build/site/" />
     </copy>
+    <style basedir="${conf.dir}" destdir="${docs.dir}"
+           includes="hadoop-default.xml" style="conf/configuration.xsl"/>
   </target>
 
-  <target name="javadoc" depends="default-doc" description="Generate documentation">
+  <target name="javadoc" description="Generate javadoc">
     <mkdir dir="${build.javadoc}"/>
     <javadoc
       overview="${src.dir}/overview.html"
@@ -655,14 +657,6 @@
     </javadoc>
   </target>	
 	
-  <target name="default-doc">
-    <style basedir="${conf.dir}" destdir="${build.docs}"
-           includes="hadoop-default.xml" style="conf/configuration.xsl"/>
-    <copy todir="${build.docs}">
-      <fileset dir="${docs.dir}" />
-    </copy>
-  </target>
-
   <!-- ================================================================== -->
   <!-- D I S T R I B U T I O N                                            -->
   <!-- ================================================================== -->
@@ -716,6 +710,7 @@
     </copy>
 
     <copy todir="${dist.dir}/docs">
+      <fileset dir="${docs.dir}" />
       <fileset dir="${build.docs}"/>
     </copy>
 
@@ -773,7 +768,7 @@
   <!-- ================================================================== -->
   <target name="clean" depends="clean-contrib" description="Clean.  Delete the build files,
and their directories">
     <delete dir="${build.dir}"/>
-    <delete dir="src/docs/build"/>
+    <delete dir="${docs.src}/build"/>
   </target>
 
   <!-- ================================================================== -->

Added: lucene/hadoop/trunk/docs/hadoop-default.html
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/docs/hadoop-default.html?rev=602270&view=auto
==============================================================================
--- lucene/hadoop/trunk/docs/hadoop-default.html (added)
+++ lucene/hadoop/trunk/docs/hadoop-default.html Fri Dec  7 15:29:41 2007
@@ -0,0 +1,599 @@
+<html>
+<body>
+<table border="1">
+<tr>
+<td>name</td><td>value</td><td>description</td>
+</tr>
+<tr>
+<td><a name="hadoop.tmp.dir">hadoop.tmp.dir</a></td><td>/tmp/hadoop-${user.name}</td><td>A
base for other temporary directories.</td>
+</tr>
+<tr>
+<td><a name="hadoop.native.lib">hadoop.native.lib</a></td><td>true</td><td>Should
native hadoop libraries, if present, be used.</td>
+</tr>
+<tr>
+<td><a name="hadoop.logfile.size">hadoop.logfile.size</a></td><td>10000000</td><td>The
max size of each log file</td>
+</tr>
+<tr>
+<td><a name="hadoop.logfile.count">hadoop.logfile.count</a></td><td>10</td><td>The
max number of log files</td>
+</tr>
+<tr>
+<td><a name="dfs.namenode.logging.level">dfs.namenode.logging.level</a></td><td>info</td><td>The
logging level for dfs namenode. Other values are "dir"(trace
+namespace mutations), "block"(trace block under/over replications and block
+creations/deletions), or "all".</td>
+</tr>
+<tr>
+<td><a name="io.sort.factor">io.sort.factor</a></td><td>10</td><td>The
number of streams to merge at once while sorting
+  files.  This determines the number of open file handles.</td>
+</tr>
+<tr>
+<td><a name="io.sort.mb">io.sort.mb</a></td><td>100</td><td>The
total amount of buffer memory to use while sorting 
+  files, in megabytes.  By default, gives each merge stream 1MB, which
+  should minimize seeks.</td>
+</tr>
+<tr>
+<td><a name="io.file.buffer.size">io.file.buffer.size</a></td><td>4096</td><td>The
size of buffer for use in sequence files.
+  The size of this buffer should probably be a multiple of hardware
+  page size (4096 on Intel x86), and it determines how much data is
+  buffered during read and write operations.</td>
+</tr>
+<tr>
+<td><a name="io.bytes.per.checksum">io.bytes.per.checksum</a></td><td>512</td><td>The
number of bytes per checksum.  Must not be larger than
+  io.file.buffer.size.</td>
+</tr>
+<tr>
+<td><a name="io.skip.checksum.errors">io.skip.checksum.errors</a></td><td>false</td><td>If
true, when a checksum error is encountered while
+  reading a sequence file, entries are skipped, instead of throwing an
+  exception.</td>
+</tr>
+<tr>
+<td><a name="io.map.index.skip">io.map.index.skip</a></td><td>0</td><td>Number
of index entries to skip between each entry.
+  Zero by default. Setting this to values larger than zero can
+  facilitate opening large map files using less memory.</td>
+</tr>
+<tr>
+<td><a name="io.compression.codecs">io.compression.codecs</a></td><td>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</td><td>A
list of the compression codec classes that can be used 
+               for compression/decompression.</td>
+</tr>
+<tr>
+<td><a name="fs.default.name">fs.default.name</a></td><td>file:///</td><td>The
name of the default file system.  A URI whose
+  scheme and authority determine the FileSystem implementation.  The
+  uri's scheme determines the config property (fs.SCHEME.impl) naming
+  the FileSystem implementation class.  The uri's authority is used to
+  determine the host, port, etc. for a filesystem.</td>
+</tr>
+<tr>
+<td><a name="fs.trash.root">fs.trash.root</a></td><td>${hadoop.tmp.dir}/Trash</td><td>The
trash directory, used by FsShell's 'rm' command.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.trash.interval">fs.trash.interval</a></td><td>0</td><td>Number
of minutes between trash checkpoints.
+  If zero, the trash feature is disabled.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.file.impl">fs.file.impl</a></td><td>org.apache.hadoop.fs.LocalFileSystem</td><td>The
FileSystem for file: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.hdfs.impl">fs.hdfs.impl</a></td><td>org.apache.hadoop.dfs.DistributedFileSystem</td><td>The
FileSystem for hdfs: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.s3.impl">fs.s3.impl</a></td><td>org.apache.hadoop.fs.s3.S3FileSystem</td><td>The
FileSystem for s3: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.kfs.impl">fs.kfs.impl</a></td><td>org.apache.hadoop.fs.kfs.KosmosFileSystem</td><td>The
FileSystem for kfs: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.hftp.impl">fs.hftp.impl</a></td><td>org.apache.hadoop.dfs.HftpFileSystem</td><td></td>
+</tr>
+<tr>
+<td><a name="fs.ramfs.impl">fs.ramfs.impl</a></td><td>org.apache.hadoop.fs.InMemoryFileSystem</td><td>The
FileSystem for ramfs: uris.</td>
+</tr>
+<tr>
+<td><a name="fs.inmemory.size.mb">fs.inmemory.size.mb</a></td><td>75</td><td>The
size of the in-memory filesystem instance in MB</td>
+</tr>
+<tr>
+<td><a name="fs.checkpoint.dir">fs.checkpoint.dir</a></td><td>${hadoop.tmp.dir}/dfs/namesecondary</td><td>Determines
where on the local filesystem the DFS secondary
+      name node should store the temporary images and edits to merge.  
+  </td>
+</tr>
+<tr>
+<td><a name="fs.checkpoint.period">fs.checkpoint.period</a></td><td>3600</td><td>The
number of seconds between two periodic checkpoints.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.checkpoint.size">fs.checkpoint.size</a></td><td>67108864</td><td>The
size of the current edit log (in bytes) that triggers
+       a periodic checkpoint even if the fs.checkpoint.period hasn't expired.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.secondary.http.bindAddress">dfs.secondary.http.bindAddress</a></td><td>0.0.0.0:50090</td><td>
+    The secondary namenode http server bind address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.bindAddress">dfs.datanode.bindAddress</a></td><td>0.0.0.0:50010</td><td>
+    The address where the datanode will listen to.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.http.bindAddress">dfs.datanode.http.bindAddress</a></td><td>0.0.0.0:50075</td><td>
+    The datanode http server bind address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.http.bindAddress">dfs.http.bindAddress</a></td><td>0.0.0.0:50070</td><td>
+    The address and the base port where the dfs namenode web ui will listen on.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.dns.interface">dfs.datanode.dns.interface</a></td><td>default</td><td>The
name of the Network Interface from which a data node should 
+  report its IP address.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.dns.nameserver">dfs.datanode.dns.nameserver</a></td><td>default</td><td>The
host name or IP address of the name server (DNS)
+  which a DataNode should use to determine the host name used by the
+  NameNode for communication and display purposes.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication.considerLoad">dfs.replication.considerLoad</a></td><td>true</td><td>Decide
if chooseTarget considers the target's load or not
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.default.chunk.view.size">dfs.default.chunk.view.size</a></td><td>32768</td><td>The
number of bytes to view for a file on the browser.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.du.reserved">dfs.datanode.du.reserved</a></td><td>0</td><td>Reserved
space in bytes per volume. Always leave this much space free for non dfs use.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.datanode.du.pct">dfs.datanode.du.pct</a></td><td>0.98f</td><td>When
calculating remaining space, only use this percentage of the real available space
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.name.dir">dfs.name.dir</a></td><td>${hadoop.tmp.dir}/dfs/name</td><td>Determines
where on the local filesystem the DFS name node
+      should store the name table.  If this is a comma-delimited list
+      of directories then the name table is replicated in all of the
+      directories, for redundancy. </td>
+</tr>
+<tr>
+<td><a name="dfs.client.buffer.dir">dfs.client.buffer.dir</a></td><td>${hadoop.tmp.dir}/dfs/tmp</td><td>Determines
where on the local filesystem a DFS client
+  should store its blocks before it sends them to the datanode.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.data.dir">dfs.data.dir</a></td><td>${hadoop.tmp.dir}/dfs/data</td><td>Determines
where on the local filesystem a DFS data node
+  should store its blocks.  If this is a comma-delimited
+  list of directories, then data will be stored in all named
+  directories, typically on different devices.
+  Directories that do not exist are ignored.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication">dfs.replication</a></td><td>3</td><td>Default
block replication. 
+  The actual number of replications can be specified when the file is created.
+  The default is used if replication is not specified in create time.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication.max">dfs.replication.max</a></td><td>512</td><td>Maximal
block replication. 
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.replication.min">dfs.replication.min</a></td><td>1</td><td>Minimal
block replication. 
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.block.size">dfs.block.size</a></td><td>67108864</td><td>The
default block size for new files.</td>
+</tr>
+<tr>
+<td><a name="dfs.df.interval">dfs.df.interval</a></td><td>60000</td><td>Disk
usage statistics refresh interval in msec.</td>
+</tr>
+<tr>
+<td><a name="dfs.client.block.write.retries">dfs.client.block.write.retries</a></td><td>3</td><td>The
number of retries for writing blocks to the data nodes, 
+  before we signal failure to the application.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.blockreport.intervalMsec">dfs.blockreport.intervalMsec</a></td><td>3600000</td><td>Determines
block reporting interval in milliseconds.</td>
+</tr>
+<tr>
+<td><a name="dfs.heartbeat.interval">dfs.heartbeat.interval</a></td><td>3</td><td>Determines
datanode heartbeat interval in seconds.</td>
+</tr>
+<tr>
+<td><a name="dfs.namenode.handler.count">dfs.namenode.handler.count</a></td><td>10</td><td>The
number of server threads for the namenode.</td>
+</tr>
+<tr>
+<td><a name="dfs.safemode.threshold.pct">dfs.safemode.threshold.pct</a></td><td>0.999f</td><td>
+  	Specifies the percentage of blocks that should satisfy 
+  	the minimal replication requirement defined by dfs.replication.min.
+  	Values less than or equal to 0 mean not to start in safe mode.
+  	Values greater than 1 will make safe mode permanent.
+ 	</td>
+</tr>
+<tr>
+<td><a name="dfs.safemode.extension">dfs.safemode.extension</a></td><td>30000</td><td>
+  	Determines extension of safe mode in milliseconds 
+  	after the threshold level is reached.
+ 	</td>
+</tr>
+<tr>
+<td><a name="dfs.network.script">dfs.network.script</a></td><td></td><td>
+        Specifies a script name that prints the network location path
+        of the current machine.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.balance.bandwidthPerSec">dfs.balance.bandwidthPerSec</a></td><td>1048576</td><td>
+        Specifies the maximum amount of bandwidth that each datanode
+        can utilize for the balancing purpose in term of
+        the number of bytes per second.
+  </td>
+</tr>
+<tr>
+<td><a name="dfs.hosts">dfs.hosts</a></td><td></td><td>Names
a file that contains a list of hosts that are
+  permitted to connect to the namenode. The full pathname of the file
+  must be specified.  If the value is empty, all hosts are
+  permitted.</td>
+</tr>
+<tr>
+<td><a name="dfs.hosts.exclude">dfs.hosts.exclude</a></td><td></td><td>Names
a file that contains a list of hosts that are
+  not permitted to connect to the namenode.  The full pathname of the
+  file must be specified.  If the value is empty, no hosts are
+  excluded.</td>
+</tr>
+<tr>
+<td><a name="fs.s3.block.size">fs.s3.block.size</a></td><td>67108864</td><td>Block
size to use when writing files to S3.</td>
+</tr>
+<tr>
+<td><a name="fs.s3.buffer.dir">fs.s3.buffer.dir</a></td><td>${hadoop.tmp.dir}/s3</td><td>Determines
where on the local filesystem the S3 filesystem
+  should store its blocks before it sends them to S3
+  or after it retrieves them from S3.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.s3.maxRetries">fs.s3.maxRetries</a></td><td>4</td><td>The
maximum number of retries for reading or writing blocks to S3, 
+  before we signal failure to the application.
+  </td>
+</tr>
+<tr>
+<td><a name="fs.s3.sleepTimeSeconds">fs.s3.sleepTimeSeconds</a></td><td>10</td><td>The
number of seconds to sleep between each S3 retry.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker">mapred.job.tracker</a></td><td>local</td><td>The
host and port that the MapReduce job tracker runs
+  at.  If "local", then jobs are run in-process as a single map
+  and reduce task.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker.http.bindAddress">mapred.job.tracker.http.bindAddress</a></td><td>0.0.0.0:50030</td><td>
+    The job tracker http server bind address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.job.tracker.handler.count">mapred.job.tracker.handler.count</a></td><td>10</td><td>
+    The number of server threads for the JobTracker. This should be roughly
+    4% of the number of tasktracker nodes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.tracker.report.bindAddress">mapred.task.tracker.report.bindAddress</a></td><td>127.0.0.1:0</td><td>The
interface that task processes use to communicate
+  with their parent tasktracker process.</td>
+</tr>
+<tr>
+<td><a name="mapred.local.dir">mapred.local.dir</a></td><td>${hadoop.tmp.dir}/mapred/local</td><td>The
local directory where MapReduce stores intermediate
+  data files.  May be a comma-separated list of
+  directories on different devices in order to spread disk i/o.
+  Directories that do not exist are ignored.
+  </td>
+</tr>
+<tr>
+<td><a name="local.cache.size">local.cache.size</a></td><td>10737418240</td><td>The
limit on the size of cache you want to keep, set by default
+  to 10GB. This will act as a soft limit on the cache directory for out of band data.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.system.dir">mapred.system.dir</a></td><td>${hadoop.tmp.dir}/mapred/system</td><td>The
shared directory where MapReduce stores control files.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.temp.dir">mapred.temp.dir</a></td><td>${hadoop.tmp.dir}/mapred/temp</td><td>A
shared directory for temporary files.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.local.dir.minspacestart">mapred.local.dir.minspacestart</a></td><td>0</td><td>If
the space in mapred.local.dir drops under this, 
+  do not ask for more tasks.
+  Value in bytes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.local.dir.minspacekill">mapred.local.dir.minspacekill</a></td><td>0</td><td>If
the space in mapred.local.dir drops under this, 
+  	do not ask more tasks until all the current ones have finished and 
+  	cleaned up. Also, to save the rest of the tasks we have running, 
+  	kill one of them, to clean up some space. Start with the reduce tasks,
+  	then go with the ones that have finished the least.
+  	Value in bytes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.expiry.interval">mapred.tasktracker.expiry.interval</a></td><td>600000</td><td>Expert:
The time-interval, in milliseconds, after which
+  a tasktracker is declared 'lost' if it doesn't send heartbeats.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.map.tasks">mapred.map.tasks</a></td><td>2</td><td>The
default number of map tasks per job.  Typically set
+  to a prime several times greater than number of available hosts.
+  Ignored when mapred.job.tracker is "local".  
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.tasks">mapred.reduce.tasks</a></td><td>1</td><td>The
default number of reduce tasks per job.  Typically set
+  to a prime close to the number of available hosts.  Ignored when
+  mapred.job.tracker is "local".
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.map.max.attempts">mapred.map.max.attempts</a></td><td>4</td><td>Expert:
The maximum number of attempts per map task.
+  In other words, framework will try to execute a map task these many number
+  of times before giving up on it.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.max.attempts">mapred.reduce.max.attempts</a></td><td>4</td><td>Expert:
The maximum number of attempts per reduce task.
+  In other words, framework will try to execute a reduce task these many number
+  of times before giving up on it.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.parallel.copies">mapred.reduce.parallel.copies</a></td><td>5</td><td>The
default number of parallel transfers run by reduce
+  during the copy(shuffle) phase.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.reduce.copy.backoff">mapred.reduce.copy.backoff</a></td><td>300</td><td>The
maximum amount of time (in seconds) a reducer spends on 
+  fetching one map output before declaring it as failed.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.timeout">mapred.task.timeout</a></td><td>600000</td><td>The
number of milliseconds before a task will be
+  terminated if it neither reads an input, writes an output, nor
+  updates its status string.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.map.tasks.maximum">mapred.tasktracker.map.tasks.maximum</a></td><td>2</td><td>The
maximum number of map tasks that will be run
+  simultaneously by a task tracker.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.reduce.tasks.maximum">mapred.tasktracker.reduce.tasks.maximum</a></td><td>2</td><td>The
maximum number of reduce tasks that will be run
+  simultaneously by a task tracker.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.jobtracker.completeuserjobs.maximum">mapred.jobtracker.completeuserjobs.maximum</a></td><td>100</td><td>The
maximum number of complete jobs per user to keep around before delegating them to the job
history.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.child.java.opts">mapred.child.java.opts</a></td><td>-Xmx200m</td><td>Java
opts for the task tracker child processes.  Subsumes
+  'mapred.child.heap.size' (If a mapred.child.heap.size value is found
+  in a configuration, its maximum heap size will be used and a warning
+  emitted that heap.size has been deprecated). Also, the following symbol,
+  if present, will be interpolated: @taskid@ is replaced by current TaskID.
+  Any other occurrences of '@' will go unchanged. For
+  example, to enable verbose gc logging to a file named for the taskid in
+  /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of:
+        -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.inmem.merge.threshold">mapred.inmem.merge.threshold</a></td><td>1000</td><td>The
threshold, in terms of the number of files 
+  for the in-memory merge process. When we accumulate threshold number of files
+  we initiate the in-memory merge and spill to disk. A value of 0 or less than
+  0 indicates we DON'T want to have any threshold and instead depend only on
+  the ramfs's memory consumption to trigger the merge.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.speculative.execution">mapred.speculative.execution</a></td><td>true</td><td>If
true, then multiple instances of some map and reduce tasks 
+               may be executed in parallel.</td>
+</tr>
+<tr>
+<td><a name="mapred.min.split.size">mapred.min.split.size</a></td><td>0</td><td>The
minimum size chunk that map input should be split
+  into.  Note that some file formats may have minimum split sizes that
+  take priority over this setting.</td>
+</tr>
+<tr>
+<td><a name="mapred.submit.replication">mapred.submit.replication</a></td><td>10</td><td>The
replication level for submitted job files.  This
+  should be around the square root of the number of nodes.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.dns.interface">mapred.tasktracker.dns.interface</a></td><td>default</td><td>The
name of the Network Interface from which a task
+  tracker should report its IP address.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.tasktracker.dns.nameserver">mapred.tasktracker.dns.nameserver</a></td><td>default</td><td>The
host name or IP address of the name server (DNS)
+  which a TaskTracker should use to determine the host name used by
+  the JobTracker for communication and display purposes.
+  </td>
+</tr>
+<tr>
+<td><a name="tasktracker.http.threads">tasktracker.http.threads</a></td><td>40</td><td>The
number of worker threads for the http server. This is
+               used for map output fetching
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.task.tracker.http.bindAddress">mapred.task.tracker.http.bindAddress</a></td><td>0.0.0.0:50060</td><td>
+    The task tracker http server bind address and port.
+    If the port is 0 then the server will start on a free port.
+  </td>
+</tr>
+<tr>
+<td><a name="keep.failed.task.files">keep.failed.task.files</a></td><td>false</td><td>Should
the files for failed tasks be kept. This should only be 
+               used on jobs that are failing, because the storage is never
+               reclaimed. It also prevents the map outputs from being erased
+               from the reduce directory as they are consumed.</td>
+</tr>
+<tr>
+<td><a name="mapred.output.compress">mapred.output.compress</a></td><td>false</td><td>Should
the job outputs be compressed?
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.output.compression.type">mapred.output.compression.type</a></td><td>RECORD</td><td>If
the job outputs are to be compressed as SequenceFiles, how should
+               they be compressed? Should be one of NONE, RECORD or BLOCK.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.output.compression.codec">mapred.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If
the job outputs are compressed, how should they be compressed?
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.compress.map.output">mapred.compress.map.output</a></td><td>false</td><td>Should
the outputs of the maps be compressed before being
+               sent across the network. Uses SequenceFile compression.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.map.output.compression.type">mapred.map.output.compression.type</a></td><td>RECORD</td><td>If
the map outputs are to be compressed, how should they
+               be compressed? Should be one of NONE, RECORD or BLOCK.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.map.output.compression.codec">mapred.map.output.compression.codec</a></td><td>org.apache.hadoop.io.compress.DefaultCodec</td><td>If
the map outputs are compressed, how should they be 
+               compressed?
+  </td>
+</tr>
+<tr>
+<td><a name="io.seqfile.compress.blocksize">io.seqfile.compress.blocksize</a></td><td>1000000</td><td>The
minimum block size for compression in block compressed 
+  				SequenceFiles.
+  </td>
+</tr>
+<tr>
+<td><a name="io.seqfile.lazydecompress">io.seqfile.lazydecompress</a></td><td>true</td><td>Should
values of block-compressed SequenceFiles be decompressed
+  				only when necessary.
+  </td>
+</tr>
+<tr>
+<td><a name="io.seqfile.sorter.recordlimit">io.seqfile.sorter.recordlimit</a></td><td>1000000</td><td>The
limit on number of records to be kept in memory in a spill 
+  				in SequenceFiles.Sorter
+  </td>
+</tr>
+<tr>
+<td><a name="io.seqfile.compression.type">io.seqfile.compression.type</a></td><td>RECORD</td><td>The
default compression type for SequenceFile.Writer.
+  </td>
+</tr>
+<tr>
+<td><a name="map.sort.class">map.sort.class</a></td><td>org.apache.hadoop.mapred.MergeSorter</td><td>The
default sort class for sorting keys.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.userlog.limit.kb">mapred.userlog.limit.kb</a></td><td>0</td><td>The
maximum size of user-logs of each task in KB. 0 disables the cap.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.userlog.retain.hours">mapred.userlog.retain.hours</a></td><td>24</td><td>The
maximum time, in hours, for which the user-logs are to be 
+  				retained.
+  </td>
+</tr>
+<tr>
+<td><a name="mapred.hosts">mapred.hosts</a></td><td></td><td>Names
a file that contains the list of nodes that may
+  connect to the jobtracker.  If the value is empty, all hosts are
+  permitted.</td>
+</tr>
+<tr>
+<td><a name="mapred.hosts.exclude">mapred.hosts.exclude</a></td><td></td><td>Names
a file that contains the list of hosts that
+  should be excluded by the jobtracker.  If the value is empty, no
+  hosts are excluded.</td>
+</tr>
+<tr>
+<td><a name="mapred.max.tracker.failures">mapred.max.tracker.failures</a></td><td>4</td><td>The
number of task-failures on a tasktracker of a given job 
+               after which new tasks of that job aren't assigned to it.
+  </td>
+</tr>
+<tr>
+<td><a name="jobclient.output.filter">jobclient.output.filter</a></td><td>FAILED</td><td>The
filter for controlling the output of the task's userlogs sent
+               to the console of the JobClient. 
+               The permissible options are: NONE, KILLED, FAILED, SUCCEEDED and 
+               ALL.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.timeout">ipc.client.timeout</a></td><td>60000</td><td>Defines
the timeout for IPC calls in milliseconds.</td>
+</tr>
+<tr>
+<td><a name="ipc.client.idlethreshold">ipc.client.idlethreshold</a></td><td>4000</td><td>Defines
the threshold number of connections after which
+               connections will be inspected for idleness.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.maxidletime">ipc.client.maxidletime</a></td><td>120000</td><td>Defines
the maximum idle time for a connected client after 
+               which it may be disconnected.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.kill.max">ipc.client.kill.max</a></td><td>10</td><td>Defines
the maximum number of clients to disconnect in one go.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.connection.maxidletime">ipc.client.connection.maxidletime</a></td><td>1000</td><td>The
maximum time after which a client will bring down the
+               connection to the server.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.client.connect.max.retries">ipc.client.connect.max.retries</a></td><td>10</td><td>Indicates
the number of retries a client will make to establish
+               a server connection.
+  </td>
+</tr>
+<tr>
+<td><a name="ipc.server.listen.queue.size">ipc.server.listen.queue.size</a></td><td>128</td><td>Indicates
the length of the listen queue for servers accepting
+               client connections.
+  </td>
+</tr>
+<tr>
+<td><a name="job.end.retry.attempts">job.end.retry.attempts</a></td><td>0</td><td>Indicates
how many times hadoop should attempt to contact the
+               notification URL </td>
+</tr>
+<tr>
+<td><a name="job.end.retry.interval">job.end.retry.interval</a></td><td>30000</td><td>Indicates
time in milliseconds between notification URL retry
+                calls</td>
+</tr>
+<tr>
+<td><a name="webinterface.private.actions">webinterface.private.actions</a></td><td>false</td><td>
If set to true, the web interfaces of JT and NN may contain 
+                actions, such as kill job, delete file, etc., that should 
+                not be exposed to public. Enable this option if the interfaces 
+                are only reachable by those who have the right authorization.
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.rpc.socket.factory.class.default">hadoop.rpc.socket.factory.class.default</a></td><td>org.apache.hadoop.net.StandardSocketFactory</td><td>
Default SocketFactory to use. This parameter is expected to be
+    formatted as "package.FactoryClassName".
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.rpc.socket.factory.class.ClientProtocol">hadoop.rpc.socket.factory.class.ClientProtocol</a></td><td></td><td>
SocketFactory to use to connect to a DFS. If null or empty, use
+    hadoop.rpc.socket.factory.class.default. This socket factory is also used by
+    DFSClient to create sockets to DataNodes.
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.rpc.socket.factory.class.JobSubmissionProtocol">hadoop.rpc.socket.factory.class.JobSubmissionProtocol</a></td><td></td><td>
SocketFactory to use to connect to a Map/Reduce master
+    (JobTracker). If null or empty, then use hadoop.rpc.socket.factory.class.default.
+  </td>
+</tr>
+<tr>
+<td><a name="hadoop.socks.server">hadoop.socks.server</a></td><td></td><td>
Address (host:port) of the SOCKS server to be used by the
+    SocksSocketFactory.
+  </td>
+</tr>
+</table>
+</body>
+</html>



Mime
View raw message