Return-Path:
+ *
* Commit is a two-phased protocol. First the attempt informs the
* ApplicationMaster that it is
* {@link #commitPending(TaskAttemptID, TaskStatus)}. Then it repeatedly polls
@@ -202,7 +202,7 @@ public class TaskAttemptListenerImpl extends CompositeService
* TaskAttempt is reporting that it is in commit_pending and it is waiting for
* the commit Response
*
- *
+ *
* Commit is a two-phased protocol. First the attempt informs the
* ApplicationMaster that it is
* {@link #commitPending(TaskAttemptID, TaskStatus)}. Then it repeatedly polls
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java
index 981e6ff..05bb40b 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java
@@ -44,7 +44,6 @@ import org.mortbay.log.Log;
* proxy if needed
JobTracker
.
@@ -56,7 +56,7 @@ import org.apache.hadoop.util.StringInterner;
* Clients can query for the latest ClusterStatus
, via
* {@link JobClient#getClusterStatus()}.
Note: The following is valid only if the {@link OutputCommitter}
* is {@link FileOutputCommitter}. If OutputCommitter
is not
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java
index 30ebd6b..32e07e7 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java
@@ -47,7 +47,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
- * IFile
is the simple IFile
is the simple <key-len, value-len, key, value> format
* for the intermediate map-outputs in Map-Reduce.
*
* There is a Writer
to write out map-outputs in this format and
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java
index 37633ab..0dbbe5a 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java
@@ -101,7 +101,6 @@ public class JobACLsManager {
* @param jobOperation
* @param jobOwner
* @param jobACL
- * @throws AccessControlException
*/
public boolean checkAccess(UserGroupInformation callerUGI,
JobACL jobOperation, String jobOwner, AccessControlList jobACL) {
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java
index fa50ecb..6a9145c 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java
@@ -79,7 +79,7 @@ import org.apache.hadoop.util.ToolRunner;
* Submitting the job to the cluster and optionally monitoring
* its status.
*
- *
JobClient
to submit
@@ -101,9 +101,9 @@ import org.apache.hadoop.util.ToolRunner;
*
* // Submit the job, then poll for progress until the job is complete
* JobClient.runJob(job);
- *
+ *
*
- * At times clients would chain map-reduce jobs to accomplish complex tasks * which cannot be done via a single map-reduce job. This is fairly easy since @@ -127,7 +127,7 @@ import org.apache.hadoop.util.ToolRunner; * {@link JobConf#setJobEndNotificationURI(String)} : setup a notification * on job-completion, thus avoiding polling. * - *
+ * * * @see JobConf * @see ClusterStatus http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java index 03f1160..51b99be 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -71,7 +71,7 @@ import org.apache.log4j.Level; * more complex for the user to control finely * (e.g. {@link #setNumMapTasks(int)}). * - * + * * *JobConf
typically specifies the {@link Mapper}, combiner
* (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
@@ -102,7 +102,7 @@ import org.apache.log4j.Level;
*
* job.setInputFormat(SequenceFileInputFormat.class);
* job.setOutputFormat(SequenceFileOutputFormat.class);
- *
* If the parameter {@code loadDefaults} is false, the new instance * will not load resources from the default files. * @@ -988,19 +988,19 @@ public class JobConf extends Configuration { /** * Set the user defined {@link RawComparator} comparator for * grouping keys in the input to the combiner. - *
+ * *This comparator should be provided if the equivalence rules for keys * for sorting the intermediates are different from those for grouping keys * before each call to * {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.
- * + * *For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed * in a single call to the reduce function if K1 and K2 compare as equal.
- * + * *Since {@link #setOutputKeyComparatorClass(Class)} can be used to control * how keys are sorted, this can be used in conjunction to simulate * secondary sort on values.
- * + * *Note: This is not a guarantee of the combiner sort being * stable in any sense. (In any case, with the order of available * map-outputs to the combiner being non-deterministic, it wouldn't make @@ -1205,7 +1205,7 @@ public class JobConf extends Configuration { *
Typically the combiner is same as the Reducer
for the
* job i.e. {@link #setReducerClass(Class)}.
The number of maps is usually driven by the total size of the inputs * i.e. total number of blocks of the input files.
@@ -1345,7 +1345,7 @@ public class JobConf extends Configuration { /** * Set the requisite number of reduce tasks for this job. * - *The right number of reduces seems to be 0.95
or
* 1.75
multiplied by (<no. of nodes> *
@@ -1365,7 +1365,7 @@ public class JobConf extends Configuration {
* reserve a few reduce slots in the framework for speculative-tasks, failures
* etc.
It is legal to set the number of reduce-tasks to zero
.
The debug command, run on the node where the map failed, is:
- *+ * * *+ ** $script $stdout $stderr $syslog $jobconf. - *
The script file is distributed through {@link DistributedCache} * APIs. The script needs to be symlinked.
@@ -1700,7 +1700,7 @@ public class JobConf extends Configuration { * job.setMapDebugScript("./myscript"); * DistributedCache.createSymlink(job); * DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); - * + * * * @param mDbgScript the script name */ @@ -1725,9 +1725,9 @@ public class JobConf extends Configuration { * is given task's stdout, stderr, syslog, jobconf files as arguments. * *The debug command, run on the node where the map failed, is:
- *+ * * *+ ** $script $stdout $stderr $syslog $jobconf. - *
The script file is distributed through {@link DistributedCache} * APIs. The script file needs to be symlinked
@@ -1737,7 +1737,7 @@ public class JobConf extends Configuration { * job.setReduceDebugScript("./myscript"); * DistributedCache.createSymlink(job); * DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); - * + * * * @param rDbgScript the script name */ @@ -1780,8 +1780,6 @@ public class JobConf extends Configuration { * * @param uri the job end notification uri * @see JobStatus - * @see Job Completion and Chaining */ public void setJobEndNotificationURI(String uri) { set(JobContext.MR_JOB_END_NOTIFICATION_URL, uri); @@ -1811,7 +1809,7 @@ public class JobConf extends Configuration { * * If a value is specified in the configuration, it is returned. * Else, it returns {@link JobContext#DEFAULT_MAP_MEMORY_MB}. - * + ** For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used @@ -1838,7 +1836,7 @@ public class JobConf extends Configuration { * * If a value is specified in the configuration, it is returned. * Else, it returns {@link JobContext#DEFAULT_REDUCE_MEMORY_MB}. - *
+ ** For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used @@ -1912,7 +1910,6 @@ public class JobConf extends Configuration { * * @param my_class the class to find. * @return a jar file that contains the class, or null. - * @throws IOException */ public static String findContainingJar(Class my_class) { return ClassUtil.findContainingJar(my_class); @@ -1921,10 +1918,10 @@ public class JobConf extends Configuration { /** * Get the memory required to run a task of this job, in bytes. See * {@link #MAPRED_TASK_MAXVMEM_PROPERTY} - *
+ ** This method is deprecated. Now, different memory limits can be * set for map and reduce tasks of a job, in MB. - *
+ ** For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned. * Otherwise, this method will return the larger of the values returned by @@ -1950,7 +1947,7 @@ public class JobConf extends Configuration { /** * Set the maximum amount of memory any task of this job can use. See * {@link #MAPRED_TASK_MAXVMEM_PROPERTY} - *
+ ** mapred.task.maxvmem is split into * mapreduce.map.memory.mb * and mapreduce.map.memory.mb,mapred http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Mapper.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Mapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Mapper.java index eaa6c2b..ac2c96d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Mapper.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Mapper.java @@ -117,7 +117,7 @@ import org.apache.hadoop.io.compress.CompressionCodec; * output.collect(key, val); * } * } - *
+ * * *Applications may write a custom {@link MapRunnable} to exert greater
* control on map processing e.g. multi-threaded Mapper
s etc.
* The Map/Reduce framework can be configured with one or more queues, * depending on the scheduler it is configured with. While some * schedulers work only with one queue, some schedulers support multiple * queues. Some schedulers also support the notion of queues within * queues - a feature called hierarchical queues. - *
+ ** Queue names are unique, and used as a key to lookup queues. Hierarchical * queues are named by a 'fully qualified name' such as q1:q2:q3, where * q2 is a child queue of q1 and q3 is a child queue of q2. - *
+ ** Leaf level queues are queues that contain no queues within them. Jobs * can be submitted only to leaf level queues. - *
+ ** Queues can be configured with various properties. Some of these * properties are common to all schedulers, and those are handled by this * class. Schedulers might also associate several custom properties with @@ -69,11 +69,11 @@ import java.net.URL; * provided by the framework, but define their own mechanisms. In such cases, * it is likely that the name of the queue will be used to relate the * common properties of a queue with scheduler specific properties. - *
+ ** Information related to a queue, such as its name, properties, scheduling * information and children are exposed by this class via a serializable * class called {@link JobQueueInfo}. - *
+ ** Queues are configured in the configuration file mapred-queues.xml. * To support backwards compatibility, queues can also be configured * in mapred-site.xml. However, when configured in the latter, there is @@ -102,7 +102,7 @@ public class QueueManager { /** * Factory method to create an appropriate instance of a queue * configuration parser. - *
+ ** Returns a parser that can parse either the deprecated property * style queue configuration in mapred-site.xml, or one that can * parse hierarchical queues in mapred-queues.xml. First preference @@ -157,7 +157,7 @@ public class QueueManager { /** * Construct a new QueueManager using configuration specified in the passed * in {@link org.apache.hadoop.conf.Configuration} object. - *
+ ** This instance supports queue configuration specified in mapred-site.xml, * but without support for hierarchical queues. If no queue configuration * is found in mapred-site.xml, it will then look for site configuration @@ -173,7 +173,7 @@ public class QueueManager { /** * Create an instance that supports hierarchical queues, defined in * the passed in configuration file. - *
+ ** This is mainly used for testing purposes and should not called from * production code. * @@ -208,7 +208,7 @@ public class QueueManager { /** * Return the set of leaf level queues configured in the system to * which jobs are submitted. - *
+ ** The number of queues configured should be dependent on the Scheduler * configured. Note that some schedulers work with only one queue, whereas * others can support multiple queues. @@ -222,7 +222,7 @@ public class QueueManager { /** * Return true if the given user is part of the ACL for the given * {@link QueueACL} name for the given queue. - *
+ ** An operation is allowed if all users are provided access for this * operation, or if either the user or any of the groups specified is * provided access. @@ -283,7 +283,7 @@ public class QueueManager { /** * Set a generic Object that represents scheduling information relevant * to a queue. - *
+ ** A string representation of this Object will be used by the framework * to display in user facing applications like the JobTracker web UI and * the hadoop CLI. @@ -323,7 +323,7 @@ public class QueueManager { /** * Refresh acls, state and scheduler properties for the configured queues. - *
+ ** This method reloads configuration related to queues, but does not * support changes to the list of queues or hierarchy. The expected usage * is that an administrator can modify the queue configuration file and @@ -431,7 +431,7 @@ public class QueueManager { /** * JobQueueInfo for all the queues. - *
+ ** Contribs can use this data structure to either create a hierarchy or for * traversing. * They can also use this to refresh properties in case of refreshQueues @@ -450,7 +450,7 @@ public class QueueManager { /** * Generates the array of QueueAclsInfo object. - *
+ ** The array consists of only those queues for which user has acls. * * @return QueueAclsInfo[] http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java index 0c95a14..6e2c89f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java @@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceStability; * *
RecordReader
, typically, converts the byte-oriented view of
* the input, provided by the InputSplit
, and presents a
- * record-oriented view for the {@link Mapper} & {@link Reducer} tasks for
+ * record-oriented view for the {@link Mapper} and {@link Reducer} tasks for
* processing. It thus assumes the responsibility of processing record
* boundaries and presenting the tasks with keys and values.
Reducer
is input the grouped output of a {@link Mapper}.
* In this phase the framework, for each Reducer
, fetches the
@@ -51,7 +51,7 @@ import org.apache.hadoop.io.Closeable;
*
The framework groups Reducer
inputs by key
s
* (since different Mapper
s may have output the same key) in this
@@ -60,7 +60,7 @@ import org.apache.hadoop.io.Closeable;
*
The shuffle and sort phases occur simultaneously i.e. while outputs are * being fetched they are merged.
* - *If equivalence rules for keys while grouping the intermediates are * different from those for grouping keys before reduction, then one may @@ -86,11 +86,11 @@ import org.apache.hadoop.io.Closeable; *
In this phase the
* {@link #reduce(Object, Iterator, OutputCollector, Reporter)}
- * method is called for each <key, (list of values)>
pair in
+ * method is called for each <key, (list of values)>
pair in
* the grouped inputs.
The output of the reduce task is typically written to the * {@link FileSystem} via @@ -156,7 +156,7 @@ import org.apache.hadoop.io.Closeable; * } * } * } - *
+ * * * @see Mapper * @see Partitioner @@ -171,7 +171,7 @@ public interface ReducerThe framework calls this method for each
- * <key, (list of values)>
pair in the grouped inputs.
+ * <key, (list of values)>
pair in the grouped inputs.
* Output values must be of the same type as input values. Input keys must
* not be altered. The framework will reuse the key and value objects
* that are passed into the reduce, therefore the application should clone
http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java
----------------------------------------------------------------------
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java
index 14f040a..723a234 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java
@@ -29,61 +29,61 @@ import java.io.IOException;
/**
* The ChainMapper class allows to use multiple Mapper classes within a single
* Map task.
- *
* The Mapper classes are invoked in a chained (or piped) fashion, the output of * the first becomes the input of the second, and so on until the last Mapper, * the output of the last Mapper will be written to the task's output. - *
+ ** The key functionality of this feature is that the Mappers in the chain do not * need to be aware that they are executed in a chain. This enables having * reusable specialized Mappers that can be combined to perform composite * operations within a single task. - *
+ ** Special care has to be taken when creating chains that the key/values output * by a Mapper are valid for the following Mapper in the chain. It is assumed * all Mappers and the Reduce in the chain use maching output and input key and * value classes as no conversion is done by the chaining code. - *
+ *
* Using the ChainMapper and the ChainReducer classes it is possible to compose
* Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]
. An
* immediate benefit of this pattern is a dramatic reduction in disk IO.
- *
* IMPORTANT: There is no need to specify the output key/value classes for the * ChainMapper, this is done by the addMapper for the last mapper in the chain. - *
+ ** ChainMapper usage pattern: - *
+ **
* ... * conf.setJobName("chain"); * conf.setInputFormat(TextInputFormat.class); * conf.setOutputFormat(TextOutputFormat.class); - * + * * JobConf mapAConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, * Text.class, Text.class, true, mapAConf); - * + * * JobConf mapBConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, mapBConf); - * + * * JobConf reduceConf = new JobConf(false); * ... * ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, * Text.class, Text.class, true, reduceConf); - * + * * ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, null); - * + * * ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, * LongWritable.class, LongWritable.class, true, null); - * + * * FileInputFormat.setInputPaths(conf, inDir); * FileOutputFormat.setOutputPath(conf, outDir); * ... - * + * * JobClient jc = new JobClient(conf); * RunningJob job = jc.submitJob(conf); * ... @@ -95,21 +95,21 @@ public class ChainMapper implements Mapper { /** * Adds a Mapper class to the chain job's JobConf. - * + ** It has to be specified how key and values are passed from one element of * the chain to the next, by value or by reference. If a Mapper leverages the * assumed semantics that the key and values are not modified by the collector * 'by value' must be used. If the Mapper does not expect this semantics, as * an optimization to avoid serialization and deserialization 'by reference' * can be used. - *
+ ** For the added Mapper the configuration given for it, *
+ *mapperConf
, have precedence over the job's JobConf. This * precedence is in effect when the task is running. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainMapper, this is done by the addMapper for the last mapper in the chain - *
+ ** * @param job job's JobConf to add the Mapper class. * @param klass the Mapper class to add. @@ -148,7 +148,7 @@ public class ChainMapper implements Mapper { /** * Configures the ChainMapper and all the Mappers in the chain. - *
+ ** If this method is overriden
+ *super.configure(...)
should be * invoked at the beginning of the overwriter method. */ @@ -171,7 +171,7 @@ public class ChainMapper implements Mapper { /** * Closes the ChainMapper and all the Mappers in the chain. - ** If this method is overriden
+ *super.close()
should be * invoked at the end of the overwriter method. */ http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java index 641d82c..6f5b7cd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java @@ -27,63 +27,63 @@ import java.util.Iterator; /** * The ChainReducer class allows to chain multiple Mapper classes after a * Reducer within the Reducer task. - ** For each record output by the Reducer, the Mapper classes are invoked in a * chained (or piped) fashion, the output of the first becomes the input of the * second, and so on until the last Mapper, the output of the last Mapper will * be written to the task's output. - *
+ ** The key functionality of this feature is that the Mappers in the chain do not * need to be aware that they are executed after the Reducer or in a chain. * This enables having reusable specialized Mappers that can be combined to * perform composite operations within a single task. - *
+ ** Special care has to be taken when creating chains that the key/values output * by a Mapper are valid for the following Mapper in the chain. It is assumed * all Mappers and the Reduce in the chain use maching output and input key and * value classes as no conversion is done by the chaining code. - *
+ ** Using the ChainMapper and the ChainReducer classes is possible to compose * Map/Reduce jobs that look like
+ *[MAP+ / REDUCE MAP*]
. And * immediate benefit of this pattern is a dramatic reduction in disk IO. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainReducer, this is done by the setReducer or the addMapper for the last * element in the chain. - *
+ ** ChainReducer usage pattern: - *
+ **
* ... * conf.setJobName("chain"); * conf.setInputFormat(TextInputFormat.class); * conf.setOutputFormat(TextOutputFormat.class); - * + * * JobConf mapAConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, * Text.class, Text.class, true, mapAConf); - * + * * JobConf mapBConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, mapBConf); - * + * * JobConf reduceConf = new JobConf(false); * ... * ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, * Text.class, Text.class, true, reduceConf); - * + * * ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, null); - * + * * ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, * LongWritable.class, LongWritable.class, true, null); - * + * * FileInputFormat.setInputPaths(conf, inDir); * FileOutputFormat.setOutputPath(conf, outDir); * ... - * + * * JobClient jc = new JobClient(conf); * RunningJob job = jc.submitJob(conf); * ... @@ -95,18 +95,18 @@ public class ChainReducer implements Reducer { /** * Sets the Reducer class to the chain job's JobConf. - * + ** It has to be specified how key and values are passed from one element of * the chain to the next, by value or by reference. If a Reducer leverages the * assumed semantics that the key and values are not modified by the collector * 'by value' must be used. If the Reducer does not expect this semantics, as * an optimization to avoid serialization and deserialization 'by reference' * can be used. - *
+ ** For the added Reducer the configuration given for it, *
+ *reducerConf
, have precedence over the job's JobConf. This * precedence is in effect when the task is running. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainReducer, this is done by the setReducer or the addMapper for the last * element in the chain. @@ -139,18 +139,18 @@ public class ChainReducer implements Reducer { /** * Adds a Mapper class to the chain job's JobConf. - *
+ ** It has to be specified how key and values are passed from one element of * the chain to the next, by value or by reference. If a Mapper leverages the * assumed semantics that the key and values are not modified by the collector * 'by value' must be used. If the Mapper does not expect this semantics, as * an optimization to avoid serialization and deserialization 'by reference' * can be used. - *
+ ** For the added Mapper the configuration given for it, *
+ *mapperConf
, have precedence over the job's JobConf. This * precedence is in effect when the task is running. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainMapper, this is done by the addMapper for the last mapper in the chain * . @@ -191,7 +191,7 @@ public class ChainReducer implements Reducer { /** * Configures the ChainReducer, the Reducer and all the Mappers in the chain. - *
+ ** If this method is overriden
+ *super.configure(...)
should be * invoked at the beginning of the overwriter method. */ @@ -215,7 +215,7 @@ public class ChainReducer implements Reducer { /** * Closes the ChainReducer, the Reducer and all the Mappers in the chain. - ** If this method is overriden
+ *super.close()
should be * invoked at the end of the overwriter method. */ http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java index 39e80f9..f0f3652 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java @@ -31,29 +31,29 @@ import java.util.*; * than the job default output via theOutputCollector
passed to * themap()
andreduce()
methods of the *Mapper
andReducer
implementations. - ** Each additional output, or named output, may be configured with its own *
+ *OutputFormat
, with its own key class and with its own value * class. - ** A named output can be a single file or a multi file. The later is refered as * a multi named output. - *
+ ** A multi named output is an unbound set of files all sharing the same *
+ *OutputFormat
, key class and value class configuration. - ** When named outputs are used within a
+ *Mapper
implementation, * key/values written to a name output are not part of the reduce phase, only * key/values written to the jobOutputCollector
are part of the * reduce phase. - ** MultipleOutputs supports counters, by default the are disabled. The counters * group is the {@link MultipleOutputs} class name. *
* The names of the counters are the same as the named outputs. For multi * named outputs the name of the counter is the concatenation of the named * output, and underscore '_' and the multiname. - * + ** Job configuration usage pattern is: *
* @@ -82,7 +82,7 @@ import java.util.*; * * ... *- * + ** Job configuration usage pattern is: *
* @@ -271,7 +271,6 @@ public class MultipleOutputs { /** * Adds a named output for the job. - * * * @param conf job conf to add the named output * @param namedOutput named output name, it has to be a word, letters @@ -291,7 +290,6 @@ public class MultipleOutputs { /** * Adds a multi named output for the job. - * * * @param conf job conf to add the named output * @param namedOutput named output name, it has to be a word, letters @@ -311,7 +309,6 @@ public class MultipleOutputs { /** * Adds a named output for the job. - * * * @param conf job conf to add the named output * @param namedOutput named output name, it has to be a word, letters @@ -339,9 +336,9 @@ public class MultipleOutputs { /** * Enables or disables counters for the named outputs. - * + ** By default these counters are disabled. - *
+ ** MultipleOutputs supports counters, by default the are disabled. * The counters group is the {@link MultipleOutputs} class name. *
@@ -358,9 +355,9 @@ public class MultipleOutputs { /** * Returns if the counters for the named outputs are enabled or not. - * + ** By default these counters are disabled. - *
+ ** MultipleOutputs supports counters, by default they are disabled. * The counters group is the {@link MultipleOutputs} class name. *
@@ -465,7 +462,6 @@ public class MultipleOutputs { /** * Gets the output collector for a named output. - * * * @param namedOutput the named output name * @param reporter the reporter @@ -480,7 +476,6 @@ public class MultipleOutputs { /** * Gets the output collector for a multi named output. - * * * @param namedOutput the named output name * @param multiName the multi name part @@ -525,7 +520,7 @@ public class MultipleOutputs { /** * Closes all the opened named outputs. - * + ** If overridden, subclasses must invoke
super.close()
at the * end of theirclose()
* http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java index 8e884ce..75179e1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java @@ -32,7 +32,7 @@ import org.apache.hadoop.mapred.Reporter; /** - * A {@link Mapper} that maps text values intopairs. Uses + * A {@link Mapper} that maps text values into <token,freq> pairs. Uses * {@link StringTokenizer} to break text into tokens. 
*/ @InterfaceAudience.Public http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java index 8c20723..6251925 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java @@ -60,7 +60,7 @@ import org.apache.hadoop.util.GenericOptionsParser; * The developer using Aggregate will need only to provide a plugin class * conforming to the following interface: * - * public interface ValueAggregatorDescriptor { public ArrayList + * public interface ValueAggregatorDescriptor { public ArrayList<Entry> * generateKeyValPairs(Object key, Object value); public void * configure(JobConfjob); } * http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java index a6b3573..2738968 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java @@ -45,7 +45,8 @@ public class ValueAggregatorReducer values, OutputCollector output, Reporter reporter) throws IOException { http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java index 2715705..159919f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java @@ -195,8 +195,8 @@ public class DBInputFormat * @param inputClass the class object implementing DBWritable, which is the * Java object holding tuple fields. * @param tableName The table to read data from - * @param conditions The condition which to select data with, eg. '(updated > - * 20070101 AND length > 0)' + * @param conditions The condition which to select data with, eg. 
'(updated > + * 20070101 AND length > 0)' * @param orderBy the fieldNames in the orderBy clause. * @param fieldNames The field names in the table * @see #setInput(JobConf, Class, String, String) http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java index 60ff715..34353ac 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java @@ -134,6 +134,7 @@ public class Cluster { /** * Close the Cluster
. + * @throws IOException */ public synchronized void close() throws IOException { clientProtocolProvider.close(client); http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java index c4c2778..b5e54b5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java @@ -40,15 +40,15 @@ import org.apache.hadoop.io.Writable; * Slot capacity of the cluster. *
Clients can query for the latest ClusterMetrics
, via
* {@link Cluster#getClusterStatus()}.
The framework first calls
* {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
- * {@link #map(Object, Object, Context)}
+ * {@link #map(Object, Object, org.apache.hadoop.mapreduce.Mapper.Context)}
* for each key/value pair in the InputSplit
. Finally
- * {@link #cleanup(Context)} is called.
All intermediate values associated with a given output key are * subsequently grouped by the framework, and passed to a {@link Reducer} to @@ -84,9 +84,10 @@ import org.apache.hadoop.mapreduce.task.MapContextImpl; * } * } * } - *
+ * * - *Applications may override the {@link #run(Context)} method to exert + *
Applications may override the
+ * {@link #run(org.apache.hadoop.mapreduce.Mapper.Context)} method to exert
* greater control on map processing e.g. multi-threaded Mapper
s
* etc.
The Reducer
copies the sorted output from each
* {@link Mapper} using HTTP across the network.
The framework merge sorts Reducer
inputs by
* key
s
@@ -55,7 +55,7 @@ import java.util.Iterator;
*
The shuffle and sort phases occur simultaneously i.e. while outputs are * being fetched they are merged.
* - *To achieve a secondary sort on the values returned by the value * iterator, the application should extend the key with the secondary @@ -83,10 +83,10 @@ import java.util.Iterator; *
In this phase the
- * {@link #reduce(Object, Iterable, Context)}
+ * {@link #reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}
* method is called for each <key, (collection of values)>
in
* the sorted inputs.
The output of the reduce task is typically written to a @@ -113,7 +113,7 @@ import java.util.Iterator; * context.write(key, result); * } * } - *
+ * * * @see Mapper * @see Partitioner http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java index eaa5af8..86a57d9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java @@ -115,7 +115,7 @@ import java.net.URI; * } * } * - * + * * * It is also very common to use the DistributedCache by using * {@link org.apache.hadoop.util.GenericOptionsParser}. @@ -235,7 +235,6 @@ public class DistributedCache { * DistributedCache and MapReduce code. * @param conf The configuration which stored the timestamps * @return a long array of timestamps - * @throws IOException * @deprecated Use {@link JobContext#getArchiveTimestamps()} instead */ @Deprecated @@ -250,7 +249,6 @@ public class DistributedCache { * DistributedCache and MapReduce code. * @param conf The configuration which stored the timestamps * @return a long array of timestamps - * @throws IOException * @deprecated Use {@link JobContext#getFileTimestamps()} instead */ @Deprecated @@ -434,7 +432,6 @@ public class DistributedCache { * internal DistributedCache and MapReduce code. 
* @param conf The configuration which stored the timestamps * @return a string array of booleans - * @throws IOException */ public static boolean[] getFileVisibilities(Configuration conf) { return parseBooleans(conf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES)); http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java index d8833da..de25f64 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java @@ -60,7 +60,7 @@ import org.apache.hadoop.util.GenericOptionsParser; * The developer using Aggregate will need only to provide a plugin class * conforming to the following interface: * - * public interface ValueAggregatorDescriptor { public ArrayList* The configuration properties of the chain job have precedence over the * configuration properties of the Mapper. * @@ -738,7 +738,7 @@ public class Chain { /** * Sets the Reducer class to the chain job. * - *
+ ** The configuration properties of the chain job have precedence over the * configuration properties of the Reducer. * http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java index c042ff0..c3bf012 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java @@ -57,24 +57,24 @@ import org.apache.hadoop.mapreduce.lib.chain.Chain.ChainBlockingQueue; * ChainMapper, this is done by the addMapper for the last mapper in the chain. *
* ChainMapper usage pattern: - * + ** *
* ... * Job = new Job(conf); - * + * * Configuration mapAConf = new Configuration(false); * ... * ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class, * Text.class, Text.class, true, mapAConf); - * + * * Configuration mapBConf = new Configuration(false); * ... * ChainMapper.addMapper(job, BMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, mapBConf); - * + * * ... - * + * * job.waitForComplettion(true); * ... *http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java index dc03d5d..1c37587 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java @@ -50,7 +50,7 @@ import java.io.IOException; * all Mappers and the Reduce in the chain use matching output and input key and * value classes as no conversion is done by the chaining code. * - * Using the ChainMapper and the ChainReducer classes is possible to + *
Using the ChainMapper and the ChainReducer classes it is possible to
* compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]
. An
* immediate benefit of this pattern is a dramatic reduction in disk IO.
@@ -59,26 +59,26 @@ import java.io.IOException; * element in the chain. *
* ChainReducer usage pattern: - * + ** *
* ... * Job = new Job(conf); * .... - * + * * Configuration reduceConf = new Configuration(false); * ... * ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class, * Text.class, Text.class, true, reduceConf); - * + * * ChainReducer.addMapper(job, CMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, null); - * + * * ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class, * LongWritable.class, LongWritable.class, true, null); - * + * * ... - * + * * job.waitForCompletion(true); * ... *http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java index 00fbeda..f193374 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java @@ -316,7 +316,7 @@ public class DBInputFormat
* It can be used instead of the default implementation, - * @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + * {@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU * bound in order to improve throughput. *
* Mapper implementations using this MapRunnable must be thread-safe. *
* The Map-Reduce job has to be configured with the mapper to use via - * {@link #setMapperClass(Configuration, Class)} and + * {@link #setMapperClass(Job, Class)} and * the number of threads the thread-pool can use with the - * {@link #getNumberOfThreads(Configuration) method. The default + * {@link #getNumberOfThreads(JobContext)} method. The default * value is 10 threads. *
*/ http://git-wip-us.apache.org/repos/asf/hadoop/blob/ccde4aed/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java index fa3708e..2c69542 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java @@ -181,7 +181,7 @@ public static final String OUTDIR = "mapreduce.output.fileoutputformat.outputdir * Get the {@link Path} to the task's temporary output directory * for the map-reduce job * - *
Some applications need to create/write-to side-files, which differ from * the actual job-outputs.