hadoop-mapreduce-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Pedro Costa <psdc1...@gmail.com>
Subject Re: PiEstimator error - Type mismatch in key from map
Date Thu, 27 Jan 2011 17:29:56 GMT
The reason was that I set the mapred-site.xml to use the new API. Thanks,

On Thu, Jan 27, 2011 at 5:04 PM, Chase Bradford
<chase.bradford@gmail.com> wrote:
> That's very puzzling, because I don't see any reason for the new API
> to get activated.  I'm pretty sure that's what's happening though,
> based on this section of the exception's call stack:
>
>       at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
>       at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
>       at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>
>
>
> On Thu, Jan 27, 2011 at 8:50 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>> [code]
>>
>> package org.apache.hadoop.examples;
>>
>> import java.io.IOException;
>> import java.math.BigDecimal;
>> import java.util.Iterator;
>>
>> import org.apache.hadoop.conf.Configured;
>> import org.apache.hadoop.fs.FileSystem;
>> import org.apache.hadoop.fs.Path;
>> import org.apache.hadoop.io.BooleanWritable;
>> import org.apache.hadoop.io.LongWritable;
>> import org.apache.hadoop.io.SequenceFile;
>> import org.apache.hadoop.io.Writable;
>> import org.apache.hadoop.io.WritableComparable;
>> import org.apache.hadoop.io.SequenceFile.CompressionType;
>> import org.apache.hadoop.mapred.FileInputFormat;
>> import org.apache.hadoop.mapred.FileOutputFormat;
>> import org.apache.hadoop.mapred.JobClient;
>> import org.apache.hadoop.mapred.JobConf;
>> import org.apache.hadoop.mapred.MapReduceBase;
>> import org.apache.hadoop.mapred.Mapper;
>> import org.apache.hadoop.mapred.OutputCollector;
>> import org.apache.hadoop.mapred.Reducer;
>> import org.apache.hadoop.mapred.Reporter;
>> import org.apache.hadoop.mapred.SequenceFileInputFormat;
>> import org.apache.hadoop.mapred.SequenceFileOutputFormat;
>> import org.apache.hadoop.util.Tool;
>> import org.apache.hadoop.util.ToolRunner;
>>
>>
>> public class PiEstimator extends Configured implements Tool {
>>        /** tmp directory for input/output */
>>        static private final Path TMP_DIR = new
>> Path(PiEstimator.class.getSimpleName() + "_TMP_3_141592654");
>>
>>
>>        /**
>>         * Mapper class for Pi estimation.
>>         * Generate points in a unit square
>>         * and then count points inside/outside of the inscribed circle of the
square.
>>         */
>>        public static class PiMapper extends MapReduceBase
>>        implements Mapper<LongWritable, LongWritable, BooleanWritable, LongWritable>
{
>>
>>                /** Map method.
>>                 * @param offset samples starting from the (offset+1)th sample.
>>                 * @param size the number of samples for this map
>>                 * @param out output {true->numInside, false->numOutside}
>>                 * @param reporter
>>                 */
>>                public void map(LongWritable offset,
>>                                LongWritable size,
>>                                OutputCollector<BooleanWritable,
LongWritable> out,
>>                                Reporter reporter) throws IOException
{
>>
>>                        final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
>>                        long numInside = 0L;
>>                        long numOutside = 0L;
>>
>>                        for(long i = 0; i < size.get(); ) {
>>                                //generate points in a unit square
>>                                final double[] point = haltonsequence.nextPoint();
>>
>>                                //count points inside/outside of the
inscribed circle of the square
>>                                final double x = point[0] - 0.5;
>>                                final double y = point[1] - 0.5;
>>                                if (x*x + y*y > 0.25) {
>>                                        numOutside++;
>>                                } else {
>>                                        numInside++;
>>                                }
>>
>>                                //report status
>>                                i++;
>>                                if (i % 1000 == 0) {
>>                                        reporter.setStatus("Generated
" + i + " samples.");
>>                                }
>>                        }
>>
>>                        //output map results
>>                        out.collect(new BooleanWritable(true), new LongWritable(numInside));
>>                        out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>>                }
>>        }
>>
>>
>>
>>        /**
>>         * Run a map/reduce job for estimating Pi.
>>         *
>>         * @return the estimated value of Pi
>>         */
>>        public static BigDecimal estimate(int numMaps, long numPoints, JobConf
jobConf)
>>        throws IOException {
>>                //setup job conf
>>                jobConf.setJobName(PiEstimator.class.getSimpleName());
>>
>>                jobConf.setInputFormat(SequenceFileInputFormat.class);
>>
>>                jobConf.setOutputKeyClass(BooleanWritable.class);
>>                jobConf.setOutputValueClass(LongWritable.class);
>>                //              jobConf.setMapOutputKeyClass(BooleanWritable.class);
>>                //              jobConf.setMapOutputValueClass(LongWritable.class);
>>                jobConf.setOutputFormat(SequenceFileOutputFormat.class);
>>
>>                jobConf.setMapperClass(PiMapper.class);
>>                jobConf.setNumMapTasks(numMaps);
>>
>>                jobConf.setReducerClass(PiReducer.class);
>>                jobConf.setNumReduceTasks(1);
>>
>>                // turn off speculative execution, because DFS doesn't handle
>>                // multiple writers to the same file.
>>                jobConf.setSpeculativeExecution(false);
>>
>>                //setup input/output directories
>>                final Path inDir = new Path(TMP_DIR, "in");
>>                final Path outDir = new Path(TMP_DIR, "out");
>>                FileInputFormat.setInputPaths(jobConf, inDir);
>>                FileOutputFormat.setOutputPath(jobConf, outDir);
>>
>>                final FileSystem fs = FileSystem.get(jobConf);
>>                if (fs.exists(TMP_DIR)) {
>>                        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
>>                                        + " already exists.  Please
remove it first.");
>>                }
>>                if (!fs.mkdirs(inDir)) {
>>                        throw new IOException("Cannot create input directory
" + inDir);
>>                }
>>
>>                try {
>>                        //generate an input file for each map task
>>                        for(int i=0; i < numMaps; ++i) {
>>                                final Path file = new Path(inDir,
"part"+i);
>>                                final LongWritable offset = new LongWritable(i
* numPoints);
>>                                final LongWritable size = new LongWritable(numPoints);
>>                                final SequenceFile.Writer writer =
SequenceFile.createWriter(
>>                                                fs, jobConf,
file,
>>                                                LongWritable.class,
LongWritable.class, CompressionType.NONE);
>>                                try {
>>                                        writer.append(offset,
size);
>>                                } finally {
>>                                        writer.close();
>>                                }
>>                                System.out.println("Wrote input for
Map #"+i);
>>                        }
>>
>>                        //start a map/reduce job
>>                        System.out.println("Starting Job");
>>                        final long startTime = System.currentTimeMillis();
>>                        JobClient.runJob(jobConf);
>>                        final double duration = (System.currentTimeMillis()
- startTime)/1000.0;
>>                        System.out.println("Job Finished in " + duration
+ " seconds");
>>
>>                        //read outputs
>>                        Path inFile = new Path(outDir, "reduce-out");
>>                        LongWritable numInside = new LongWritable();
>>                        LongWritable numOutside = new LongWritable();
>>                        SequenceFile.Reader reader = new SequenceFile.Reader(fs,
inFile, jobConf);
>>                        try {
>>                                reader.next(numInside, numOutside);
>>                        } finally {
>>                                reader.close();
>>                        }
>>
>>                        //compute estimated value
>>                        return BigDecimal.valueOf(4).setScale(20)
>>                        .multiply(BigDecimal.valueOf(numInside.get()))
>>                        .divide(BigDecimal.valueOf(numMaps))
>>                        .divide(BigDecimal.valueOf(numPoints));
>>                } finally {
>>                        fs.delete(TMP_DIR, true);
>>                }
>>        }
>>
>>        /**
>>         * Parse arguments and then runs a map/reduce job.
>>         * Print output in standard out.
>>         *
>>         * @return a non-zero if there is an error.  Otherwise, return 0.
>>         */
>>        public int run(String[] args) throws Exception {
>>                if (args.length != 2) {
>>                        System.err.println("Usage: "+getClass().getName()+"
<nMaps> <nSamples>");
>>                        ToolRunner.printGenericCommandUsage(System.err);
>>                        return -1;
>>                }
>>
>>                final int nMaps = Integer.parseInt(args[0]);
>>                final long nSamples = Long.parseLong(args[1]);
>>
>>                System.out.println("Number of Maps  = " + nMaps);
>>                System.out.println("Samples per Map = " + nSamples);
>>
>>                final JobConf jobConf = new JobConf(getConf(), getClass());
>>                System.out.println("Estimated value of Pi is "
>>                                + estimate(nMaps, nSamples, jobConf));
>>                return 0;
>>        }
>> }
>>
>> [/code]
>>
>>
>>
>> On Thu, Jan 27, 2011 at 4:44 PM, Chase Bradford
>> <chase.bradford@gmail.com> wrote:
>>> That should be fine, but mapreduce.Mapper.map has this signature:
>>>
>>> map(K key, V value, Context)
>>>
>>> Your PiEstimator map signature doesn't match, so it's not overriding
>>> the proper function and is never getting called by the framework.
>>>
>>> Could you paste your complete PiMapper class definition and the series
>>> of calls you make to setup your job?  That would make debugging the
>>> problem much easier.
>>>
>>> Chase
>>>
>>>
>>> On Thu, Jan 27, 2011 at 8:29 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>>>> Yes, that's the one that's being used ( o.a.h.mapreduce.Mapper ). This
>>>> is not the right one to use?
>>>>
>>>>
>>>>
>>>> On Thu, Jan 27, 2011 at 3:40 PM, Chase Bradford
>>>> <chase.bradford@gmail.com> wrote:
>>>>> Are you sure the function signature for your Mapper's map matches the
super class, and that you specified your Map class in the job setup?  It sounds a bit like
the base o.a.h.mapreduce.Mapper map implementation is being used instead.
>>>>>
>>>>>
>>>>> On Jan 27, 2011, at 2:36 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>>>>>
>>>>>> The map output class are well defined:
>>>>>> keyClass: class org.apache.hadoop.io.BooleanWritable - valClass:
class
>>>>>> org.apache.hadoop.io.LongWritable
>>>>>>
>>>>>> but executing the pi example, the values that map function passes
is:
>>>>>> keyClass: class org.apache.hadoop.io.LongWritable - valClass: class
>>>>>> org.apache.hadoop.io.Text
>>>>>>
>>>>>>
>>>>>> I looked at the PiEstimator.class.PiMapper#map function, and the
>>>>>> output collector seems ok.
>>>>>>
>>>>>> [code]
>>>>>> public void map(LongWritable offset,
>>>>>>        LongWritable size,
>>>>>>        OutputCollector<BooleanWritable, LongWritable> out,
>>>>>>        Reporter reporter) throws IOException {
>>>>>> (...)
>>>>>> out.collect(new BooleanWritable(true), new LongWritable(numInside));
>>>>>> out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>>>>>> }
>>>>>> [/code]
>>>>>>
>>>>>> I'm really confused, right now. How can this be happening?
>>>>>>
>>>>>>
>>>>>> On Thu, Jan 27, 2011 at 10:19 AM, Pedro Costa <psdc1978@gmail.com>
wrote:
>>>>>>> Thanks Nicholas, but it didn't work.
>>>>>>>
>>>>>>> Can I do a remote debugging on hadoop examples? I really like
to put a
>>>>>>> breakpoint in the Pi class.
>>>>>>>
>>>>>>> Thanks,
>>>>>>>
>>>>>>> On Wed, Jan 26, 2011 at 6:46 PM, Tsz Wo (Nicholas), Sze
>>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>>> Okay, I got it now.  You were talking about your own program,
not the
>>>>>>>> PiEstimator example that came from Hadoop.  Then, you have to
set
>>>>>>>> "mapred.output.key.class" and "mapred.output.value.class"
as Srihari
>>>>>>>> mentioned.  Below are the APIs.
>>>>>>>>
>>>>>>>>     //new API
>>>>>>>>     final Job job = ...
>>>>>>>>     job.setMapOutputKeyClass(BooleanWritable.class);
>>>>>>>>     job.setMapOutputValueClass(LongWritable.class);
>>>>>>>>
>>>>>>>>     //old API
>>>>>>>>     final JobConf jobconf = ...
>>>>>>>>     jobconf.setOutputKeyClass(BooleanWritable.class);
>>>>>>>>     jobconf.setOutputValueClass(LongWritable.class);
>>>>>>>>
>>>>>>>> Nicholas
>>>>>>>>
>>>>>>>> ________________________________
>>>>>>>> From: Srihari Anantha Padmanabhan <sriharia@yahoo-inc.com>
>>>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>>>> Sent: Wed, January 26, 2011 10:36:09 AM
>>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from
map
>>>>>>>>
>>>>>>>> I am using Hadoop 0.20.2. I just wrote my own map-reduce
program based on
>>>>>>>> the map-reduce tutorial at
>>>>>>>> http://hadoop.apache.org/common/docs/r0.20.2/mapred_tutorial.html
>>>>>>>>
>>>>>>>> On Jan 26, 2011, at 10:27 AM, Pedro Costa wrote:
>>>>>>>>
>>>>>>>>> Hadoop 20.1
>>>>>>>>>
>>>>>>>>> On Wed, Jan 26, 2011 at 6:26 PM, Tsz Wo (Nicholas), Sze
>>>>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>>>>> Hi Srihari,
>>>>>>>>>>
>>>>>>>>>> Same questions to you: Which version of Hadoop are
you using?  And where
>>>>>>>>>> did
>>>>>>>>>> you get the examples?  I guess you were able to
reproduce it.  I suspect
>>>>>>>>>> the
>>>>>>>>>> examples and the Hadoop are in different versions.
>>>>>>>>>>
>>>>>>>>>> Nicholas
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> ________________________________
>>>>>>>>>> From: Srihari Anantha Padmanabhan <sriharia@yahoo-inc.com>
>>>>>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>>>>>> Sent: Wed, January 26, 2011 10:15:08 AM
>>>>>>>>>> Subject: Re: PiEstimator error - Type mismatch in
key from map
>>>>>>>>>>
>>>>>>>>>> I got a similar error before in one of my projects.
I had to set the
>>>>>>>>>> values
>>>>>>>>>> for "mapred.output.key.class" and "mapred.output.value.class".
>>>>>>>>>> That resolved the issue for me.
>>>>>>>>>> Srihari
>>>>>>>>>> On Jan 26, 2011, at 10:09 AM, Pedro Costa wrote:
>>>>>>>>>>
>>>>>>>>>> Yes, I can reproduce it deterministically. But, I
also did some
>>>>>>>>>> changes to the Hadoop MR code. Most definitely this
is the reason. I'm
>>>>>>>>>> looking throughly through the code.
>>>>>>>>>>
>>>>>>>>>> I'll say something after I find the problem.
>>>>>>>>>>
>>>>>>>>>> I was just wondering if this error has happened to
someone before.
>>>>>>>>>> Maybe I could get a hint and try to see what's my
problem easily.
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>>
>>>>>>>>>> On Wed, Jan 26, 2011 at 6:02 PM, Tsz Wo (Nicholas),
Sze
>>>>>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>>>>>
>>>>>>>>>> Hi Pedro,
>>>>>>>>>>
>>>>>>>>>> This is interesting.  Which version of Hadoop are
you using?  And where
>>>>>>>>>> did
>>>>>>>>>>
>>>>>>>>>> you get the example class files?  Also, are you
able to reproduce it
>>>>>>>>>>
>>>>>>>>>> deterministically?
>>>>>>>>>>
>>>>>>>>>> Nicholas
>>>>>>>>>>
>>>>>>>>>> ________________________________
>>>>>>>>>>
>>>>>>>>>> From: Pedro Costa <psdc1978@gmail.com>
>>>>>>>>>>
>>>>>>>>>> To: mapreduce-user@hadoop.apache.org
>>>>>>>>>>
>>>>>>>>>> Sent: Wed, January 26, 2011 5:47:01 AM
>>>>>>>>>>
>>>>>>>>>> Subject: PiEstimator error - Type mismatch in key
from map
>>>>>>>>>>
>>>>>>>>>> Hi,
>>>>>>>>>>
>>>>>>>>>> I run the PI example of hadoop, and I've got the
following error:
>>>>>>>>>>
>>>>>>>>>> [code]
>>>>>>>>>>
>>>>>>>>>> java.io.IOException: Type mismatch in key from map:
expected
>>>>>>>>>>
>>>>>>>>>> org.apache.hadoop.io.BooleanWritable, recieved
>>>>>>>>>>
>>>>>>>>>> org.apache.hadoop.io.LongWritable
>>>>>>>>>>
>>>>>>>>>>     at
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:885)
>>>>>>>>>>
>>>>>>>>>>     at
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:551)
>>>>>>>>>>
>>>>>>>>>>     at
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:81)
>>>>>>>>>>
>>>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
>>>>>>>>>>
>>>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
>>>>>>>>>>
>>>>>>>>>>     at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
>>>>>>>>>>
>>>>>>>>>>     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>>>>>>>>>>
>>>>>>>>>>     at org.apache.hadoop.mapred.Child.main(Child.java:190)
>>>>>>>>>>
>>>>>>>>>> [/code]
>>>>>>>>>>
>>>>>>>>>> I've look at the map function of the class "PiEstimator.class"
and it
>>>>>>>>>> seems
>>>>>>>>>>
>>>>>>>>>> ok.
>>>>>>>>>>
>>>>>>>>>> [code]
>>>>>>>>>>
>>>>>>>>>> public void map(LongWritable offset,
>>>>>>>>>>
>>>>>>>>>>         LongWritable size,
>>>>>>>>>>
>>>>>>>>>>         OutputCollector<BooleanWritable, LongWritable>
out,
>>>>>>>>>>
>>>>>>>>>>         Reporter reporter) throws IOException
{}
>>>>>>>>>>
>>>>>>>>>> [/code]
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> What's wrong with this example?
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>>
>>>>>>>>>> Pedro
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> Pedro
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> --
>>>>>>>>> Pedro
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Pedro
>>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> --
>>>>>> Pedro
>>>>>
>>>>
>>>>
>>>>
>>>> --
>>>> Pedro
>>>>
>>>
>>>
>>>
>>> --
>>> Chase Bradford
>>>
>>>
>>> “If in physics there's something you don't understand, you can always
>>> hide behind the uncharted depths of nature. But if your program
>>> doesn't work, there is no obstinate nature. If it doesn't work, you've
>>> messed up.”
>>>
>>> - Edsger Dijkstra
>>>
>>
>>
>>
>> --
>> Pedro
>>
>
>
>
> --
> Chase Bradford
>
>
> “If in physics there's something you don't understand, you can always
> hide behind the uncharted depths of nature. But if your program
> doesn't work, there is no obstinate nature. If it doesn't work, you've
> messed up.”
>
> - Edsger Dijkstra
>



-- 
Pedro

Mime
View raw message