hadoop-mapreduce-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Chase Bradford <chase.bradf...@gmail.com>
Subject Re: PiEstimator error - Type mismatch in key from map
Date Thu, 27 Jan 2011 17:04:13 GMT
That's very puzzling, because I don't see any reason for the new API
to get activated.  I'm pretty sure that's what's happening though,
based on this section of the exception's call stack:

       at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
       at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
       at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)



On Thu, Jan 27, 2011 at 8:50 AM, Pedro Costa <psdc1978@gmail.com> wrote:
> [code]
>
> package org.apache.hadoop.examples;
>
> import java.io.IOException;
> import java.math.BigDecimal;
> import java.util.Iterator;
>
> import org.apache.hadoop.conf.Configured;
> import org.apache.hadoop.fs.FileSystem;
> import org.apache.hadoop.fs.Path;
> import org.apache.hadoop.io.BooleanWritable;
> import org.apache.hadoop.io.LongWritable;
> import org.apache.hadoop.io.SequenceFile;
> import org.apache.hadoop.io.Writable;
> import org.apache.hadoop.io.WritableComparable;
> import org.apache.hadoop.io.SequenceFile.CompressionType;
> import org.apache.hadoop.mapred.FileInputFormat;
> import org.apache.hadoop.mapred.FileOutputFormat;
> import org.apache.hadoop.mapred.JobClient;
> import org.apache.hadoop.mapred.JobConf;
> import org.apache.hadoop.mapred.MapReduceBase;
> import org.apache.hadoop.mapred.Mapper;
> import org.apache.hadoop.mapred.OutputCollector;
> import org.apache.hadoop.mapred.Reducer;
> import org.apache.hadoop.mapred.Reporter;
> import org.apache.hadoop.mapred.SequenceFileInputFormat;
> import org.apache.hadoop.mapred.SequenceFileOutputFormat;
> import org.apache.hadoop.util.Tool;
> import org.apache.hadoop.util.ToolRunner;
>
>
> public class PiEstimator extends Configured implements Tool {
>        /** tmp directory for input/output */
>        static private final Path TMP_DIR = new
> Path(PiEstimator.class.getSimpleName() + "_TMP_3_141592654");
>
>
>        /**
>         * Mapper class for Pi estimation.
>         * Generate points in a unit square
>         * and then count points inside/outside of the inscribed circle of the square.
>         */
>        public static class PiMapper extends MapReduceBase
>        implements Mapper<LongWritable, LongWritable, BooleanWritable, LongWritable>
{
>
>                /** Map method.
>                 * @param offset samples starting from the (offset+1)th sample.
>                 * @param size the number of samples for this map
>                 * @param out output {true->numInside, false->numOutside}
>                 * @param reporter
>                 */
>                public void map(LongWritable offset,
>                                LongWritable size,
>                                OutputCollector<BooleanWritable, LongWritable>
out,
>                                Reporter reporter) throws IOException
{
>
>                        final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
>                        long numInside = 0L;
>                        long numOutside = 0L;
>
>                        for(long i = 0; i < size.get(); ) {
>                                //generate points in a unit square
>                                final double[] point = haltonsequence.nextPoint();
>
>                                //count points inside/outside of the inscribed
circle of the square
>                                final double x = point[0] - 0.5;
>                                final double y = point[1] - 0.5;
>                                if (x*x + y*y > 0.25) {
>                                        numOutside++;
>                                } else {
>                                        numInside++;
>                                }
>
>                                //report status
>                                i++;
>                                if (i % 1000 == 0) {
>                                        reporter.setStatus("Generated
" + i + " samples.");
>                                }
>                        }
>
>                        //output map results
>                        out.collect(new BooleanWritable(true), new LongWritable(numInside));
>                        out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>                }
>        }
>
>
>
>        /**
>         * Run a map/reduce job for estimating Pi.
>         *
>         * @return the estimated value of Pi
>         */
>        public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf)
>        throws IOException {
>                //setup job conf
>                jobConf.setJobName(PiEstimator.class.getSimpleName());
>
>                jobConf.setInputFormat(SequenceFileInputFormat.class);
>
>                jobConf.setOutputKeyClass(BooleanWritable.class);
>                jobConf.setOutputValueClass(LongWritable.class);
>                //              jobConf.setMapOutputKeyClass(BooleanWritable.class);
>                //              jobConf.setMapOutputValueClass(LongWritable.class);
>                jobConf.setOutputFormat(SequenceFileOutputFormat.class);
>
>                jobConf.setMapperClass(PiMapper.class);
>                jobConf.setNumMapTasks(numMaps);
>
>                jobConf.setReducerClass(PiReducer.class);
>                jobConf.setNumReduceTasks(1);
>
>                // turn off speculative execution, because DFS doesn't handle
>                // multiple writers to the same file.
>                jobConf.setSpeculativeExecution(false);
>
>                //setup input/output directories
>                final Path inDir = new Path(TMP_DIR, "in");
>                final Path outDir = new Path(TMP_DIR, "out");
>                FileInputFormat.setInputPaths(jobConf, inDir);
>                FileOutputFormat.setOutputPath(jobConf, outDir);
>
>                final FileSystem fs = FileSystem.get(jobConf);
>                if (fs.exists(TMP_DIR)) {
>                        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
>                                        + " already exists.  Please
remove it first.");
>                }
>                if (!fs.mkdirs(inDir)) {
>                        throw new IOException("Cannot create input directory
" + inDir);
>                }
>
>                try {
>                        //generate an input file for each map task
>                        for(int i=0; i < numMaps; ++i) {
>                                final Path file = new Path(inDir, "part"+i);
>                                final LongWritable offset = new LongWritable(i
* numPoints);
>                                final LongWritable size = new LongWritable(numPoints);
>                                final SequenceFile.Writer writer = SequenceFile.createWriter(
>                                                fs, jobConf, file,
>                                                LongWritable.class,
LongWritable.class, CompressionType.NONE);
>                                try {
>                                        writer.append(offset, size);
>                                } finally {
>                                        writer.close();
>                                }
>                                System.out.println("Wrote input for Map
#"+i);
>                        }
>
>                        //start a map/reduce job
>                        System.out.println("Starting Job");
>                        final long startTime = System.currentTimeMillis();
>                        JobClient.runJob(jobConf);
>                        final double duration = (System.currentTimeMillis()
- startTime)/1000.0;
>                        System.out.println("Job Finished in " + duration +
" seconds");
>
>                        //read outputs
>                        Path inFile = new Path(outDir, "reduce-out");
>                        LongWritable numInside = new LongWritable();
>                        LongWritable numOutside = new LongWritable();
>                        SequenceFile.Reader reader = new SequenceFile.Reader(fs,
inFile, jobConf);
>                        try {
>                                reader.next(numInside, numOutside);
>                        } finally {
>                                reader.close();
>                        }
>
>                        //compute estimated value
>                        return BigDecimal.valueOf(4).setScale(20)
>                        .multiply(BigDecimal.valueOf(numInside.get()))
>                        .divide(BigDecimal.valueOf(numMaps))
>                        .divide(BigDecimal.valueOf(numPoints));
>                } finally {
>                        fs.delete(TMP_DIR, true);
>                }
>        }
>
>        /**
>         * Parse arguments and then runs a map/reduce job.
>         * Print output in standard out.
>         *
>         * @return a non-zero if there is an error.  Otherwise, return 0.
>         */
>        public int run(String[] args) throws Exception {
>                if (args.length != 2) {
>                        System.err.println("Usage: "+getClass().getName()+"
<nMaps> <nSamples>");
>                        ToolRunner.printGenericCommandUsage(System.err);
>                        return -1;
>                }
>
>                final int nMaps = Integer.parseInt(args[0]);
>                final long nSamples = Long.parseLong(args[1]);
>
>                System.out.println("Number of Maps  = " + nMaps);
>                System.out.println("Samples per Map = " + nSamples);
>
>                final JobConf jobConf = new JobConf(getConf(), getClass());
>                System.out.println("Estimated value of Pi is "
>                                + estimate(nMaps, nSamples, jobConf));
>                return 0;
>        }
> }
>
> [/code]
>
>
>
> On Thu, Jan 27, 2011 at 4:44 PM, Chase Bradford
> <chase.bradford@gmail.com> wrote:
>> That should be fine, but mapreduce.Mapper.map has this signature:
>>
>> map(K key, V value, Context)
>>
>> Your PiEstimator map signature doesn't match, so it's not overriding
>> the proper function and is never getting called by the framework.
>>
>> Could you paste your complete PiMapper class definition and the series
>> of calls you make to setup your job?  That would make debugging the
>> problem much easier.
>>
>> Chase
>>
>>
>> On Thu, Jan 27, 2011 at 8:29 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>>> Yes, that's the one that's being used ( o.a.h.mapreduce.Mapper ). This
>>> is not the right one to use?
>>>
>>>
>>>
>>> On Thu, Jan 27, 2011 at 3:40 PM, Chase Bradford
>>> <chase.bradford@gmail.com> wrote:
>>>> Are you sure the function signature for your Mapper's map matches the super
class, and that you specified your Map class in the job setup?  It sounds a bit like the
base o.a.h.mapreduce.Mapper map implementation is being used instead.
>>>>
>>>>
>>>> On Jan 27, 2011, at 2:36 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>>>>
>>>>> The map output class are well defined:
>>>>> keyClass: class org.apache.hadoop.io.BooleanWritable - valClass: class
>>>>> org.apache.hadoop.io.LongWritable
>>>>>
>>>>> but executing the pi example, the values that map function passes is:
>>>>> keyClass: class org.apache.hadoop.io.LongWritable - valClass: class
>>>>> org.apache.hadoop.io.Text
>>>>>
>>>>>
>>>>> I looked at the PiEstimator.class.PiMapper#map function, and the
>>>>> output collector seems ok.
>>>>>
>>>>> [code]
>>>>> public void map(LongWritable offset,
>>>>>        LongWritable size,
>>>>>        OutputCollector<BooleanWritable, LongWritable> out,
>>>>>        Reporter reporter) throws IOException {
>>>>> (...)
>>>>> out.collect(new BooleanWritable(true), new LongWritable(numInside));
>>>>> out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>>>>> }
>>>>> [/code]
>>>>>
>>>>> I'm really confused, right now. How can this be happening?
>>>>>
>>>>>
>>>>> On Thu, Jan 27, 2011 at 10:19 AM, Pedro Costa <psdc1978@gmail.com>
wrote:
>>>>>> Thanks Nicholas, but it didn't work.
>>>>>>
>>>>>> Can I do a remote debugging on hadoop examples? I would really like to
put a
>>>>>> breakpoint in the Pi class.
>>>>>>
>>>>>> Thanks,
>>>>>>
>>>>>> On Wed, Jan 26, 2011 at 6:46 PM, Tsz Wo (Nicholas), Sze
>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>> Okay, I got it now.  You were talking about your programs but
not the
>>>>>>> PiEstimator example that came from Hadoop.  Then, you have to set
>>>>>>> "mapred.output.key.class" and "mapred.output.value.class" as
Srihari
>>>>>>> mentioned.  Below are the APIs.
>>>>>>>
>>>>>>>     //new API
>>>>>>>     final Job job = ...
>>>>>>>     job.setMapOutputKeyClass(BooleanWritable.class);
>>>>>>>     job.setMapOutputValueClass(LongWritable.class);
>>>>>>>
>>>>>>>     //old API
>>>>>>>     final JobConf jobconf = ...
>>>>>>>     jobconf.setOutputKeyClass(BooleanWritable.class);
>>>>>>>     jobconf.setOutputValueClass(LongWritable.class);
>>>>>>>
>>>>>>> Nicholas
>>>>>>>
>>>>>>> ________________________________
>>>>>>> From: Srihari Anantha Padmanabhan <sriharia@yahoo-inc.com>
>>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>>> Sent: Wed, January 26, 2011 10:36:09 AM
>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map
>>>>>>>
>>>>>>> I am using Hadoop 0.20.2. I just wrote my own map-reduce program
based on
>>>>>>> the map-reduce tutorial at
>>>>>>> http://hadoop.apache.org/common/docs/r0.20.2/mapred_tutorial.html
>>>>>>>
>>>>>>> On Jan 26, 2011, at 10:27 AM, Pedro Costa wrote:
>>>>>>>
>>>>>>>> Hadoop 20.1
>>>>>>>>
>>>>>>>> On Wed, Jan 26, 2011 at 6:26 PM, Tsz Wo (Nicholas), Sze
>>>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>>>> Hi Srihari,
>>>>>>>>>
>>>>>>>>> Same questions to you: Which version of Hadoop are you
using?  And where
>>>>>>>>> did
>>>>>>>>> you get the examples?  I guess you were able to reproduce
it.  I suspect
>>>>>>>>> the
>>>>>>>>> examples and the Hadoop are in different versions.
>>>>>>>>>
>>>>>>>>> Nicholas
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> ________________________________
>>>>>>>>> From: Srihari Anantha Padmanabhan <sriharia@yahoo-inc.com>
>>>>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>>>>> Sent: Wed, January 26, 2011 10:15:08 AM
>>>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key
from map
>>>>>>>>>
>>>>>>>>> I got a similar error before in one of my projects. I
had to set the
>>>>>>>>> values
>>>>>>>>> for "mapred.output.key.class" and "mapred.output.value.class".
>>>>>>>>> That resolved the issue for me.
>>>>>>>>> Srihari
>>>>>>>>> On Jan 26, 2011, at 10:09 AM, Pedro Costa wrote:
>>>>>>>>>
>>>>>>>>> Yes, I can reproduce it deterministically. But, I also
did some
>>>>>>>>> changes to the Hadoop MR code. Most definitely this is
the reason. I'm
>>>>>>>>> looking throughly through the code.
>>>>>>>>>
>>>>>>>>> I'll say something after I find the problem.
>>>>>>>>>
>>>>>>>>> I was just wondering if this error has happened to someone
before.
>>>>>>>>> Maybe I could get a hint and try to see what's my problem
easily.
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>>
>>>>>>>>> On Wed, Jan 26, 2011 at 6:02 PM, Tsz Wo (Nicholas), Sze
>>>>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>>>>
>>>>>>>>> Hi Pedro,
>>>>>>>>>
>>>>>>>>> This is interesting.  Which version of Hadoop are you
using?  And where
>>>>>>>>> did
>>>>>>>>>
>>>>>>>>> you get the example class files?  Also, are you able
to reproduce it
>>>>>>>>>
>>>>>>>>> deterministically?
>>>>>>>>>
>>>>>>>>> Nicholas
>>>>>>>>>
>>>>>>>>> ________________________________
>>>>>>>>>
>>>>>>>>> From: Pedro Costa <psdc1978@gmail.com>
>>>>>>>>>
>>>>>>>>> To: mapreduce-user@hadoop.apache.org
>>>>>>>>>
>>>>>>>>> Sent: Wed, January 26, 2011 5:47:01 AM
>>>>>>>>>
>>>>>>>>> Subject: PiEstimator error - Type mismatch in key from
map
>>>>>>>>>
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> I ran the PI example of hadoop, and I've got the following
error:
>>>>>>>>>
>>>>>>>>> [code]
>>>>>>>>>
>>>>>>>>> java.io.IOException: Type mismatch in key from map: expected
>>>>>>>>>
>>>>>>>>> org.apache.hadoop.io.BooleanWritable, recieved
>>>>>>>>>
>>>>>>>>> org.apache.hadoop.io.LongWritable
>>>>>>>>>
>>>>>>>>>     at
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:885)
>>>>>>>>>
>>>>>>>>>     at
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:551)
>>>>>>>>>
>>>>>>>>>     at
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:81)
>>>>>>>>>
>>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
>>>>>>>>>
>>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
>>>>>>>>>
>>>>>>>>>     at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
>>>>>>>>>
>>>>>>>>>     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>>>>>>>>>
>>>>>>>>>     at org.apache.hadoop.mapred.Child.main(Child.java:190)
>>>>>>>>>
>>>>>>>>> [/code]
>>>>>>>>>
>>>>>>>>> I've looked at the map function of the class "PiEstimator.class"
and it
>>>>>>>>> seems
>>>>>>>>>
>>>>>>>>> ok.
>>>>>>>>>
>>>>>>>>> [code]
>>>>>>>>>
>>>>>>>>> public void map(LongWritable offset,
>>>>>>>>>
>>>>>>>>>         LongWritable size,
>>>>>>>>>
>>>>>>>>>         OutputCollector<BooleanWritable, LongWritable>
out,
>>>>>>>>>
>>>>>>>>>         Reporter reporter) throws IOException {}
>>>>>>>>>
>>>>>>>>> [/code]
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> What's wrong with this example?
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>>
>>>>>>>>> --
>>>>>>>>>
>>>>>>>>> Pedro
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> --
>>>>>>>>> Pedro
>>>>>>>>>
>>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> --
>>>>>>>> Pedro
>>>>>>>
>>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> --
>>>>>> Pedro
>>>>>>
>>>>>
>>>>>
>>>>>
>>>>> --
>>>>> Pedro
>>>>
>>>
>>>
>>>
>>> --
>>> Pedro
>>>
>>
>>
>>
>> --
>> Chase Bradford
>>
>>
>> “If in physics there's something you don't understand, you can always
>> hide behind the uncharted depths of nature. But if your program
>> doesn't work, there is no obstinate nature. If it doesn't work, you've
>> messed up.”
>>
>> - Edsger Dijkstra
>>
>
>
>
> --
> Pedro
>



-- 
Chase Bradford


“If in physics there's something you don't understand, you can always
hide behind the uncharted depths of nature. But if your program
doesn't work, there is no obstinate nature. If it doesn't work, you've
messed up.”

- Edsger Dijkstra

Mime
View raw message