hadoop-mapreduce-user mailing list archives

From Pedro Costa <psdc1...@gmail.com>
Subject Re: PiEstimator error - Type mismatch in key from map
Date Thu, 27 Jan 2011 16:50:44 GMT
[code]

package org.apache.hadoop.examples;

import java.io.IOException;
import java.math.BigDecimal;
import java.util.Iterator;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;


public class PiEstimator extends Configured implements Tool {
	/** tmp directory for input/output */
	static private final Path TMP_DIR = new Path(PiEstimator.class.getSimpleName() + "_TMP_3_141592654");


	/**
	 * Mapper class for Pi estimation.
	 * Generate points in a unit square
	 * and then count points inside/outside of the inscribed circle of the square.
	 */
	public static class PiMapper extends MapReduceBase
	implements Mapper<LongWritable, LongWritable, BooleanWritable, LongWritable> {

		/** Map method.
		 * @param offset samples starting from the (offset+1)th sample.
		 * @param size the number of samples for this map
		 * @param out output {true->numInside, false->numOutside}
		 * @param reporter
		 */
		public void map(LongWritable offset,
				LongWritable size,
				OutputCollector<BooleanWritable, LongWritable> out,
				Reporter reporter) throws IOException {

			final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
			long numInside = 0L;
			long numOutside = 0L;

			for(long i = 0; i < size.get(); ) {
				//generate points in a unit square
				final double[] point = haltonsequence.nextPoint();

				//count points inside/outside of the inscribed circle of the square
				final double x = point[0] - 0.5;
				final double y = point[1] - 0.5;
				if (x*x + y*y > 0.25) {
					numOutside++;
				} else {
					numInside++;
				}

				//report status
				i++;
				if (i % 1000 == 0) {
					reporter.setStatus("Generated " + i + " samples.");
				}
			}

			//output map results
			out.collect(new BooleanWritable(true), new LongWritable(numInside));
			out.collect(new BooleanWritable(false), new LongWritable(numOutside));
		}
	}



	/**
	 * Run a map/reduce job for estimating Pi.
	 *
	 * @return the estimated value of Pi
	 */
	public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf)
	throws IOException {
		//setup job conf
		jobConf.setJobName(PiEstimator.class.getSimpleName());

		jobConf.setInputFormat(SequenceFileInputFormat.class);

		jobConf.setOutputKeyClass(BooleanWritable.class);
		jobConf.setOutputValueClass(LongWritable.class);
		//		jobConf.setMapOutputKeyClass(BooleanWritable.class);
		//		jobConf.setMapOutputValueClass(LongWritable.class);
		jobConf.setOutputFormat(SequenceFileOutputFormat.class);

		jobConf.setMapperClass(PiMapper.class);
		jobConf.setNumMapTasks(numMaps);

		jobConf.setReducerClass(PiReducer.class); //PiReducer (not shown in this paste) sums the map counts and writes them to TMP_DIR/out/reduce-out
		jobConf.setNumReduceTasks(1);

		// turn off speculative execution, because DFS doesn't handle
		// multiple writers to the same file.
		jobConf.setSpeculativeExecution(false);

		//setup input/output directories
		final Path inDir = new Path(TMP_DIR, "in");
		final Path outDir = new Path(TMP_DIR, "out");
		FileInputFormat.setInputPaths(jobConf, inDir);
		FileOutputFormat.setOutputPath(jobConf, outDir);

		final FileSystem fs = FileSystem.get(jobConf);
		if (fs.exists(TMP_DIR)) {
			throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
					+ " already exists.  Please remove it first.");
		}
		if (!fs.mkdirs(inDir)) {
			throw new IOException("Cannot create input directory " + inDir);
		}

		try {
			//generate an input file for each map task
			for(int i=0; i < numMaps; ++i) {
				final Path file = new Path(inDir, "part"+i);
				final LongWritable offset = new LongWritable(i * numPoints);
				final LongWritable size = new LongWritable(numPoints);
				final SequenceFile.Writer writer = SequenceFile.createWriter(
						fs, jobConf, file,
						LongWritable.class, LongWritable.class, CompressionType.NONE);
				try {
					writer.append(offset, size);
				} finally {
					writer.close();
				}
				System.out.println("Wrote input for Map #"+i);
			}

			//start a map/reduce job
			System.out.println("Starting Job");
			final long startTime = System.currentTimeMillis();
			JobClient.runJob(jobConf);
			final double duration = (System.currentTimeMillis() - startTime)/1000.0;
			System.out.println("Job Finished in " + duration + " seconds");

			//read outputs
			Path inFile = new Path(outDir, "reduce-out");
			LongWritable numInside = new LongWritable();
			LongWritable numOutside = new LongWritable();
			SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
			try {
				reader.next(numInside, numOutside);
			} finally {
				reader.close();
			}

			//compute estimated value
			return BigDecimal.valueOf(4).setScale(20)
			.multiply(BigDecimal.valueOf(numInside.get()))
			.divide(BigDecimal.valueOf(numMaps))
			.divide(BigDecimal.valueOf(numPoints));
		} finally {
			fs.delete(TMP_DIR, true);
		}
	}

	/**
	 * Parse arguments and then run a map/reduce job.
	 * Print output to standard out.
	 *
	 * @return a non-zero value if there is an error; otherwise, 0.
	 */
	public int run(String[] args) throws Exception {
		if (args.length != 2) {
			System.err.println("Usage: "+getClass().getName()+" <nMaps> <nSamples>");
			ToolRunner.printGenericCommandUsage(System.err);
			return -1;
		}

		final int nMaps = Integer.parseInt(args[0]);
		final long nSamples = Long.parseLong(args[1]);

		System.out.println("Number of Maps  = " + nMaps);
		System.out.println("Samples per Map = " + nSamples);

		final JobConf jobConf = new JobConf(getConf(), getClass());
		System.out.println("Estimated value of Pi is "
				+ estimate(nMaps, nSamples, jobConf));
		return 0;
	}
}

[/code]
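
For comparison with the signature Chase mentions below, here is a rough
sketch (illustrative only, not the code I am actually running) of how the
same mapper would be declared against the new o.a.h.mapreduce API, where
map() receives a Context instead of an OutputCollector and Reporter. The
class name NewApiPiMapper is just a placeholder, and it assumes it sits
inside the same PiEstimator class so that HaltonSequence and the imports
above are available:

[code]

	public static class NewApiPiMapper extends
			org.apache.hadoop.mapreduce.Mapper<LongWritable, LongWritable,
					BooleanWritable, LongWritable> {

		@Override
		protected void map(LongWritable offset, LongWritable size, Context context)
				throws IOException, InterruptedException {
			final HaltonSequence haltonsequence = new HaltonSequence(offset.get());
			long numInside = 0L;
			long numOutside = 0L;

			//generate points and count inside/outside, as in PiMapper above
			for (long i = 0; i < size.get(); i++) {
				final double[] point = haltonsequence.nextPoint();
				final double x = point[0] - 0.5;
				final double y = point[1] - 0.5;
				if (x*x + y*y > 0.25) {
					numOutside++;
				} else {
					numInside++;
				}
			}

			//with the new API, results are emitted through the Context
			context.write(new BooleanWritable(true), new LongWritable(numInside));
			context.write(new BooleanWritable(false), new LongWritable(numOutside));
		}
	}

[/code]

With that variant the job would be configured through an
org.apache.hadoop.mapreduce.Job and job.setMapOutputKeyClass(...) /
job.setMapOutputValueClass(...), as Nicholas shows further down, instead of
the old-API JobConf calls used above.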



On Thu, Jan 27, 2011 at 4:44 PM, Chase Bradford
<chase.bradford@gmail.com> wrote:
> That should be fine, but mapreduce.Mapper.map has this signature:
>
> map(K key, V value, Context)
>
> Your PiEstimator map signature doesn't match, so it's not overriding
> the proper function and is never getting called by the framework.
>
> Could you paste your complete PiMapper class definition and the series
> of calls you make to setup your job?  That would make debugging the
> problem much easier.
>
> Chase
>
>
> On Thu, Jan 27, 2011 at 8:29 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>> Yes, that's the one that's being used (o.a.h.mapreduce.Mapper). Isn't
>> that the right one to use?
>>
>>
>>
>> On Thu, Jan 27, 2011 at 3:40 PM, Chase Bradford
>> <chase.bradford@gmail.com> wrote:
>>> Are you sure the function signature for your Mapper's map matches the
>>> super class, and that you specified your Map class in the job setup?  It
>>> sounds a bit like the base o.a.h.mapreduce.Mapper map implementation is
>>> being used instead.
>>>
>>>
>>> On Jan 27, 2011, at 2:36 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>>>
>>>> The map output classes are well defined:
>>>> keyClass: class org.apache.hadoop.io.BooleanWritable - valClass: class
>>>> org.apache.hadoop.io.LongWritable
>>>>
>>>> but when executing the pi example, the types that the map function passes are:
>>>> keyClass: class org.apache.hadoop.io.LongWritable - valClass: class
>>>> org.apache.hadoop.io.Text
>>>>
>>>>
>>>> I looked at the PiEstimator.PiMapper#map function, and the
>>>> output collector seems ok.
>>>>
>>>> [code]
>>>> public void map(LongWritable offset,
>>>>        LongWritable size,
>>>>        OutputCollector<BooleanWritable, LongWritable> out,
>>>>        Reporter reporter) throws IOException {
>>>> (...)
>>>> out.collect(new BooleanWritable(true), new LongWritable(numInside));
>>>> out.collect(new BooleanWritable(false), new LongWritable(numOutside));
>>>> }
>>>> [/code]
>>>>
>>>> I'm really confused right now. How can this be happening?
>>>>
>>>>
>>>> On Thu, Jan 27, 2011 at 10:19 AM, Pedro Costa <psdc1978@gmail.com> wrote:
>>>>> Thanks Nicholas, but it didn't work.
>>>>>
>>>>> Can I do remote debugging on the hadoop examples? I would really like
>>>>> to put a breakpoint in the Pi class.
>>>>>
>>>>> Thanks,
>>>>>
>>>>> On Wed, Jan 26, 2011 at 6:46 PM, Tsz Wo (Nicholas), Sze
>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>> Okay, I got it now.  You were talking about your own program, not the
>>>>>> PiEstimator example that came from Hadoop.  Then you have to set
>>>>>> "mapred.output.key.class" and "mapred.output.value.class" as Srihari
>>>>>> mentioned.  Below are the APIs.
>>>>>>
>>>>>>     //new API
>>>>>>     final Job job = ...
>>>>>>     job.setMapOutputKeyClass(BooleanWritable.class);
>>>>>>     job.setMapOutputValueClass(LongWritable.class);
>>>>>>
>>>>>>     //old API
>>>>>>     final JobConf jobconf = ...
>>>>>>     jobconf.setOutputKeyClass(BooleanWritable.class);
>>>>>>     jobconf.setOutputValueClass(LongWritable.class);
>>>>>>
>>>>>> Nicholas
>>>>>>
>>>>>> ________________________________
>>>>>> From: Srihari Anantha Padmanabhan <sriharia@yahoo-inc.com>
>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>> Sent: Wed, January 26, 2011 10:36:09 AM
>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map
>>>>>>
>>>>>> I am using Hadoop 0.20.2. I just wrote my own map-reduce program based
>>>>>> on the map-reduce tutorial at
>>>>>> http://hadoop.apache.org/common/docs/r0.20.2/mapred_tutorial.html
>>>>>>
>>>>>> On Jan 26, 2011, at 10:27 AM, Pedro Costa wrote:
>>>>>>
>>>>>>> Hadoop 0.20.1
>>>>>>>
>>>>>>> On Wed, Jan 26, 2011 at 6:26 PM, Tsz Wo (Nicholas), Sze
>>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>>> Hi Srihari,
>>>>>>>>
>>>>>>>> Same questions to you: Which version of Hadoop are you using?  And
>>>>>>>> where did you get the examples?  I guess you were able to reproduce
>>>>>>>> it.  I suspect the examples and Hadoop are from different versions.
>>>>>>>>
>>>>>>>> Nicholas
>>>>>>>>
>>>>>>>>
>>>>>>>> ________________________________
>>>>>>>> From: Srihari Anantha Padmanabhan <sriharia@yahoo-inc.com>
>>>>>>>> To: "mapreduce-user@hadoop.apache.org" <mapreduce-user@hadoop.apache.org>
>>>>>>>> Sent: Wed, January 26, 2011 10:15:08 AM
>>>>>>>> Subject: Re: PiEstimator error - Type mismatch in key from map
>>>>>>>>
>>>>>>>> I got a similar error before in one of my projects. I had to set the
>>>>>>>> values for "mapred.output.key.class" and "mapred.output.value.class".
>>>>>>>> That resolved the issue for me.
>>>>>>>> Srihari
>>>>>>>> On Jan 26, 2011, at 10:09 AM, Pedro Costa wrote:
>>>>>>>>
>>>>>>>> Yes, I can reproduce it deterministically. But I also made some
>>>>>>>> changes to the Hadoop MR code, and most definitely this is the
>>>>>>>> reason. I'm looking thoroughly through the code.
>>>>>>>>
>>>>>>>> I'll say something after I find the problem.
>>>>>>>>
>>>>>>>> I was just wondering if this error has happened to someone before.
>>>>>>>> Maybe I could get a hint and see what my problem is more easily.
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>> On Wed, Jan 26, 2011 at 6:02 PM, Tsz Wo (Nicholas), Sze
>>>>>>>> <s29752-hadoopuser@yahoo.com> wrote:
>>>>>>>>
>>>>>>>> Hi Pedro,
>>>>>>>>
>>>>>>>> This is interesting.  Which version of Hadoop are you using?  And
>>>>>>>> where did you get the example class files?  Also, are you able to
>>>>>>>> reproduce it deterministically?
>>>>>>>>
>>>>>>>> Nicholas
>>>>>>>>
>>>>>>>> ________________________________
>>>>>>>>
>>>>>>>> From: Pedro Costa <psdc1978@gmail.com>
>>>>>>>>
>>>>>>>> To: mapreduce-user@hadoop.apache.org
>>>>>>>>
>>>>>>>> Sent: Wed, January 26, 2011 5:47:01 AM
>>>>>>>>
>>>>>>>> Subject: PiEstimator error - Type mismatch in key from map
>>>>>>>>
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> I ran the Pi example of Hadoop and got the following error:
>>>>>>>>
>>>>>>>> [code]
>>>>>>>> java.io.IOException: Type mismatch in key from map: expected
>>>>>>>> org.apache.hadoop.io.BooleanWritable, recieved org.apache.hadoop.io.LongWritable
>>>>>>>>     at org.apache.hadoop.mapred.MapTask$MapOutputBuffer.collect(MapTask.java:885)
>>>>>>>>     at org.apache.hadoop.mapred.MapTask$NewOutputCollector.write(MapTask.java:551)
>>>>>>>>     at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:81)
>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.map(Mapper.java:124)
>>>>>>>>     at org.apache.hadoop.mapreduce.Mapper.run(Mapper.java:144)
>>>>>>>>     at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:637)
>>>>>>>>     at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
>>>>>>>>     at org.apache.hadoop.mapred.Child.main(Child.java:190)
>>>>>>>> [/code]
>>>>>>>>
>>>>>>>> I've looked at the map function of the class "PiEstimator.class" and
>>>>>>>> it seems ok.
>>>>>>>>
>>>>>>>> [code]
>>>>>>>> public void map(LongWritable offset,
>>>>>>>>         LongWritable size,
>>>>>>>>         OutputCollector<BooleanWritable, LongWritable> out,
>>>>>>>>         Reporter reporter) throws IOException {}
>>>>>>>> [/code]
>>>>>>>>
>>>>>>>>
>>>>>>>> What's wrong with this example?
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>>
>>>>>>>> --
>>>>>>>>
>>>>>>>> Pedro
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> --
>>>>>>>> Pedro
>>>>>>>>
>>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> --
>>>>>>> Pedro
>>>>>>
>>>>>>
>>>>>
>>>>>
>>>>>
>>>>> --
>>>>> Pedro
>>>>>
>>>>
>>>>
>>>>
>>>> --
>>>> Pedro
>>>
>>
>>
>>
>> --
>> Pedro
>>
>
>
>
> --
> Chase Bradford
>
>
> “If in physics there's something you don't understand, you can always
> hide behind the uncharted depths of nature. But if your program
> doesn't work, there is no obstinate nature. If it doesn't work, you've
> messed up.”
>
> - Edsger Dijkstra
>



-- 
Pedro
