Hi All,
I am a beginner of Hadoop. I modified the Inverted Index Code in Yahoo's
Tutorial
(http://developer.yahoo.com/hadoop/tutorial/module4.html#solution), but
I always get errors of "java.io.IOException: Type mismatch in key from
map: expected org.apache.hadoop.io.Text, recieved
org.apache.hadoop.io.LongWritable". Could some people tell me what is
wrong in my code? Thanks a million!
Zhiqiang
----------------------------code
starts--------------------------------------
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
public class YahooIndex {
public static class MyMapper extends Mapper<LongWritable, Text, Text,
Text> {
private final static Text word = new Text();
private final static Text location = new Text();
public void map(LongWritable key, Text val,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
FileSplit fileSplit = (FileSplit)reporter.getInputSplit();
String fileName = fileSplit.getPath().getName();
location.set(fileName);
String line = val.toString();
StringTokenizer itr = new StringTokenizer(line.toLowerCase());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
output.collect(word, location);
}
}
}
public static class MyReducer extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterator<Text> values,
OutputCollector<Text, Text> output, Reporter reporter)
throws IOException {
boolean first = true;
StringBuilder toReturn = new StringBuilder();
while (values.hasNext()){
if (!first)
toReturn.append(", ");
first=false;
toReturn.append(values.next().toString());
}
output.collect(key, new Text(toReturn.toString()));
}
}
public static void main(String[] args) throws IOException,
InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
Job job = new Job(conf, "Example Hadoop 0.20.1 WordCount");
job.setJarByClass(YahooIndex.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path("input"));
FileOutputFormat.setOutputPath(job, new Path("output"));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
-----------------------------------code
ends-----------------------------------------------------------------------------------
|