hadoop-general mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Ma, Zhiqiang" <zqmas...@gmail.com>
Subject help for beginner
Date Thu, 04 Feb 2010 05:16:52 GMT
Hi All,

I am a beginner with Hadoop. I modified the Inverted Index Code in Yahoo's 
Tutorial  
(http://developer.yahoo.com/hadoop/tutorial/module4.html#solution), but 
I always get errors of "java.io.IOException: Type mismatch in key from 
map: expected org.apache.hadoop.io.Text, recieved 
org.apache.hadoop.io.LongWritable". Could someone tell me what is 
wrong with my code? Thanks a million!

Zhiqiang

----------------------------code 
starts--------------------------------------
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class YahooIndex {

   public static class MyMapper extends Mapper<LongWritable, Text, Text, 
Text> {

     private final static Text word = new Text();
     private final static Text location = new Text();

     public void map(LongWritable key, Text val,
         OutputCollector<Text, Text> output, Reporter reporter)
         throws IOException {

       FileSplit fileSplit = (FileSplit)reporter.getInputSplit();
       String fileName = fileSplit.getPath().getName();
       location.set(fileName);

       String line = val.toString();
       StringTokenizer itr = new StringTokenizer(line.toLowerCase());
       while (itr.hasMoreTokens()) {
         word.set(itr.nextToken());
         output.collect(word, location);
       }
     }
   }



   public static class MyReducer extends Reducer<Text, Text, Text, Text> {

     public void reduce(Text key, Iterator<Text> values,
         OutputCollector<Text, Text> output, Reporter reporter)
         throws IOException {

       boolean first = true;
       StringBuilder toReturn = new StringBuilder();
       while (values.hasNext()){
         if (!first)
           toReturn.append(", ");
         first=false;
         toReturn.append(values.next().toString());
       }

       output.collect(key, new Text(toReturn.toString()));
     }
   }


   public static void main(String[] args) throws IOException, 
InterruptedException, ClassNotFoundException {
           Configuration conf = new Configuration();
         Job job = new Job(conf, "Example Hadoop 0.20.1 WordCount");
         job.setJarByClass(YahooIndex.class);
         job.setMapperClass(MyMapper.class);
         job.setReducerClass(MyReducer.class);
         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(Text.class);
         FileInputFormat.addInputPath(job, new Path("input"));
         FileOutputFormat.setOutputPath(job, new Path("output"));
         System.exit(job.waitForCompletion(true) ? 0 : 1);
   }
}
-----------------------------------code 
ends-----------------------------------------------------------------------------------

Mime
View raw message