hadoop-common-user mailing list archives

From: Tri Doan <trid...@k-state.edu>
Subject: ask problem
Date: Thu, 25 Nov 2010 21:31:08 GMT
Thursday 25 Nov 2010
Hi

I would like to write a program that counts the frequency of each word in a collection of text files. First, I output every word in a document and also count the number of words in that document, which is emitted at the end under a blank key (a single space, " "). I expect the combiner to sum all the pairs with the blank key, so that the reduce step will know the total number of words and can use it to calculate each word's frequency in the collection. Below is a quick sketch of the map output I have in mind, followed by my program.
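For one input line, my Map class (below) is meant to emit one (word, "1") pair per token, plus a single pair under the blank key carrying that line's word count. A tiny stand-alone snippet to illustrate; the sample line and class name are made up and are not part of the job:

import java.util.StringTokenizer;

// Prints the pairs the mapper is meant to emit for one sample line.
public class MapOutputSketch {
    public static void main(String[] args) {
        String line = "the cat sat on the mat";
        int wordCount = 0;
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            // one (word, "1") pair per token
            System.out.println("(" + tokenizer.nextToken() + ", 1)");
            wordCount++;
        }
        // plus one pair under the blank key " " carrying this line's word count
        System.out.println("(\" \", " + wordCount + ")");
    }
}

And here is my program: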

import java.io.IOException;
import java.util.*;
        
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.util.*;
        
public class thu {

 public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
    private Text word = new Text();        // map output key: the word itself
    private Text id = new Text();          // blank key used for the per-line word count
    private Text outValue = new Text();    // map output value

    public void map(LongWritable key, Text value, OutputCollector<Text, Text> output,
                    Reporter reporter) throws IOException {
        String line = value.toString();
        int word_count = 0;
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            // emit (word, "1") for every token
            word.set(tokenizer.nextToken());
            outValue.set("1");
            word_count++;
            output.collect(word, outValue);
        }
        // emit the number of words in this line under the blank key " "
        id.set(" ");
        outValue.set(Integer.toString(word_count));
        System.out.println(key.toString() + " has number of words " + word_count);
        output.collect(id, outValue);
    }
 }
        
 public static class Reduce extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output,
                       Reporter reporter) throws IOException {
        double total_word_count = 0;
        Text value = new Text();

        // the blank key carries the per-line word counts; its first value is
        // taken as the total number of words in the collection
        if (key.toString().equals(" ")) {
            total_word_count = Double.valueOf(values.next().toString());
            System.out.println(key.toString() + " has total " + total_word_count);
        }
        // sum the remaining counts for this key
        int word_count = 0;
        while (values.hasNext()) {
            word_count += Integer.parseInt(values.next().toString());
            // value.set(Double.toString(word_count / total_word_count));  // intended: word frequency
        }
        value.set(Integer.toString(word_count));
        System.out.println(key.toString() + " has " + word_count);
        output.collect(key, value);
    }
 }
 
 public static class Combiner extends MapReduceBase implements Reducer<Text, Text, Text, Text> {

    public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output,
                       Reporter reporter) throws IOException {
        int partial_sum = 0;
        Text value = new Text();

        // pre-aggregate the counts for each key on the map side
        while (values.hasNext()) {
            partial_sum += Integer.parseInt(values.next().toString());
        }
        value.set(Integer.toString(partial_sum));
        System.out.println(key.toString() + " has " + partial_sum);
        output.collect(key, value);
    }
 }
 
 public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(thu.class);
    conf.setJobName("thu");
        
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
        
    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
        
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
        
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    // remove any previous output directory so the job can be rerun
    FileSystem.get(conf).delete(new Path(args[1]), true);
    
    JobClient.runJob(conf);
 }
        
}
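
For reference, I package the class into a jar and run the job roughly like this (the jar name and paths below are just placeholders):

hadoop jar thu.jar thu /user/tri/input /user/tri/output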

However, I observe that the combiner function does not seem to run.
Could you tell me what is wrong?

Tri Doan
1429 Laramie Apt 3, Manhattan
KS 66502
USA

