hadoop-mapreduce-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Devaraj k <devara...@huawei.com>
Subject RE: Want to Sort the values in one line using map reduce
Date Sat, 27 Jul 2013 04:55:32 GMT
You are almost there. To get the desired output you only need to change the reduce function
a little, like this:

/**
 * Reducer that collapses all values received for a key into a single
 * comma-separated line, i.e. emits one (key, "v1,v2,v3") record per key.
 */
public static class ReduceClass extends MapReduceBase implements
      Reducer<Text, Text, Text, Text> {
    // Output value object, reused across reduce() calls.
    Text v = new Text();

    /**
     * Joins every value of {@code key} with ',' and emits the result once.
     *
     * @param key      the grouping key
     * @param values   the values grouped under {@code key}
     * @param output   collector receiving the single joined record
     * @param reporter progress reporter (unused)
     * @throws IOException if the collector fails to write the record
     */
    public void reduce(Text key, Iterator<Text> values,
        OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      // The buffer is method-local, so the unsynchronized StringBuilder suffices.
      StringBuilder joined = new StringBuilder();
      boolean first = true;
      while (values.hasNext()) {
        // Put the separator only between values so the line has no trailing ','.
        // A flag is used (not the buffer length) so empty values are handled too.
        if (!first) {
          joined.append(',');
        }
        joined.append(values.next().toString());
        first = false;
      }
      v.set(joined.toString());
      output.collect(key, v);
    }
}
The separator is only needed between values, so make sure the reduce function above uses a
condition so that no extra ',' is left at the end of each value line.

Thanks
Devaraj k

From: manish dunani [mailto:manishd207@gmail.com]
Sent: 27 July 2013 10:02
To: user@hadoop.apache.org
Subject: Want to Sort the values in one line using map reduce

Hi,

I have an input file and my data looks like this:

date      country  city       pagePath                    visits
20120301  India    Ahmedabad  /                           1
20120302  India    Ahmedabad  /gtuadmissionhelpline-team  1
20120302  India    Mumbai     /                           1
20120302  India    Mumbai     /merit-calculator           1





 I wrote the map and reduce application to convert it into page_url by city:




package data.ga;

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;


/**
 * MapReduce job (old {@code org.apache.hadoop.mapred} API) that reads
 * comma-separated text records and emits the page path (4th field) keyed by
 * the city (3rd field).
 */
public class pharmecy
{
    /** Input path used when none is given on the command line. */
    private static final String DEFAULT_INPUT =
            "hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt";

    /** Output path used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT =
            "hdfs://localhost:54310/user/manish/gadataoutput11";

    /** Mapper that turns one input line into a (city, pagePath) pair. */
    public static class MapClass extends MapReduceBase implements Mapper<LongWritable,Text,Text,Text>
    {
        /** Index of the last field the mapper reads (pagePath). */
        private static final int LAST_USED_FIELD = 3;

        // Output key/value objects, reused across map() calls.
        Text k = new Text();
        Text v = new Text();

        /**
         * Splits the line on ',' (at most 5 fields) and emits field 2 as key
         * and field 3 as value.
         *
         * @param key      key supplied by the input format (only used in the skip message)
         * @param value    one line of input text
         * @param output   collector receiving the (city, pagePath) pair
         * @param reporter progress reporter (unused)
         * @throws IOException if the collector fails to write the record
         */
        public void map(LongWritable key, Text value, OutputCollector<Text,Text> output, Reporter reporter) throws IOException
        {
            String[] fields = value.toString().split(",", 5);

            // Skip short records explicitly instead of catching the resulting
            // ArrayIndexOutOfBoundsException; a blanket catch would also hide
            // IOExceptions thrown by the collector.
            if (fields.length <= LAST_USED_FIELD) {
                System.err.println("Skipping malformed record with key " + key + ": " + value);
                return;
            }

            k.set(fields[2]);
            v.set(fields[LAST_USED_FIELD]);
            output.collect(k, v);
        }
    }

    /** Reducer that writes every value of a key as its own (key, value) record. */
    public static class ReduceClass extends MapReduceBase implements Reducer<Text,Text,Text,Text>
    {
        // Output value object, reused across collect() calls.
        Text v = new Text();

        /**
         * Emits one output record per incoming value, unchanged.
         *
         * @param key      the grouping key
         * @param values   the values grouped under {@code key}
         * @param output   collector receiving one record per value
         * @param reporter progress reporter (unused)
         * @throws IOException if the collector fails to write a record
         */
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text,Text> output, Reporter reporter) throws IOException
        {
            while (values.hasNext()) {
                v.set(values.next().toString());
                output.collect(key, v);
            }
        }
    }

    /**
     * Configures and runs the job.
     *
     * @param args optional: {@code args[0]} is the input path and
     *             {@code args[1]} the output path; the built-in defaults are
     *             used for whichever is missing
     */
    public static void main(String[] args) {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        JobConf conf = new JobConf(pharmecy.class);

        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(Text.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(inputPath));
        FileOutputFormat.setOutputPath(conf, new Path(outputPath));

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setMapperClass(MapClass.class);
        conf.setReducerClass(ReduceClass.class);

        try {
            // runJob is static, so no JobClient instance is needed.
            JobClient.runJob(conf);
        } catch (IOException e) {
            e.printStackTrace();
            // Signal the failure to the caller instead of exiting with status 0.
            System.exit(1);
        }
    }
}

Output:


#city                  #pagepath
"Aachen"                 "/medicalcollege/m-p-shah-medical-college"
"Abbottabad"  "/merit-calculator"
"Abbottabad"  "/merit-calculator"
"Abidjan"                "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"
"Abidjan"                "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"


My question is:

I want to convert this output into the format below:

#city                        #pagepath
city1                        url1,url2,url3
city2                        url1,url2,url3

Is it possible to convert it into this format using map and reduce?

If yes, then how?

--
MANISH DUNANI
-THANX


Mime
View raw message