hadoop-mapreduce-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From manish dunani <manishd...@gmail.com>
Subject Re: Want to Sort the values in one line using map reduce
Date Sat, 27 Jul 2013 07:42:53 GMT
     *"Thanks a lot Devraj!!!!!!!!"*


On Sat, Jul 27, 2013 at 10:25 AM, Devaraj k <devaraj.k@huawei.com> wrote:

>  You are almost there. To get the desired output, you only need to make a
> small change in the reduce function, like this:
>
> ** **
>
> *public* *static* *class* ReduceClass *extends* *MapReduceBase* *
> implements*****
>
>       *Reducer*<Text, Text, Text, Text> {****
>
>     Text v = *new* Text();****
>
> ** **
>
>     *public* *void* reduce(Text key, *Iterator*<Text> values,****
>
>         *OutputCollector*<Text, Text> output, *Reporter* reporter)****
>
>         *throws* IOException {****
>
>       StringBuffer value = *new* StringBuffer();****
>
>       *while* (values.hasNext()){****
>
>         value.append(values.next().toString());****
>
>         value.append(",");****
>
>       }****
>
>       v.set(value.toString());****
>
>       output.collect(key, v);****
>
>     }****
>
>   }****
>
> In the above reduce function, you can add a condition to avoid the extra
> ‘,’ at the end of each value line (for example, append the ‘,’ only before
> the second and subsequent values).
>
> ** **
>
> Thanks****
>
> Devaraj k****
>
> ** **
>
> *From:* manish dunani [mailto:manishd207@gmail.com]
> *Sent:* 27 July 2013 10:02
> *To:* user@hadoop.apache.org
> *Subject:* Want to Sort the values in one line using map reduce****
>
> ** **
>
> Hi,****
>
> ** **
>
> *I have an input file, and my data looks like this:*
>
> ** **
>
> date      country  city       pagePath                     visits
>
> 20120301  India    Ahmedabad  /                            1
>
> 20120302  India    Ahmedabad  /gtuadmissionhelpline-team   1
>
> 20120302  India    Mumbai     /                            1
>
> 20120302  India    Mumbai     /merit-calculator            1
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> *I wrote a map and reduce application to convert it into a list of page
> URLs (pagePath) by city:*
>
> ** **
>
> ** **
>
> ** **
>
> ** **
>
> package data.ga;****
>
> ** **
>
> import java.io.IOException;****
>
> import java.util.Iterator;****
>
> import org.apache.hadoop.fs.Path;****
>
> import org.apache.hadoop.io.LongWritable;****
>
> import org.apache.hadoop.io.Text;****
>
> import org.apache.hadoop.mapred.FileInputFormat;****
>
> import org.apache.hadoop.mapred.FileOutputFormat;****
>
> import org.apache.hadoop.mapred.JobClient;****
>
> import org.apache.hadoop.mapred.JobConf;****
>
> import org.apache.hadoop.mapred.MapReduceBase;****
>
> import org.apache.hadoop.mapred.Mapper;****
>
> import org.apache.hadoop.mapred.OutputCollector;****
>
> import org.apache.hadoop.mapred.Reducer;****
>
> import org.apache.hadoop.mapred.Reporter;****
>
> import org.apache.hadoop.mapred.TextInputFormat;****
>
> import org.apache.hadoop.mapred.TextOutputFormat;****
>
> ** **
>
> ** **
>
> public class pharmecy ****
>
> {****
>
>             public static class MapClass extends MapReduceBase implements
> Mapper<LongWritable,Text,Text,Text>****
>
>             {****
>
>                         Text k = new Text();****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void map(LongWritable key,Text
> value,OutputCollector<Text,Text>output,Reporter reporter) throws IOException
> ****
>
>                         {****
>
>                                     try****
>
>                                     {****
>
>                                     String[] line =
> value.toString().split(",",5);****
>
>                                     ****
>
>                                     String city = String.valueOf(line[2]);
> ****
>
>                                     String url = String.valueOf(line[3]);*
> ***
>
>                                     ****
>
>                                     k.set(city);****
>
>                                     v.set(url);****
>
>                                     ****
>
>                                     output.collect(k, v);****
>
>                                     }****
>
>                                     catch(Exception e)****
>
>                                     {****
>
>                                                 System.out.println(e);****
>
>                                     }****
>
>                                                                         **
> **
>
>                         }****
>
>             }****
>
>             ****
>
>             public static class ReduceClass extends MapReduceBase
> implements Reducer <Text,Text,Text,Text>****
>
>             {****
>
>                         Text v = new Text();****
>
>                         ****
>
>                         public void reduce(Text key,Iterator<Text>
> values,OutputCollector<Text,Text>output,Reporter reporter) throws
> IOException****
>
>                         {****
>
>                          ****
>
>                         ****
>
>                                     while(values.hasNext())****
>
>                                       ****
>
>                                     {****
>
>                                                 String
> val=values.next().toString();****
>
>                                                 ****
>
>                                                 v.set(val);****
>
>
> ****
>
>                                                 output.collect(key,v);****
>
>                                     ****
>
>                                     ****
>
>                                     }****
>
>                                     ****
>
>                                     ****
>
>                         }****
>
>             ****
>
>             ****
>
>             public static void main(String[] args) {****
>
>                         JobClient client = new JobClient();****
>
>                         JobConf conf = new JobConf(data.ga.pharmecy.class);
> ****
>
> ** **
>
>                         conf.setMapOutputKeyClass(Text.class);****
>
>                         conf.setMapOutputValueClass(Text.class);****
>
>                         // TODO: specify output types****
>
>                         conf.setOutputKeyClass(Text.class);****
>
>                         conf.setOutputValueClass(Text.class);****
>
> ** **
>
>                         FileInputFormat.setInputPaths(conf, new
> Path("hdfs://localhost:54310/user/manish/gadatainput/pharmecydata.txt"));*
> ***
>
>                         FileOutputFormat.setOutputPath(conf, new
> Path("hdfs://localhost:54310/user/manish/gadataoutput11"));****
>
> ** **
>
>                         conf.setInputFormat(TextInputFormat.class);****
>
>                         conf.setOutputFormat(TextOutputFormat.class);****
>
>                         ****
>
>                         conf.setMapperClass(MapClass.class);****
>
>                         conf.setReducerClass(ReduceClass.class);****
>
>                         ****
>
>                         client.setConf(conf);****
>
>                         try {****
>
>                                     JobClient.runJob(conf);****
>
>                         } catch (Exception e) {****
>
>                                     e.printStackTrace();****
>
>                         }****
>
>             }****
>
> ** **
>
>             }****
>
> }****
>
> * *
>
> *Output:***
>
> * *
>
> * *
>
> *#city*                 * #pagepath*****
>
> "Aachen"                 "/medicalcollege/m-p-shah-medical-college"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abbottabad"  "/merit-calculator"****
>
> "Abidjan"
> "/pharmacycollege/shree-swaminarayan-pharmacy-college-kevadiya-colony"****
>
> "Abidjan"
> "/pharmacycollege/amruta-college-of-pharmacy-research-institute-gandhinagar"
> ****
>
> ** **
>
> ** **
>
> *My question is:*****
>
> ** **
>
> I want to convert this output into the format below:
>
> ** **
>
> #city                        #pagepath****
>
> city1                        url1,url2,url3****
>
> city2                        url1,url2,url3****
>
> ** **
>
> Is it possible to convert it into this format using map and reduce?
>
> ** **
>
> If yes, then how?
>
> ** **
>
> -- ****
>
> MANISH DUNANI
> -THANX****
>
> ** **
>



-- 
MANISH DUNANI
-THANX
+91 9426881954,+91 8460656443
manishd207@gmail.com

Mime
View raw message