hadoop-hdfs-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Ranjini Rathinam <ranjinibe...@gmail.com>
Subject Re: Fine tunning
Date Tue, 07 Jan 2014 03:56:14 GMT
Hi,

I have a table in hbase named currencymaster

For Eg:
id,currency
1,INR
2,USD

Now I am loading a text file, which has currency as one of its fields, into
the tableCurrency table in HBase using MapReduce code.

If the value of the currency field from the text file matches a value in the
currency master table, then I need to add one more column, Valid_Ind, to the
tableCurrency table: if the values match, the Valid_Ind value will be "0",
and if they do not match, the value will be "1".

I have attached my code. Please suggest why the validation part takes so long
for just 13250 records.



public class MapReduceTable
{
 private static Configuration conf = null;
   static {
      Configuration customConf = new Configuration();
      //customConf.setStrings ("hbase.zookeeper.quorum","
tss4l20b1.svr.us.jpmchase.net,tss4l20b2.svr.us.jpmchase.net,
tss4l20a1.svr.us.jpmchase.net");
      customConf.setStrings("hbase.zookeeper.quorum","localhost");
      customConf.setLong("hbase.rpc.timeout", 60000000);
      customConf.setLong("hbase.client.scanner.caching", 60000000);
      conf = HBaseConfiguration.create(customConf);
  // customConf = null;
    }
   static class Map extends Mapper<LongWritable, Text, Text, Put>
   {
    protected void map(LongWritable key, Text value, Context context)throws
IOException, InterruptedException
      {
    String messageStr = value.toString();
    String[] logRecvArr = messageStr.split(",");
    Put put1 = new Put(Bytes.toBytes(logRecvArr[0]));
    put1.add(Bytes.toBytes("Id"),
Bytes.toBytes("firstName"),Bytes.toBytes(logRecvArr[1]));
    put1.add(Bytes.toBytes("Id"),
Bytes.toBytes("middleName"),Bytes.toBytes(logRecvArr[2]));
    put1.add(Bytes.toBytes("Id"),
Bytes.toBytes("LastName"),Bytes.toBytes(logRecvArr[3]));
    put1.add(Bytes.toBytes("Id"),
Bytes.toBytes("Company"),Bytes.toBytes(logRecvArr[4]));
    put1.add(Bytes.toBytes("Id"),
Bytes.toBytes("location"),Bytes.toBytes(logRecvArr[5]));
    put1.add(Bytes.toBytes("Id"),
Bytes.toBytes("dept"),Bytes.toBytes(logRecvArr[6]));
    put1.add(Bytes.toBytes("Id"),
Bytes.toBytes("exp"),Bytes.toBytes(logRecvArr[7]));
    context.write(new Text(logRecvArr[0]), put1);
          }
         }
   static class MyMapper extends Mapper<LongWritable, Text, Text, Put>
   {
    protected void map(LongWritable key, Text value, Context context)throws
IOException, InterruptedException
    {
     String messageStr = value.toString();
     String valid_Ind="";String val="";
     String[] logRecvArr = messageStr.split(",");
     Put put = new Put(Bytes.toBytes(logRecvArr[0]));
     Filter valFilter = new ValueFilter(CompareFilter.CompareOp.EQUAL, new
BinaryComparator(Bytes.toBytes(logRecvArr[15])));
     HTable table1 = new HTable(conf, "curr");
     Scan s1 = new Scan();
     s1.setFilter(valFilter);
     ResultScanner ss1 = table1.getScanner(s1);
      for (Result r1 : ss1)
      {
       for (KeyValue kv1 : r1.raw())
       {
        if (Bytes.toString(kv1.getQualifier()).equals("currency"))
        {
         val = new String(kv1.getValue());
        }
       }
      }
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("Address1"),Bytes.toBytes(logRecvArr[8]));
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("Address2"),Bytes.toBytes(logRecvArr[9]));
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("Address3"),Bytes.toBytes(logRecvArr[10]));
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("phone"),Bytes.toBytes(logRecvArr[11]));
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("mobile"),Bytes.toBytes(logRecvArr[12]));
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("sal"),Bytes.toBytes(logRecvArr[13]));
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("Acctno"),Bytes.toBytes(logRecvArr[14]));
       put.add(Bytes.toBytes("Id"),
Bytes.toBytes("currency"),Bytes.toBytes(logRecvArr[15]));

          if(val.equals(logRecvArr[15]))
         {
          put.add(Bytes.toBytes("Id"),
Bytes.toBytes("Vaild_Ind"),Bytes.toBytes("0"));
         }
        else
         {
          put.add(Bytes.toBytes("Id"),
Bytes.toBytes("Vaild_Ind"),Bytes.toBytes("1"));
         }
          context.write(new Text(logRecvArr[0]), put);
    }
   }
      public int execute() throws Exception
       {
       String input="/user/hduser/INPUT/";
       Job job = new Job(conf,"TrandferHdfsToUserLog");
       job.setJarByClass(MapReduceTable.class);
       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(Text.class);
       job.setMapOutputKeyClass(Text.class);
       job.setMapOutputValueClass(Text.class);
       job.setInputFormatClass(TextInputFormat.class);
       job.setOutputFormatClass(TextOutputFormat.class);
       FileInputFormat.setInputPaths(job, input);
       job.setMapperClass(Map.class);
       TableMapReduceUtil.initTableReducerJob("RanCount",null,job);
       job.setNumReduceTasks(0);
       System.out.println("Hello Hadoop 2nd
Job!!"+job.waitForCompletion(true));
       return 0;
      }
      public int executeLast() throws Exception
       {
       String input="/user/hduser/INPUT/";
       Job job = new Job(conf,"TrandferHdfsToUserLog");
       job.setJarByClass(MapReduceTable.class);
       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(Text.class);
       job.setMapOutputKeyClass(Text.class);
       job.setMapOutputValueClass(Text.class);
       job.setInputFormatClass(TextInputFormat.class);
       job.setOutputFormatClass(TextOutputFormat.class);
       FileInputFormat.setInputPaths(job, input);
       job.setMapperClass(MyMapper.class);
       TableMapReduceUtil.initTableReducerJob("Rancount11",null,job);
       job.setNumReduceTasks(0);
       System.out.println("Hello Hadoop 2nd
Job!!"+job.waitForCompletion(true));
       return 0;
      }

      public static void main(String[] args) throws Exception
      {
       new MapReduceTable().execute();
       new MapReduceTable().executeLast();
      }
}

Thanks in advance.

Ranjini

On Tue, Jan 7, 2014 at 12:36 AM, Hardik Pandya <smarty.juice@gmail.com>wrote:

> Can you please share how you are doing the lookup?
>
>
>
>
> On Mon, Jan 6, 2014 at 4:23 AM, Ranjini Rathinam <ranjinibecse@gmail.com>wrote:
>
>>  Hi,
>>
>> I have an input file with 16 fields in it.
>>
>> Using MapReduce code, I need to load the HBase tables.
>>
>> The first eight have to go into one table in HBase and the last eight have to
>> go to another HBase table.
>>
>> The data is loaded into the HBase tables in 0.11 sec, but the load slows down
>> when any lookup is added to the MapReduce code.
>> For example, the input file has one attribute named currency, which has a
>> master table, currency. I need to match both values to print it.
>>
>> The table which has the lookup takes a long time to load. For 13250 records
>> it takes 59 mins.
>>
>> How can I fine-tune it to reduce the loading time?
>>
>> Please help.
>>
>> Thanks in advance.
>>
>> Ranjini.R
>>
>>
>>
>

Mime
View raw message