hbase-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From stack <st...@duboce.net>
Subject Re: SampleUploader for the trunk hbase
Date Fri, 11 Jul 2008 05:09:50 GMT
We'd welcome the contrib.  If sufficiently 'generic', we should just 
check it in under our examples directory.
St.Ack

Dan Zinngrabe wrote:
> It would just need a little cleanup and it would be ready to go. We made it
> generic enough that it should work for just about anyone, with a few (minor)
> caveats. We used it to transition a fairly flat, large mysql table into
> HBase as well as for production backups and restore.
>
> On Mon, Jul 7, 2008 at 9:43 PM, stack <stack@duboce.net> wrote:
>
>   
>> Sounds good.  Would it make sense your posting your code or is it too
>> particular to your setup?
>> St.Ack
>>
>>
>> Dan Zinngrabe wrote:
>>
>>     
>>> We have something very similar, but a little more flexible in use in
>>> production. Along with it is a simple exporter that outputs hbase data in
>>> the same format.
>>> It's not quite HBASE-50 <
>>>
>>> https://issues.apache.org/jira/browse/HBASE-50?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=12580996#action_12580996
>>>  but it does work pretty well so far for backing up even large tables.
>>>
>>> On Thu, Jul 3, 2008 at 10:48 AM, stack <stack@duboce.net> wrote:
>>>
>>>
>>>
>>>       
>>>> Thanks Alex.  Looks great.  You want me to add it to the wiki?  (Or you
>>>> could do it yourself).
>>>>
>>>> Yours does something slightly different it seems; you hardcode the column
>>>> name and do a count of splits[1].  You might add to the class comment a
>>>> description of what your MR job does.
>>>>
>>>> St.Ack
>>>>
>>>>
>>>>
>>>> Alex Newman wrote:
>>>>
>>>>
>>>>
>>>>         
>>>>> /// This is very close to the example in the javadoc
>>>>> already(Bytes,BatchUpdate) instead of (text/mapwritable), and i find
>>>>> it to be the easiest way to get people started/motivated with HBase.
>>>>>
>>>>>
>>>>> package org.apache.hadoop.hbase.mapred;
>>>>>
>>>>> import org.apache.hadoop.hbase.util.Bytes;
>>>>>
>>>>> import java.io.IOException;
>>>>> import java.util.Iterator;
>>>>>
>>>>> import org.apache.hadoop.conf.Configuration;
>>>>> import org.apache.hadoop.fs.Path;
>>>>> import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
>>>>> import org.apache.hadoop.hbase.io.BatchUpdate;
>>>>>
>>>>> import org.apache.hadoop.io.LongWritable;
>>>>> import org.apache.hadoop.io.MapWritable;
>>>>> import org.apache.hadoop.io.Text;
>>>>> import org.apache.hadoop.mapred.JobClient;
>>>>> import org.apache.hadoop.mapred.JobConf;
>>>>> import org.apache.hadoop.mapred.MapReduceBase;
>>>>> import org.apache.hadoop.mapred.Mapper;
>>>>> import org.apache.hadoop.mapred.OutputCollector;
>>>>> import org.apache.hadoop.mapred.Reporter;
>>>>> import org.apache.hadoop.util.Tool;
>>>>> import org.apache.hadoop.util.ToolRunner;
>>>>> public class SampleUploader extends MapReduceBase
>>>>> implements Mapper<LongWritable, Text, ImmutableBytesWritable,
>>>>> BatchUpdate>, Tool
>>>>>  {
>>>>>  private static final String NAME = "SampleUploader";
>>>>>  private Configuration conf;
>>>>>
>>>>>  public JobConf createSubmittableJob(String[] args) {
>>>>>   JobConf c = new JobConf(getConf(), SampleUploader.class);
>>>>>   c.setJobName(NAME);
>>>>>   c.setInputPath(new Path(args[0]));
>>>>>   c.setMapperClass(this.getClass());
>>>>>   c.setMapOutputKeyClass(ImmutableBytesWritable.class);
>>>>>   c.setMapOutputValueClass(BatchUpdate.class);
>>>>>   c.setReducerClass(TableUploader.class);
>>>>>   TableReduce.initJob(args[1], TableUploader.class, c);
>>>>>   return c;
>>>>>  }
>>>>>
>>>>>  public void map(LongWritable k, Text v,
>>>>>   OutputCollector<ImmutableBytesWritable, BatchUpdate> output, Reporter r)
>>>>>  throws IOException {
>>>>>   // Lines are space-delimited; first item is row, next the column name,
>>>>>   // and then the third the cell value.
>>>>>   String tmp = v.toString();
>>>>>   if (tmp.length() == 0) {
>>>>>     return;
>>>>>   }
>>>>>   String [] splits = v.toString().split(" ");
>>>>>   String row = splits[0];
>>>>>   BatchUpdate mw = new  BatchUpdate(row);
>>>>>
>>>>>   mw.put( "count:",  Bytes.toBytes(splits[1]));
>>>>>   r.setStatus("Map emitting " + row + " for record " + k.toString());
>>>>>   output.collect(new ImmutableBytesWritable(row.getBytes()), mw);
>>>>>  }
>>>>>
>>>>>  public static class TableUploader extends
>>>>> TableReduce<ImmutableBytesWritable,
>>>>> BatchUpdate> {
>>>>>
>>>>>
>>>>>  @Override
>>>>>  public void reduce(ImmutableBytesWritable key, Iterator<BatchUpdate>
>>>>> values,
>>>>>     OutputCollector<ImmutableBytesWritable, BatchUpdate> output,
>>>>>     @SuppressWarnings("unused") Reporter reporter)
>>>>>     throws IOException {
>>>>>
>>>>>   while(values.hasNext()) {
>>>>>     output.collect(key, values.next());
>>>>>   }
>>>>>  }
>>>>> }
>>>>>
>>>>>
>>>>>  static int printUsage() {
>>>>>   System.out.println(NAME + " <input> <table_name>");
>>>>>   return -1;
>>>>>  }
>>>>>
>>>>>  public int run(@SuppressWarnings("unused") String[] args) throws
>>>>> Exception {
>>>>>   // Make sure there are exactly 2 parameters left.
>>>>>   if (args.length != 2) {
>>>>>     System.out.println("ERROR: Wrong number of parameters: " +
>>>>>       args.length + " instead of 2.");
>>>>>     return printUsage();
>>>>>   }
>>>>>   JobClient.runJob(createSubmittableJob(args));
>>>>>   return 0;
>>>>>  }
>>>>>
>>>>>
>>>>>  public Configuration getConf() {
>>>>>   return this.conf;
>>>>>  }
>>>>>
>>>>>  public void setConf(final Configuration c) {
>>>>>   this.conf = c;
>>>>>  }
>>>>>
>>>>>  public static void main(String[] args) throws Exception {
>>>>>   int errCode = ToolRunner.run(new Configuration(), new
>>>>> SampleUploader(),
>>>>>     args);
>>>>>   System.exit(errCode);
>>>>>  }
>>>>> }
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>           
>>>>         
>>>
>>>
>>>       
>>     
>
>
>   


Mime
View raw message