Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 8879E1109E for ; Thu, 31 Jul 2014 22:07:50 +0000 (UTC) Received: (qmail 45636 invoked by uid 500); 31 Jul 2014 22:07:39 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 45507 invoked by uid 500); 31 Jul 2014 22:07:39 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 44096 invoked by uid 99); 31 Jul 2014 22:07:38 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 31 Jul 2014 22:07:38 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 8F8A1951CC8; Thu, 31 Jul 2014 22:07:38 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: eclark@apache.org To: commits@hbase.apache.org Date: Thu, 31 Jul 2014 22:08:14 -0000 Message-Id: In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [38/50] [abbrv] git commit: [master] Refactor copyTable a lil' bit and pass taking config parameters from CLI. [master] Refactor copyTable a lil' bit and pass taking config parameters from CLI. Summary: this is needed for testing migration job for ods prod data, please take a look :) Test Plan: try it out on shadow Reviewers: rshroff, fan, gauravm, daviddeng, arjen, manukranthk Reviewed By: manukranthk Subscribers: hbase-eng@, paultuckfield Differential Revision: https://phabricator.fb.com/D1430360 Tasks: 4682090 git-svn-id: svn+ssh://tubbs/svnhive/hadoop/branches/titan/VENDOR.hbase/hbase-trunk@43444 e7acf4d4-3532-417f-9e73-7a9ae25a1f51 Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2cb3ebbc Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2cb3ebbc Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2cb3ebbc Branch: refs/heads/0.89-fb Commit: 2cb3ebbc2b8b84674d5a27bd2422a202cff524b5 Parents: fede35d Author: adela Authored: Fri Jul 11 23:25:56 2014 +0000 Committer: Elliott Clark Committed: Thu Jul 31 14:44:24 2014 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/mapreduce/CopyTable.java | 267 ++++++++++--------- 1 file changed, 134 insertions(+), 133 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/2cb3ebbc/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java index d060d1e..1f6afa4 100644 --- a/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java +++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java @@ -19,15 +19,20 @@ */ package org.apache.hadoop.hbase.mapreduce; +import java.io.IOException; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.HelpFormatter; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Job; -import java.io.IOException; - /** * Tool used to copy a table to another one which can be on a different setup. * It is also configurable with a start and time as well as a specification @@ -36,156 +41,152 @@ import java.io.IOException; public class CopyTable { final static String NAME = "Copy Table"; - static String rsClass = null; - static String rsImpl = null; - static long startTime = 0; - static long endTime = 0; - static String tableName = null; - static String newTableName = null; - static String peerAddress = null; - static String families = null; + private final String rsClass; + private final String rsImpl; + private final long startTime; + private final long endTime; + private final String tableName; + private final String newTableName; + private final String peerAddress; + private final String[] families; + + + public CopyTable(String rsClass, String rsImpl, long startTime, long endTime, + String tableName, String newTableName, String zookeeperQuorum, + String[] families) { + this.rsClass = rsClass; + this.rsImpl = rsImpl; + this.startTime = startTime; + this.endTime = endTime; + this.tableName = tableName; + this.newTableName = newTableName; + this.peerAddress = zookeeperQuorum; + this.families = families; + } /** * Sets up the actual job. * - * @param conf The current configuration. - * @param args The command line parameters. + * @param conf + * The current configuration. + * @param args + * The command line parameters. * @return The newly created job. - * @throws IOException When setting up the job fails. + * @throws IOException + * When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) - throws IOException { - if (!doCommandLine(args)) { + throws IOException { + Job job = null; + CopyTable cpTbl = null; + try { + cpTbl = buildCopyTableFromArguments(conf, args); + } catch (ParseException e) { + e.printStackTrace(); return null; } - Job job = new Job(conf, NAME + "_" + tableName); - job.setJarByClass(CopyTable.class); - Scan scan = new Scan(); - if (startTime != 0) { - scan.setTimeRange(startTime, - endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime); - } - if(families != null) { - String[] fams = families.split(","); - for(String fam : fams) { - scan.addFamily(Bytes.toBytes(fam)); + if (cpTbl != null) { + job = new Job(conf, NAME + "_" + cpTbl.tableName); + job.setJarByClass(CopyTable.class); + Scan scan = new Scan(); + if (cpTbl.startTime != 0) { + scan.setTimeRange(cpTbl.startTime, + cpTbl.endTime == 0 ? HConstants.LATEST_TIMESTAMP : cpTbl.endTime); + } + if (cpTbl.families != null) { + for (String fam : cpTbl.families) { + scan.addFamily(Bytes.toBytes(fam)); + } } + TableMapReduceUtil.initTableMapperJob(cpTbl.tableName, scan, + Import.Importer.class, null, null, job); + TableMapReduceUtil.initTableReducerJob( + cpTbl.newTableName == null ? cpTbl.tableName : cpTbl.newTableName, + null, job, null, cpTbl.peerAddress, cpTbl.rsClass, cpTbl.rsImpl); + job.setNumReduceTasks(0); + } else { + System.out.println("CopyTable object is null, exiting!"); + return null; } - TableMapReduceUtil.initTableMapperJob(tableName, scan, - Import.Importer.class, null, null, job); - TableMapReduceUtil.initTableReducerJob( - newTableName == null ? tableName : newTableName, null, job, - null, peerAddress, rsClass, rsImpl); - job.setNumReduceTasks(0); return job; } - /* - * @param errorMsg Error message. Can be null. - */ - private static void printUsage(final String errorMsg) { - if (errorMsg != null && errorMsg.length() > 0) { - System.err.println("ERROR: " + errorMsg); - } - System.err.println("Usage: CopyTable [--rs.class=CLASS] " + - "[--rs.impl=IMPL] [--starttime=X] [--endtime=Y] " + - "[--new.name=NEW] [--peer.adr=ADR] "); - System.err.println(); - System.err.println("Options:"); - System.err.println(" rs.class hbase.regionserver.class of the peer cluster"); - System.err.println(" specify if different from current cluster"); - System.err.println(" rs.impl hbase.regionserver.impl of the peer cluster"); - System.err.println(" starttime beginning of the time range"); - System.err.println(" without endtime means from starttime to forever"); - System.err.println(" endtime end of the time range"); - System.err.println(" new.name new table's name"); - System.err.println(" peer.adr Address of the peer cluster given in the format"); - System.err.println(" hbase.zookeeer.quorum:zookeeper.znode.parent"); - System.err.println(" families comma-seperated list of families to copy"); - System.err.println(); - System.err.println("Args:"); - System.err.println(" tablename Name of the table to copy"); - System.err.println(); - System.err.println("Examples:"); - System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:"); - System.err.println(" $ bin/hbase " + - "org.apache.hadoop.hbase.mapreduce.CopyTable --rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface " + - "--rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer --starttime=1265875194289 --endtime=1265878794289 " + - "--peer.adr=server1,server2,server3:/hbase TestTable "); + private static void printHelp(Options opt) { + String example = "To copy 'TestTable' to a cluster that uses replication for a 1 hour window:\n $ bin/hbase " + + "org.apache.hadoop.hbase.mapreduce.CopyTable --rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface " + + "--rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer --starttime=1265875194289 --endtime=1265878794289 " + + "--peer.adr=server1,server2,server3:/hbase TestTable "; + new HelpFormatter().printHelp( + "Job needs to be run on the source cluster \n CopyTable < tablename | -h > [--rs.class=CLASS] " + + "[--rs.impl=IMPL] [--starttime=X] [--endtime=Y] " + + "[--new.name=NEW] [--peer.adr=ADR] [--D]", "", opt, + "Example: " + example); } - private static boolean doCommandLine(final String[] args) { - // Process command-line args. TODO: Better cmd-line processing - // (but hopefully something not as painful as cli options). - if (args.length < 1) { - printUsage(null); - return false; - } - try { - for (int i = 0; i < args.length; i++) { - String cmd = args[i]; - if (cmd.equals("-h") || cmd.startsWith("--h")) { - printUsage(null); - return false; - } - - final String rsClassArgKey = "--rs.class="; - if (cmd.startsWith(rsClassArgKey)) { - rsClass = cmd.substring(rsClassArgKey.length()); - continue; - } - - final String rsImplArgKey = "--rs.impl="; - if (cmd.startsWith(rsImplArgKey)) { - rsImpl = cmd.substring(rsImplArgKey.length()); - continue; - } - - final String startTimeArgKey = "--starttime="; - if (cmd.startsWith(startTimeArgKey)) { - startTime = Long.parseLong(cmd.substring(startTimeArgKey.length())); - continue; - } - - final String endTimeArgKey = "--endtime="; - if (cmd.startsWith(endTimeArgKey)) { - endTime = Long.parseLong(cmd.substring(endTimeArgKey.length())); - continue; - } - - final String newNameArgKey = "--new.name="; - if (cmd.startsWith(newNameArgKey)) { - newTableName = cmd.substring(newNameArgKey.length()); - continue; - } - - final String peerAdrArgKey = "--peer.adr="; - if (cmd.startsWith(peerAdrArgKey)) { - peerAddress = cmd.substring(peerAdrArgKey.length()); - continue; - } - - final String familiesArgKey = "--families="; - if (cmd.startsWith(familiesArgKey)) { - families = cmd.substring(familiesArgKey.length()); - continue; - } + private static Options buildOptions() { + Options opt = new Options(); + opt.addOption("rs.class", true, + "hbase.regionserver.class of the destination cluster"); + opt.addOption("rs.impl", true, + "hbase.regionserver.impl of the destination cluster"); + opt.addOption( + "starttime", + true, + "beginning of the time range, don't specify if you want to copy [-infinity, end)"); + opt.addOption( + "endtime", + true, + "end of the time range, don't specify if you want to copy the data [start, infinity)"); + opt.addOption("new.name", true, + "name of the new table on the destination cluster"); + opt.addOption("peer.adr", true, + "zookeeper.quorum of the destination cluster"); + opt.addOption("families", true, "comma-seperated list of families to copy"); + opt.addOption("tablename", true, + "name of the table that we are copying from the source cluster"); + opt.addOption("D", true, "configuration override, provide arguments in format conf1=x, conf2=y, etc."); + opt.addOption("-h", false, "help"); + return opt; + } - if (i == args.length-1) { - tableName = cmd; + /** + * Construct CopyTable object from the arguments from command line + */ + private static CopyTable buildCopyTableFromArguments(Configuration conf, + final String[] args) throws ParseException { + Options opt = buildOptions(); + CommandLine cmd = new GnuParser().parse(opt, args); + String rsClass = cmd.getOptionValue("rs.class"); + String rsImpl = cmd.getOptionValue("rs.impl"); + String startTimeString = cmd.getOptionValue("starttime"); + long startTime = startTimeString == null ? 0 : Integer + .valueOf(startTimeString); + String endTimeString = cmd.getOptionValue("endtime"); + long endTime = endTimeString == null ? 0 : Integer + .valueOf(endTimeString); + String newTableName = cmd.getOptionValue("new.name"); + String zookeeperQuorum = cmd.getOptionValue("peer.adr"); + String[] families = cmd.getOptionValues("families"); + String tableName = cmd.getOptionValue("tablename"); + if (tableName == null && zookeeperQuorum == null) { + printHelp(opt); + throw new ParseException( + "tableName OR zookeperQuorum (peer.adr) must be specified"); + } + if (cmd.hasOption("D")) { + for (String confOpt : cmd.getOptionValues("D")) { + String[] kv = confOpt.split("=", 2); + if (kv.length == 2) { + conf.set(kv[0], kv[1]); + System.out.println("-D configuration override: " + kv[0] + "=" + + kv[1]); + } else { + throw new ParseException("-D option format invalid: " + confOpt); } } - if (newTableName == null && peerAddress == null) { - printUsage("At least a new table name or a " + - "peer address must be specified"); - return false; - } - } catch (Exception e) { - e.printStackTrace(); - printUsage("Can't start because " + e.getMessage()); - return false; } - return true; + return new CopyTable(rsClass, rsImpl, startTime, endTime, tableName, + newTableName, zookeeperQuorum, families); } /**