hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ecl...@apache.org
Subject [38/50] [abbrv] git commit: [master] Refactor copyTable a lil' bit and pass taking config parameters from CLI.
Date Thu, 31 Jul 2014 22:08:14 GMT
[master] Refactor copyTable a lil' bit and pass taking config parameters from CLI.

Summary: This is needed for testing the migration job for ODS prod data; please take a look :)

Test Plan: try it out on shadow

Reviewers: rshroff, fan, gauravm, daviddeng, arjen, manukranthk

Reviewed By: manukranthk

Subscribers: hbase-eng@, paultuckfield

Differential Revision: https://phabricator.fb.com/D1430360

Tasks: 4682090

git-svn-id: svn+ssh://tubbs/svnhive/hadoop/branches/titan/VENDOR.hbase/hbase-trunk@43444 e7acf4d4-3532-417f-9e73-7a9ae25a1f51


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/2cb3ebbc
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/2cb3ebbc
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/2cb3ebbc

Branch: refs/heads/0.89-fb
Commit: 2cb3ebbc2b8b84674d5a27bd2422a202cff524b5
Parents: fede35d
Author: adela <adela@e7acf4d4-3532-417f-9e73-7a9ae25a1f51>
Authored: Fri Jul 11 23:25:56 2014 +0000
Committer: Elliott Clark <elliott@fb.com>
Committed: Thu Jul 31 14:44:24 2014 -0700

----------------------------------------------------------------------
 .../hadoop/hbase/mapreduce/CopyTable.java       | 267 ++++++++++---------
 1 file changed, 134 insertions(+), 133 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/2cb3ebbc/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
index d060d1e..1f6afa4 100644
--- a/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
+++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/CopyTable.java
@@ -19,15 +19,20 @@
  */
 package org.apache.hadoop.hbase.mapreduce;
 
+import java.io.IOException;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.mapreduce.Job;
 
-import java.io.IOException;
-
 /**
  * Tool used to copy a table to another one which can be on a different setup.
  * It is also configurable with a start and time as well as a specification
@@ -36,156 +41,152 @@ import java.io.IOException;
 public class CopyTable {
 
   final static String NAME = "Copy Table";
-  static String rsClass = null;
-  static String rsImpl = null;
-  static long startTime = 0;
-  static long endTime = 0;
-  static String tableName = null;
-  static String newTableName = null;
-  static String peerAddress = null;
-  static String families = null;
+  private final String rsClass;
+  private final String rsImpl;
+  private final long startTime;
+  private final long endTime;
+  private final String tableName;
+  private final String newTableName;
+  private final String peerAddress;
+  private final String[] families;
+
+
+  public CopyTable(String rsClass, String rsImpl, long startTime, long endTime,
+      String tableName, String newTableName, String zookeeperQuorum,
+      String[] families) {
+    this.rsClass = rsClass;
+    this.rsImpl = rsImpl;
+    this.startTime = startTime;
+    this.endTime = endTime;
+    this.tableName = tableName;
+    this.newTableName = newTableName;
+    this.peerAddress = zookeeperQuorum;
+    this.families = families;
+  }
 
   /**
    * Sets up the actual job.
    *
-   * @param conf  The current configuration.
-   * @param args  The command line parameters.
+   * @param conf
+   *          The current configuration.
+   * @param args
+   *          The command line parameters.
    * @return The newly created job.
-   * @throws IOException When setting up the job fails.
+   * @throws IOException
+   *           When setting up the job fails.
    */
   public static Job createSubmittableJob(Configuration conf, String[] args)
-  throws IOException {
-    if (!doCommandLine(args)) {
+      throws IOException {
+    Job job = null;
+    CopyTable cpTbl = null;
+    try {
+      cpTbl = buildCopyTableFromArguments(conf, args);
+    } catch (ParseException e) {
+      e.printStackTrace();
       return null;
     }
-    Job job = new Job(conf, NAME + "_" + tableName);
-    job.setJarByClass(CopyTable.class);
-    Scan scan = new Scan();
-    if (startTime != 0) {
-      scan.setTimeRange(startTime,
-          endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime);
-    }
-    if(families != null) {
-      String[] fams = families.split(",");
-      for(String fam : fams) {
-        scan.addFamily(Bytes.toBytes(fam));
+    if (cpTbl != null) {
+      job = new Job(conf, NAME + "_" + cpTbl.tableName);
+      job.setJarByClass(CopyTable.class);
+      Scan scan = new Scan();
+      if (cpTbl.startTime != 0) {
+        scan.setTimeRange(cpTbl.startTime,
+            cpTbl.endTime == 0 ? HConstants.LATEST_TIMESTAMP : cpTbl.endTime);
+      }
+      if (cpTbl.families != null) {
+        for (String fam : cpTbl.families) {
+          scan.addFamily(Bytes.toBytes(fam));
+        }
       }
+      TableMapReduceUtil.initTableMapperJob(cpTbl.tableName, scan,
+          Import.Importer.class, null, null, job);
+      TableMapReduceUtil.initTableReducerJob(
+          cpTbl.newTableName == null ? cpTbl.tableName : cpTbl.newTableName,
+          null, job, null, cpTbl.peerAddress, cpTbl.rsClass, cpTbl.rsImpl);
+      job.setNumReduceTasks(0);
+    } else {
+      System.out.println("CopyTable object is null, exiting!");
+      return null;
     }
-    TableMapReduceUtil.initTableMapperJob(tableName, scan,
-        Import.Importer.class, null, null, job);
-    TableMapReduceUtil.initTableReducerJob(
-        newTableName == null ? tableName : newTableName, null, job,
-        null, peerAddress, rsClass, rsImpl);
-    job.setNumReduceTasks(0);
     return job;
   }
 
-  /*
-   * @param errorMsg Error message.  Can be null.
-   */
-  private static void printUsage(final String errorMsg) {
-    if (errorMsg != null && errorMsg.length() > 0) {
-      System.err.println("ERROR: " + errorMsg);
-    }
-    System.err.println("Usage: CopyTable [--rs.class=CLASS] " +
-        "[--rs.impl=IMPL] [--starttime=X] [--endtime=Y] " +
-        "[--new.name=NEW] [--peer.adr=ADR] <tablename>");
-    System.err.println();
-    System.err.println("Options:");
-    System.err.println(" rs.class     hbase.regionserver.class of the peer cluster");
-    System.err.println("              specify if different from current cluster");
-    System.err.println(" rs.impl      hbase.regionserver.impl of the peer cluster");
-    System.err.println(" starttime    beginning of the time range");
-    System.err.println("              without endtime means from starttime to forever");
-    System.err.println(" endtime      end of the time range");
-    System.err.println(" new.name     new table's name");
-    System.err.println(" peer.adr     Address of the peer cluster given in the format");
-    System.err.println("              hbase.zookeeer.quorum:zookeeper.znode.parent");
-    System.err.println(" families     comma-seperated list of families to copy");
-    System.err.println();
-    System.err.println("Args:");
-    System.err.println(" tablename    Name of the table to copy");
-    System.err.println();
-    System.err.println("Examples:");
-    System.err.println(" To copy 'TestTable' to a cluster that uses replication for a 1 hour window:");
-    System.err.println(" $ bin/hbase " +
-        "org.apache.hadoop.hbase.mapreduce.CopyTable --rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface " +
-        "--rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer --starttime=1265875194289 --endtime=1265878794289 " +
-        "--peer.adr=server1,server2,server3:/hbase TestTable ");
+  private static void printHelp(Options opt) {
+    String example = "To copy 'TestTable' to a cluster that uses replication for a 1 hour window:\n $ bin/hbase "
+        + "org.apache.hadoop.hbase.mapreduce.CopyTable --rs.class=org.apache.hadoop.hbase.ipc.ReplicationRegionInterface "
+        + "--rs.impl=org.apache.hadoop.hbase.regionserver.replication.ReplicationRegionServer --starttime=1265875194289 --endtime=1265878794289 "
+        + "--peer.adr=server1,server2,server3:/hbase TestTable ";
+    new HelpFormatter().printHelp(
+        "Job needs to be run on the source cluster \n CopyTable < tablename | -h > [--rs.class=CLASS] "
+            + "[--rs.impl=IMPL] [--starttime=X] [--endtime=Y] "
+            + "[--new.name=NEW] [--peer.adr=ADR] [--D]", "", opt,
+        "Example: " + example);
   }
 
-  private static boolean doCommandLine(final String[] args) {
-    // Process command-line args. TODO: Better cmd-line processing
-    // (but hopefully something not as painful as cli options).
-    if (args.length < 1) {
-      printUsage(null);
-      return false;
-    }
-    try {
-      for (int i = 0; i < args.length; i++) {
-        String cmd = args[i];
-        if (cmd.equals("-h") || cmd.startsWith("--h")) {
-          printUsage(null);
-          return false;
-        }
-
-        final String rsClassArgKey = "--rs.class=";
-        if (cmd.startsWith(rsClassArgKey)) {
-          rsClass = cmd.substring(rsClassArgKey.length());
-          continue;
-        }
-
-        final String rsImplArgKey = "--rs.impl=";
-        if (cmd.startsWith(rsImplArgKey)) {
-          rsImpl = cmd.substring(rsImplArgKey.length());
-          continue;
-        }
-
-        final String startTimeArgKey = "--starttime=";
-        if (cmd.startsWith(startTimeArgKey)) {
-          startTime = Long.parseLong(cmd.substring(startTimeArgKey.length()));
-          continue;
-        }
-
-        final String endTimeArgKey = "--endtime=";
-        if (cmd.startsWith(endTimeArgKey)) {
-          endTime = Long.parseLong(cmd.substring(endTimeArgKey.length()));
-          continue;
-        }
-
-        final String newNameArgKey = "--new.name=";
-        if (cmd.startsWith(newNameArgKey)) {
-          newTableName = cmd.substring(newNameArgKey.length());
-          continue;
-        }
-
-        final String peerAdrArgKey = "--peer.adr=";
-        if (cmd.startsWith(peerAdrArgKey)) {
-          peerAddress = cmd.substring(peerAdrArgKey.length());
-          continue;
-        }
-
-        final String familiesArgKey = "--families=";
-        if (cmd.startsWith(familiesArgKey)) {
-          families = cmd.substring(familiesArgKey.length());
-          continue;
-        }
+  private static Options buildOptions() {
+    Options opt = new Options();
+    opt.addOption("rs.class", true,
+        "hbase.regionserver.class of the destination cluster");
+    opt.addOption("rs.impl", true,
+        "hbase.regionserver.impl of the destination cluster");
+    opt.addOption(
+        "starttime",
+        true,
+        "beginning of the time range, don't specify if you want to copy [-infinity, end)");
+    opt.addOption(
+        "endtime",
+        true,
+        "end of the time range, don't specify if you want to copy the data [start, infinity)");
+    opt.addOption("new.name", true,
+        "name of the new table on the destination cluster");
+    opt.addOption("peer.adr", true,
+        "zookeeper.quorum of the destination cluster");
+    opt.addOption("families", true, "comma-seperated list of families to copy");
+    opt.addOption("tablename", true,
+        "name of the table that we are copying from the source cluster");
+    opt.addOption("D", true, "configuration override, provide arguments in format conf1=x, conf2=y, etc.");
+    opt.addOption("-h", false, "help");
+    return opt;
+  }
 
-        if (i == args.length-1) {
-          tableName = cmd;
+  /**
+   * Construct CopyTable object from the arguments from command line
+   */
+  private static CopyTable buildCopyTableFromArguments(Configuration conf,
+      final String[] args) throws ParseException {
+    Options opt = buildOptions();
+    CommandLine cmd = new GnuParser().parse(opt, args);
+    String rsClass = cmd.getOptionValue("rs.class");
+    String rsImpl = cmd.getOptionValue("rs.impl");
+    String startTimeString = cmd.getOptionValue("starttime");
+    long startTime = startTimeString == null ? 0 : Integer
+        .valueOf(startTimeString);
+    String endTimeString = cmd.getOptionValue("endtime");
+    long endTime = endTimeString == null ? 0 : Integer
+        .valueOf(endTimeString);
+    String newTableName = cmd.getOptionValue("new.name");
+    String zookeeperQuorum = cmd.getOptionValue("peer.adr");
+    String[] families = cmd.getOptionValues("families");
+    String tableName = cmd.getOptionValue("tablename");
+    if (tableName == null && zookeeperQuorum == null) {
+      printHelp(opt);
+      throw new ParseException(
+          "tableName OR zookeperQuorum (peer.adr) must be specified");
+    }
+    if (cmd.hasOption("D")) {
+      for (String confOpt : cmd.getOptionValues("D")) {
+        String[] kv = confOpt.split("=", 2);
+        if (kv.length == 2) {
+          conf.set(kv[0], kv[1]);
+          System.out.println("-D configuration override: " + kv[0] + "="
+              + kv[1]);
+        } else {
+          throw new ParseException("-D option format invalid: " + confOpt);
         }
       }
-      if (newTableName == null && peerAddress == null) {
-        printUsage("At least a new table name or a " +
-            "peer address must be specified");
-        return false;
-      }
-    } catch (Exception e) {
-      e.printStackTrace();
-      printUsage("Can't start because " + e.getMessage());
-      return false;
     }
-    return true;
+    return new CopyTable(rsClass, rsImpl, startTime, endTime, tableName,
+        newTableName, zookeeperQuorum, families);
   }
 
   /**


Mime
View raw message