hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r1329575 - in /hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/tools: ./ Canary.java
Date Tue, 24 Apr 2012 07:22:53 GMT
Author: stack
Date: Tue Apr 24 07:22:53 2012
New Revision: 1329575

URL: http://svn.apache.org/viewvc?rev=1329575&view=rev
Log:
HBASE-4393 Implement a canary monitoring program

Added:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/tools/
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/tools/Canary.java

Added: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/tools/Canary.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/tools/Canary.java?rev=1329575&view=auto
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/tools/Canary.java (added)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/tools/Canary.java Tue Apr 24 07:22:53 2012
@@ -0,0 +1,253 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.tool;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import org.apache.hadoop.conf.Configuration;
+
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.TableNotFoundException;
+
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.client.HTable;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
+
+/**
+ * HBase Canary Tool, which can be used to do
+ * "canary monitoring" of a running HBase cluster.
+ *
+ * For each region, tries to get one row per column family
+ * and outputs some information about failure or latency.
+ */
+public final class Canary implements Tool {
+  /** Sink interface used by the canary to output information. */
+  public interface Sink {
+    /** Reports that probing the given region failed as a whole. */
+    public void publishReadFailure(HRegionInfo region);
+
+    /** Reports that reading the given column family of the region failed. */
+    public void publishReadFailure(HRegionInfo region, HColumnDescriptor column);
+
+    /** Reports how long (in milliseconds) a successful read of the column family took. */
+    public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime);
+  }
+
+  /**
+   * Simple implementation of the canary sink that writes timings
+   * and failures to the class logger.
+   */
+  public static class StdOutSink implements Sink {
+    @Override
+    public void publishReadFailure(HRegionInfo region) {
+      LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()));
+    }
+
+    @Override
+    public void publishReadFailure(HRegionInfo region, HColumnDescriptor column) {
+      LOG.error(String.format("read from region %s column family %s failed",
+                region.getRegionNameAsString(), column.getNameAsString()));
+    }
+
+    @Override
+    public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) {
+      LOG.info(String.format("read from region %s column family %s in %dms",
+               region.getRegionNameAsString(), column.getNameAsString(), msTime));
+    }
+  }
+
+  // Pause between checks (in milliseconds) used by -daemon when no -interval is given.
+  private static final long DEFAULT_INTERVAL = 6000;
+
+  private static final Log LOG = LogFactory.getLog(Canary.class);
+
+  private Configuration conf = null;
+  private HBaseAdmin admin = null;
+  // Pause between checks in milliseconds; 0 means "run once and exit".
+  private long interval = 0;
+  private Sink sink = null;
+
+  /** Creates a canary that reports through a {@link StdOutSink}. */
+  public Canary() {
+    this(new StdOutSink());
+  }
+
+  /**
+   * Creates a canary that reports through the given sink.
+   *
+   * @param sink receiver of read timings and read failures
+   */
+  public Canary(Sink sink) {
+    this.sink = sink;
+  }
+
+  @Override
+  public Configuration getConf() {
+    return conf;
+  }
+
+  @Override
+  public void setConf(Configuration conf) {
+    this.conf = conf;
+  }
+
+  /**
+   * Parses the command line ([opts] [table 1 [table 2 ...]]) and then probes
+   * either the listed tables or, when none are listed, every table. With a
+   * positive interval the probing repeats forever, sleeping between passes.
+   *
+   * @param args command line arguments
+   * @return 0 after a single successful pass, 1 if the HBaseAdmin was aborted
+   * @throws Exception if the admin cannot be created, a table lookup fails,
+   *         or the sleep between passes is interrupted
+   */
+  @Override
+  public int run(String[] args) throws Exception {
+    int tables_index = -1;
+
+    // Process command line args
+    for (int i = 0; i < args.length; i++) {
+      String cmd = args[i];
+
+      if (cmd.startsWith("-")) {
+        if (tables_index >= 0) {
+          // command line args must be in the form: [opts] [table 1 [table 2 ...]]
+          System.err.println("Invalid command line options");
+          printUsageAndExit();
+        }
+
+        if (cmd.equals("-help")) {
+          // user asked for help, print the help and quit.
+          printUsageAndExit();
+        } else if (cmd.equals("-daemon")) {
+          // user asked for daemon mode: fall back to the default interval only
+          // when no explicit -interval was given earlier. The interval test must
+          // stay inside this branch, otherwise "-interval N -daemon" would fall
+          // through and be rejected as an unknown option.
+          if (interval == 0) {
+            interval = DEFAULT_INTERVAL;
+          }
+        } else if (cmd.equals("-interval")) {
+          // user has specified an interval for canary breaths (-interval N)
+          i++;
+
+          if (i == args.length) {
+            System.err.println("-interval needs a numeric value argument.");
+            printUsageAndExit();
+          }
+
+          try {
+            // the option is given in seconds, the field holds milliseconds
+            interval = Long.parseLong(args[i]) * 1000;
+          } catch (NumberFormatException e) {
+            System.err.println("-interval needs a numeric value argument.");
+            printUsageAndExit();
+          }
+
+          if (interval < 0) {
+            // Thread.sleep() rejects negative values; fail before any work is done
+            System.err.println("-interval needs a non-negative value argument.");
+            printUsageAndExit();
+          }
+        } else {
+          // no options match
+          System.err.println(cmd + " options is invalid.");
+          printUsageAndExit();
+        }
+      } else if (tables_index < 0) {
+        // keep track of first table name specified by the user
+        tables_index = i;
+      }
+    }
+
+    // initialize HBase conf and admin
+    if (conf == null) {
+      conf = HBaseConfiguration.create();
+    }
+    admin = new HBaseAdmin(conf);
+
+    // lets the canary monitor the cluster
+    do {
+      if (admin.isAborted()) {
+        LOG.error("HBaseAdmin aborted");
+        return 1;
+      }
+
+      if (tables_index >= 0) {
+        for (int i = tables_index; i < args.length; i++) {
+          sniff(args[i]);
+        }
+      } else {
+        sniff();
+      }
+
+      Thread.sleep(interval);
+    } while (interval > 0);
+
+    return 0;
+  }
+
+  /*
+   * Prints the command line help on standard error and terminates
+   * the JVM with exit status 1.
+   */
+  private void printUsageAndExit() {
+    System.err.printf("Usage: bin/hbase %s [opts] [table 1 [table 2...]]\n", getClass().getName());
+    System.err.println(" where [opts] are:");
+    System.err.println("   -help          Show this help and exit.");
+    System.err.println("   -daemon        Continuous check at defined intervals.");
+    System.err.println("   -interval <N>  Interval between checks (sec)");
+    System.exit(1);
+  }
+
+  /*
+   * canary entry point to monitor all the tables.
+   */
+  private void sniff() throws Exception {
+    for (HTableDescriptor table : admin.listTables()) {
+      sniff(table);
+    }
+  }
+
+  /*
+   * canary entry point to monitor the specified table.
+   * A table that is not available is logged as a warning and skipped.
+   */
+  private void sniff(String tableName) throws Exception {
+    if (admin.isTableAvailable(tableName)) {
+      sniff(admin.getTableDescriptor(tableName.getBytes()));
+    } else {
+      LOG.warn(String.format("Table %s is not available", tableName));
+    }
+  }
+
+  /*
+   * Loops over the regions that belong to this table
+   * and outputs some information about their state.
+   */
+  private void sniff(HTableDescriptor tableDesc) throws Exception {
+    HTable table = null;
+
+    try {
+      table = new HTable(admin.getConfiguration(), tableDesc.getName());
+    } catch (TableNotFoundException e) {
+      // the table cannot be opened (it may have been dropped in the meantime);
+      // there is nothing to probe, so skip it silently.
+      return;
+    }
+
+    try {
+      for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) {
+        try {
+          sniffRegion(region, table);
+        } catch (Exception e) {
+          // the sink only receives the region, so keep the cause in the debug log
+          LOG.debug(String.format("sniffing region %s failed",
+                    region.getRegionNameAsString()), e);
+          sink.publishReadFailure(region);
+        }
+      }
+    } finally {
+      // a new HTable is opened on every pass; release it so that
+      // daemon mode does not accumulate open tables.
+      table.close();
+    }
+  }
+
+  /*
+   * For each column family of the region tries to get one row
+   * (the one at the region start key) and outputs the latency,
+   * or the failure.
+   */
+  private void sniffRegion(HRegionInfo region, HTable table) throws Exception {
+    HTableDescriptor tableDesc = table.getTableDescriptor();
+    for (HColumnDescriptor column : tableDesc.getColumnFamilies()) {
+      Get get = new Get(region.getStartKey());
+      get.addFamily(column.getName());
+
+      try {
+        long startTime = System.currentTimeMillis();
+        table.get(get);
+        long time = System.currentTimeMillis() - startTime;
+
+        sink.publishReadTiming(region, column, time);
+      } catch (Exception e) {
+        // the sink only receives region and column, so keep the cause in the debug log
+        LOG.debug(String.format("read from region %s column family %s threw",
+                  region.getRegionNameAsString(), column.getNameAsString()), e);
+        sink.publishReadFailure(region, column);
+      }
+    }
+  }
+
+  /**
+   * Command line entry point: runs the canary through {@link ToolRunner}
+   * and exits the JVM with the code returned by {@link #run(String[])}.
+   */
+  public static void main(String[] args) throws Exception {
+    int exitCode = ToolRunner.run(new Canary(), args);
+    System.exit(exitCode);
+  }
+}
+



Mime
View raw message