Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 54598200C5A for ; Tue, 18 Apr 2017 16:58:47 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 52E86160BB8; Tue, 18 Apr 2017 14:58:47 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 2B527160BB6 for ; Tue, 18 Apr 2017 16:58:45 +0200 (CEST) Received: (qmail 49060 invoked by uid 500); 18 Apr 2017 14:58:40 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 48323 invoked by uid 99); 18 Apr 2017 14:58:39 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 18 Apr 2017 14:58:39 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 370BDDFDAC; Tue, 18 Apr 2017 14:58:39 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: git-site-role@apache.org To: commits@hbase.apache.org Date: Tue, 18 Apr 2017 14:58:44 -0000 Message-Id: In-Reply-To: <36835e1ce0d249df8f8d4d723db32835@git.apache.org> References: <36835e1ce0d249df8f8d4d723db32835@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [06/51] [partial] hbase-site git commit: Published site at 82d554e3783372cc6b05489452c815b57c06f6cd. archived-at: Tue, 18 Apr 2017 14:58:47 -0000 http://git-wip-us.apache.org/repos/asf/hbase-site/blob/2fcc2ae0/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionServerMonitor.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionServerMonitor.html b/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionServerMonitor.html index 88dff07..af5536f 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionServerMonitor.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionServerMonitor.html @@ -77,16 +77,16 @@ 069import org.apache.hadoop.hbase.TableName; 070import org.apache.hadoop.hbase.TableNotEnabledException; 071import org.apache.hadoop.hbase.TableNotFoundException; -072import org.apache.hadoop.hbase.client.Admin; -073import org.apache.hadoop.hbase.client.Connection; -074import org.apache.hadoop.hbase.client.ConnectionFactory; -075import org.apache.hadoop.hbase.client.Get; -076import org.apache.hadoop.hbase.client.Put; -077import org.apache.hadoop.hbase.client.RegionLocator; -078import org.apache.hadoop.hbase.client.ResultScanner; -079import org.apache.hadoop.hbase.client.Scan; -080import org.apache.hadoop.hbase.client.Table; -081import org.apache.hadoop.hbase.client.Scan.ReadType; +072import org.apache.hadoop.hbase.classification.InterfaceAudience; +073import org.apache.hadoop.hbase.client.Admin; +074import org.apache.hadoop.hbase.client.Connection; +075import org.apache.hadoop.hbase.client.ConnectionFactory; +076import org.apache.hadoop.hbase.client.Get; +077import org.apache.hadoop.hbase.client.Put; +078import org.apache.hadoop.hbase.client.RegionLocator; +079import org.apache.hadoop.hbase.client.ResultScanner; +080import org.apache.hadoop.hbase.client.Scan; +081import org.apache.hadoop.hbase.client.Table; 082import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; 083import org.apache.hadoop.hbase.tool.Canary.RegionTask.TaskType; 084import org.apache.hadoop.hbase.util.Bytes; @@ -118,1387 +118,1345 @@ 110 * 3. zookeeper mode - for each zookeeper instance, selects a zNode and 111 * outputs some information about failure or latency. 112 */ -113public final class Canary implements Tool { -114 // Sink interface used by the canary to outputs information -115 public interface Sink { -116 public long getReadFailureCount(); -117 public long incReadFailureCount(); -118 public void publishReadFailure(HRegionInfo region, Exception e); -119 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e); -120 public void updateReadFailedHostList(HRegionInfo region, String serverName); -121 public Map<String,String> getReadFailures(); -122 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); -123 public long getWriteFailureCount(); -124 public void publishWriteFailure(HRegionInfo region, Exception e); -125 public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e); -126 public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime); -127 public void updateWriteFailedHostList(HRegionInfo region, String serverName); -128 public Map<String,String> getWriteFailures(); -129 } -130 // new extended sink for output regionserver mode info -131 // do not change the Sink interface directly due to maintaining the API -132 public interface ExtendedSink extends Sink { -133 public void publishReadFailure(String table, String server); -134 public void publishReadTiming(String table, String server, long msTime); -135 } -136 -137 // Simple implementation of canary sink that allows to plot on -138 // file or standard output timings or failures. -139 public static class StdOutSink implements Sink { -140 private AtomicLong readFailureCount = new AtomicLong(0), -141 writeFailureCount = new AtomicLong(0); -142 -143 private Map<String, String> readFailures = new ConcurrentHashMap<>(); -144 private Map<String, String> writeFailures = new ConcurrentHashMap<>(); -145 -146 @Override -147 public long getReadFailureCount() { -148 return readFailureCount.get(); -149 } -150 -151 @Override -152 public long incReadFailureCount() { -153 return readFailureCount.incrementAndGet(); -154 } -155 -156 @Override -157 public void publishReadFailure(HRegionInfo region, Exception e) { -158 readFailureCount.incrementAndGet(); -159 LOG.error(String.format("read from region %s failed", region.getRegionNameAsString()), e); -160 } -161 -162 @Override -163 public void publishReadFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { -164 readFailureCount.incrementAndGet(); -165 LOG.error(String.format("read from region %s column family %s failed", -166 region.getRegionNameAsString(), column.getNameAsString()), e); -167 } -168 -169 @Override -170 public void updateReadFailedHostList(HRegionInfo region, String serverName) { -171 readFailures.put(region.getRegionNameAsString(), serverName); -172 } -173 -174 @Override -175 public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { -176 LOG.info(String.format("read from region %s column family %s in %dms", -177 region.getRegionNameAsString(), column.getNameAsString(), msTime)); -178 } -179 -180 @Override -181 public Map<String, String> getReadFailures() { -182 return readFailures; -183 } -184 -185 @Override -186 public Map<String, String> getWriteFailures() { -187 return writeFailures; -188 } -189 -190 @Override -191 public long getWriteFailureCount() { -192 return writeFailureCount.get(); -193 } -194 -195 @Override -196 public void publishWriteFailure(HRegionInfo region, Exception e) { -197 writeFailureCount.incrementAndGet(); -198 LOG.error(String.format("write to region %s failed", region.getRegionNameAsString()), e); -199 } -200 -201 @Override -202 public void publishWriteFailure(HRegionInfo region, HColumnDescriptor column, Exception e) { -203 writeFailureCount.incrementAndGet(); -204 LOG.error(String.format("write to region %s column family %s failed", -205 region.getRegionNameAsString(), column.getNameAsString()), e); -206 } -207 -208 @Override -209 public void publishWriteTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { -210 LOG.info(String.format("write to region %s column family %s in %dms", -211 region.getRegionNameAsString(), column.getNameAsString(), msTime)); -212 } -213 -214 @Override -215 public void updateWriteFailedHostList(HRegionInfo region, String serverName) { -216 writeFailures.put(region.getRegionNameAsString(), serverName); -217 } -218 -219 } -220 // a ExtendedSink implementation -221 public static class RegionServerStdOutSink extends StdOutSink implements ExtendedSink { -222 -223 @Override -224 public void publishReadFailure(String table, String server) { -225 incReadFailureCount(); -226 LOG.error(String.format("Read from table:%s on region server:%s", table, server)); -227 } -228 -229 @Override -230 public void publishReadTiming(String table, String server, long msTime) { -231 LOG.info(String.format("Read from table:%s on region server:%s in %dms", -232 table, server, msTime)); -233 } -234 } -235 -236 public static class ZookeeperStdOutSink extends StdOutSink implements ExtendedSink { -237 @Override public void publishReadFailure(String zNode, String server) { -238 incReadFailureCount(); -239 LOG.error(String.format("Read from zNode:%s on zookeeper instance:%s", zNode, server)); -240 } -241 -242 @Override public void publishReadTiming(String znode, String server, long msTime) { -243 LOG.info(String.format("Read from zNode:%s on zookeeper instance:%s in %dms", -244 znode, server, msTime)); -245 } -246 } -247 -248 static class ZookeeperTask implements Callable<Void> { -249 private final Connection connection; -250 private final String host; -251 private String znode; -252 private final int timeout; -253 private ZookeeperStdOutSink sink; -254 -255 public ZookeeperTask(Connection connection, String host, String znode, int timeout, -256 ZookeeperStdOutSink sink) { -257 this.connection = connection; -258 this.host = host; -259 this.znode = znode; -260 this.timeout = timeout; -261 this.sink = sink; -262 } -263 -264 @Override public Void call() throws Exception { -265 ZooKeeper zooKeeper = null; -266 try { -267 zooKeeper = new ZooKeeper(host, timeout, EmptyWatcher.instance); -268 Stat exists = zooKeeper.exists(znode, false); -269 StopWatch stopwatch = new StopWatch(); -270 stopwatch.start(); -271 zooKeeper.getData(znode, false, exists); -272 stopwatch.stop(); -273 sink.publishReadTiming(znode, host, stopwatch.getTime()); -274 } catch (KeeperException | InterruptedException e) { -275 sink.publishReadFailure(znode, host); -276 } finally { -277 if (zooKeeper != null) { -278 zooKeeper.close(); -279 } -280 } -281 return null; -282 } -283 } -284 -285 /** -286 * For each column family of the region tries to get one row and outputs the latency, or the -287 * failure. -288 */ -289 static class RegionTask implements Callable<Void> { -290 public enum TaskType{ -291 READ, WRITE -292 } -293 private Connection connection; -294 private HRegionInfo region; -295 private Sink sink; -296 private TaskType taskType; -297 private boolean rawScanEnabled; -298 private ServerName serverName; -299 -300 RegionTask(Connection connection, HRegionInfo region, ServerName serverName, Sink sink, -301 TaskType taskType, boolean rawScanEnabled) { -302 this.connection = connection; -303 this.region = region; -304 this.serverName = serverName; -305 this.sink = sink; -306 this.taskType = taskType; -307 this.rawScanEnabled = rawScanEnabled; -308 } -309 -310 @Override -311 public Void call() { -312 switch (taskType) { -313 case READ: -314 return read(); -315 case WRITE: -316 return write(); -317 default: -318 return read(); -319 } -320 } -321 -322 public Void read() { -323 Table table = null; -324 HTableDescriptor tableDesc = null; -325 try { -326 if (LOG.isDebugEnabled()) { -327 LOG.debug(String.format("reading table descriptor for table %s", -328 region.getTable())); -329 } -330 table = connection.getTable(region.getTable()); -331 tableDesc = table.getTableDescriptor(); -332 } catch (IOException e) { -333 LOG.debug("sniffRegion failed", e); -334 sink.publishReadFailure(region, e); -335 if (table != null) { -336 try { -337 table.close(); -338 } catch (IOException ioe) { -339 LOG.error("Close table failed", e); -340 } -341 } -342 return null; -343 } -344 -345 byte[] startKey = null; -346 Get get = null; -347 Scan scan = null; -348 ResultScanner rs = null; -349 StopWatch stopWatch = new StopWatch(); -350 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) { -351 stopWatch.reset(); -352 startKey = region.getStartKey(); -353 // Can't do a get on empty start row so do a Scan of first element if any instead. -354 if (startKey.length > 0) { -355 get = new Get(startKey); -356 get.setCacheBlocks(false); -357 get.setFilter(new FirstKeyOnlyFilter()); -358 get.addFamily(column.getName()); -359 } else { -360 scan = new Scan(); -361 if (LOG.isDebugEnabled()) { -362 LOG.debug(String.format("rawScan : %s for table: %s", rawScanEnabled, -363 tableDesc.getTableName())); -364 } -365 scan.setRaw(rawScanEnabled); -366 scan.setCaching(1); -367 scan.setCacheBlocks(false); -368 scan.setFilter(new FirstKeyOnlyFilter()); -369 scan.addFamily(column.getName()); -370 scan.setMaxResultSize(1L); -371 scan.setOneRowLimit(); -372 } -373 -374 if (LOG.isDebugEnabled()) { -375 LOG.debug(String.format("reading from table %s region %s column family %s and key %s", -376 tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(), -377 Bytes.toStringBinary(startKey))); -378 } -379 try { -380 stopWatch.start(); -381 if (startKey.length > 0) { -382 table.get(get); -383 } else { -384 rs = table.getScanner(scan); -385 rs.next(); -386 } -387 stopWatch.stop(); -388 sink.publishReadTiming(region, column, stopWatch.getTime()); -389 } catch (Exception e) { -390 sink.publishReadFailure(region, column, e); -391 sink.updateReadFailedHostList(region, serverName.getHostname()); -392 } finally { -393 if (rs != null) { -394 rs.close(); -395 } -396 scan = null; -397 get = null; -398 startKey = null; -399 } -400 } -401 try { -402 table.close(); -403 } catch (IOException e) { -404 LOG.error("Close table failed", e); -405 } -406 return null; -407 } -408 -409 /** -410 * Check writes for the canary table -411 * @return -412 */ -413 private Void write() { -414 Table table = null; -415 HTableDescriptor tableDesc = null; -416 try { -417 table = connection.getTable(region.getTable()); -418 tableDesc = table.getTableDescriptor(); -419 byte[] rowToCheck = region.getStartKey(); -420 if (rowToCheck.length == 0) { -421 rowToCheck = new byte[]{0x0}; -422 } -423 int writeValueSize = -424 connection.getConfiguration().getInt(HConstants.HBASE_CANARY_WRITE_VALUE_SIZE_KEY, 10); -425 for (HColumnDescriptor column : tableDesc.getColumnFamilies()) { -426 Put put = new Put(rowToCheck); -427 byte[] value = new byte[writeValueSize]; -428 Bytes.random(value); -429 put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value); -430 -431 if (LOG.isDebugEnabled()) { -432 LOG.debug(String.format("writing to table %s region %s column family %s and key %s", -433 tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(), -434 Bytes.toStringBinary(rowToCheck))); -435 } -436 try { -437 long startTime = System.currentTimeMillis(); -438 table.put(put); -439 long time = System.currentTimeMillis() - startTime; -440 sink.publishWriteTiming(region, column, time); -441 } catch (Exception e) { -442 sink.publishWriteFailure(region, column, e); -443 } -444 } -445 table.close(); -446 } catch (IOException e) { -447 sink.publishWriteFailure(region, e); -448 sink.updateWriteFailedHostList(region, serverName.getHostname()); -449 } -450 return null; -451 } -452 } -453 -454 /** -455 * Get one row from a region on the regionserver and outputs the latency, or the failure. -456 */ -457 static class RegionServerTask implements Callable<Void> { -458 private Connection connection; -459 private String serverName; -460 private HRegionInfo region; -461 private ExtendedSink sink; -462 private AtomicLong successes; -463 -464 RegionServerTask(Connection connection, String serverName, HRegionInfo region, -465 ExtendedSink sink, AtomicLong successes) { -466 this.connection = connection; -467 this.serverName = serverName; -468 this.region = region; -469 this.sink = sink; -470 this.successes = successes; -471 } -472 -473 @Override -474 public Void call() { -475 TableName tableName = null; -476 Table table = null; -477 Get get = null; -478 byte[] startKey = null; -479 Scan scan = null; -480 StopWatch stopWatch = new StopWatch(); -481 // monitor one region on every region server -482 stopWatch.reset(); -483 try { -484 tableName = region.getTable(); -485 table = connection.getTable(tableName); -486 startKey = region.getStartKey(); -487 // Can't do a get on empty start row so do a Scan of first element if any instead. -488 if (LOG.isDebugEnabled()) { -489 LOG.debug(String.format("reading from region server %s table %s region %s and key %s", -490 serverName, region.getTable(), region.getRegionNameAsString(), -491 Bytes.toStringBinary(startKey))); -492 } -493 if (startKey.length > 0) { -494 get = new Get(startKey); -495 get.setCacheBlocks(false); -496 get.setFilter(new FirstKeyOnlyFilter()); -497 stopWatch.start(); -498 table.get(get); -499 stopWatch.stop(); -500 } else { -501 scan = new Scan(); -502 scan.setCacheBlocks(false); -503 scan.setFilter(new FirstKeyOnlyFilter()); -504 scan.setCaching(1); -505 scan.setMaxResultSize(1L); -506 scan.setOneRowLimit(); -507 stopWatch.start(); -508 ResultScanner s = table.getScanner(scan); -509 s.next(); -510 s.close(); -511 stopWatch.stop(); -512 } -513 successes.incrementAndGet(); -514 sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime()); -515 } catch (TableNotFoundException tnfe) { -516 LOG.error("Table may be deleted", tnfe); -517 // This is ignored because it doesn't imply that the regionserver is dead -518 } catch (TableNotEnabledException tnee) { -519 // This is considered a success since we got a response. -520 successes.incrementAndGet(); -521 LOG.debug("The targeted table was disabled. Assuming success."); -522 } catch (DoNotRetryIOException dnrioe) { -523 sink.publishReadFailure(tableName.getNameAsString(), serverName); -524 LOG.error(dnrioe); -525 } catch (IOException e) { -526 sink.publishReadFailure(tableName.getNameAsString(), serverName); -527 LOG.error(e); -528 } finally { -529 if (table != null) { -530 try { -531 table.close(); -532 } catch (IOException e) {/* DO NOTHING */ -533 LOG.error("Close table failed", e); -534 } -535 } -536 scan = null; -537 get = null; -538 startKey = null; -539 } -540 return null; -541 } -542 } -543 -544 private static final int USAGE_EXIT_CODE = 1; -545 private static final int INIT_ERROR_EXIT_CODE = 2; -546 private static final int TIMEOUT_ERROR_EXIT_CODE = 3; -547 private static final int ERROR_EXIT_CODE = 4; -548 private static final int FAILURE_EXIT_CODE = 5; -549 -550 private static final long DEFAULT_INTERVAL = 6000; -551 -552 private static final long DEFAULT_TIMEOUT = 600000; // 10 mins -553 private static final int MAX_THREADS_NUM = 16; // #threads to contact regions -554 -555 private static final Log LOG = LogFactory.getLog(Canary.class); -556 -557 public static final TableName DEFAULT_WRITE_TABLE_NAME = TableName.valueOf( -558 NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR, "canary"); -559 -560 private static final String CANARY_TABLE_FAMILY_NAME = "Test"; -561 -562 private Configuration conf = null; -563 private long interval = 0; -564 private Sink sink = null; -565 -566 private boolean useRegExp; -567 private long timeout = DEFAULT_TIMEOUT; -568 private boolean failOnError = true; -569 private boolean regionServerMode = false; -570 private boolean zookeeperMode = false; -571 private boolean regionServerAllRegions = false; -572 private boolean writeSniffing = false; -573 private boolean treatFailureAsError = false; -574 private TableName writeTableName = DEFAULT_WRITE_TABLE_NAME; -575 -576 private ExecutorService executor; // threads to retrieve data from regionservers -577 -578 public Canary() { -579 this(new ScheduledThreadPoolExecutor(1), new RegionServerStdOutSink()); -580 } -581 -582 public Canary(ExecutorService executor, Sink sink) { -583 this.executor = executor; -584 this.sink = sink; -585 } -586 -587 @Override -588 public Configuration getConf() { -589 return conf; -590 } -591 -592 @Override -593 public void setConf(Configuration conf) { -594 this.conf = conf; -595 } -596 -597 private int parseArgs(String[] args) { -598 int index = -1; -599 // Process command line args -600 for (int i = 0; i < args.length; i++) { -601 String cmd = args[i]; -602 -603 if (cmd.startsWith("-")) { -604 if (index >= 0) { -605 // command line args must be in the form: [opts] [table 1 [table 2 ...]] -606 System.err.println("Invalid command line options"); -607 printUsageAndExit(); -608 } -609 -610 if (cmd.equals("-help")) { -611 // user asked for help, print the help and quit. -612 printUsageAndExit(); -613 } else if (cmd.equals("-daemon") && interval == 0) { -614 // user asked for daemon mode, set a default interval between checks -615 interval = DEFAULT_INTERVAL; -616 } else if (cmd.equals("-interval")) { -617 // user has specified an interval for canary breaths (-interval N) -618 i++; -619 -620 if (i == args.length) { -621 System.err.println("-interval needs a numeric value argument."); -622 printUsageAndExit(); -623 } -624 -625 try { -626 interval = Long.parseLong(args[i]) * 1000; -627 } catch (NumberFormatException e) { -628 System.err.println("-interval needs a numeric value argument."); -629 printUsageAndExit(); -630 } -631 } else if (cmd.equals("-zookeeper")) { -632 this.zookeeperMode = true; -633 } else if(cmd.equals("-regionserver")) { -634 this.regionServerMode = true; -635 } else if(cmd.equals("-allRegions")) { -636 this.regionServerAllRegions = true; -637 } else if(cmd.equals("-writeSniffing")) { -638 this.writeSniffing = true; -639 } else if(cmd.equals("-treatFailureAsError")) { -640 this.treatFailureAsError = true; -641 } else if (cmd.equals("-e")) { -642 this.useRegExp = true; -643 } else if (cmd.equals("-t")) { -644 i++; -645 -646 if (i == args.length) { -647 System.err.println("-t needs a numeric value argument."); -648 printUsageAndExit(); -649 } -650 -651 try { -652 this.timeout = Long.parseLong(args[i]); -653 } catch (NumberFormatException e) { -654 System.err.println("-t needs a numeric value argument."); -655 printUsageAndExit(); -656 } -657 } else if (cmd.equals("-writeTable")) { -658 i++; -659 -660 if (i == args.length) { -661 System.err.println("-writeTable needs a string value argument."); -662 printUsageAndExit(); -663 } -664 this.writeTableName = TableName.valueOf(args[i]); -665 } else if (cmd.equals("-f")) { -666 i++; -667 -668 if (i == args.length) { -669 System.err -670 .println("-f needs a boolean value argument (true|false)."); -671 printUsageAndExit(); -672 } -673 -674 this.failOnError = Boolean.parseBoolean(args[i]); -675 } else { -676 // no options match -677 System.err.println(cmd + " options is invalid."); -678 printUsageAndExit(); -679 } -680 } else if (index < 0) { -681 // keep track of first table name specified by the user -682 index = i; -683 } -684 } -685 if (this.regionServerAllRegions && !this.regionServerMode) { -686 System.err.println("-allRegions can only be specified in regionserver mode."); -687 printUsageAndExit(); -688 } -689 if (this.zookeeperMode) { -690 if (this.regionServerMode || this.regionServerAllRegions || this.writeSniffing) { -691 System.err.println("-zookeeper is exclusive and cannot be combined with " -692 + "other modes."); -693 printUsageAndExit(); -694 } -695 } -696 return index; -697 } -698 -699 @Override -700 public int run(String[] args) throws Exception { -701 int index = parseArgs(args); -702 ChoreService choreService = null; -703 -704 // Launches chore for refreshing kerberos credentials if security is enabled. -705 // Please see http://hbase.apache.org/book.html#_running_canary_in_a_kerberos_enabled_cluster -706 // for more details. -707 final ScheduledChore authChore = AuthUtil.getAuthChore(conf); -708 if (authChore != null) { -709 choreService = new ChoreService("CANARY_TOOL"); -710 choreService.scheduleChore(authChore); -711 } -712 -713 // Start to prepare the stuffs -714 Monitor monitor = null; -715 Thread monitorThread = null; -716 long startTime = 0; -717 long currentTimeLength = 0; -718 // Get a connection to use in below. -719 try (Connection connection = ConnectionFactory.createConnection(this.conf)) { -720 do { -721 // Do monitor !! -722 try { -723 monitor = this.newMonitor(connection, index, args); -724 monitorThread = new Thread(monitor, "CanaryMonitor-" + System.currentTimeMillis()); -725 startTime = System.currentTimeMillis(); -726 monitorThread.start(); -727 while (!monitor.isDone()) { -728 // wait for 1 sec -729 Thread.sleep(1000); -730 // exit if any error occurs -731 if (this.failOnError && monitor.hasError()) { -732 monitorThread.interrupt(); -733 if (monitor.initialized) { -734 return monitor.errorCode; -735 } else { -736 return INIT_ERROR_EXIT_CODE; -737 } -738 } -739 currentTimeLength = System.currentTimeMillis() - startTime; -740 if (currentTimeLength > this.timeout) { -741 LOG.error("The monitor is running too long (" + currentTimeLength -742 + ") after timeout limit:" + this.timeout -743 + " will be killed itself !!"); -744 if (monitor.initialized) { -745 return TIMEOUT_ERROR_EXIT_CODE; -746 } else { -747 return INIT_ERROR_EXIT_CODE; -748 } -749 } -750 } -751 -752 if (this.failOnError && monitor.finalCheckForErrors()) { -753 monitorThread.interrupt(); -754 return monitor.errorCode; -755 } -756 } finally { -757 if (monitor != null) monitor.close(); -758 } -759 -760 Thread.sleep(interval); -761 } while (interval > 0); -762 } // try-with-resources close -763 -764 if (choreService != null) { -765 choreService.shutdown(); -766 } -767 return monitor.errorCode; -768 } -769 -770 public Map<String, String> getReadFailures() { -771 return sink.getReadFailures(); -772 } -773 -774 public Map<String, String> getWriteFailures() { -775 return sink.getWriteFailures(); -776 } -777 -778 private void printUsageAndExit() { -779 System.err.printf( -780 "Usage: hbase %s [opts] [table1 [table2]...] | [regionserver1 [regionserver2]..]%n", -781 getClass().getName()); -782 System.err.println(" where [opts] are:"); -783 System.err.println(" -help Show this help and exit."); -784 System.err.println(" -regionserver replace the table argument to regionserver,"); -785 System.err.println(" which means to enable regionserver mode"); -786 System.err.println(" -allRegions Tries all regions on a regionserver,"); -787 System.err.println(" only works in regionserver mode."); -788 System.err.println(" -zookeeper Tries to grab zookeeper.znode.parent "); -789 System.err.println(" on each zookeeper instance"); -790 System.err.println(" -daemon Continuous check at defined intervals."); -791 System.err.println(" -interval <N> Interval between checks (sec)"); -792 System.err.println(" -e Use table/regionserver as regular expression"); -793 System.err.println(" which means the table/regionserver is regular expression pattern"); -794 System.err.println(" -f <B> stop whole program if first error occurs," + -795 " default is true"); -796 System.err.println(" -t <N> timeout for a check, default is 600000 (milisecs)"); -797 System.err.println(" -writeSniffing enable the write sniffing in canary"); -798 System.err.println(" -treatFailureAsError treats read / write failure as error"); -799 System.err.println(" -writeTable The table used for write sniffing." -800 + " Default is hbase:canary"); -801 System.err.println(" -Dhbase.canary.read.raw.enabled=<true/false> Use this flag to enable or disable raw scan during read canary test" -802 + " Default is false and raw is not enabled during scan"); -803 System.err -804 .println(" -D<configProperty>=<value> assigning or override the configuration params"); -805 System.exit(USAGE_EXIT_CODE); -806 } -807 -808 /** -809 * A Factory method for {@link Monitor}. -810 * Can be overridden by user. -811 * @param index a start index for monitor target -812 * @param args args passed from user -813 * @return a Monitor instance -814 */ -815 public Monitor newMonitor(final Connection connection, int index, String[] args) { -816 Monitor monitor = null; -817 String[] monitorTargets = null; -818 -819 if(index >= 0) { -820 int length = args.length - index; -821 monitorTargets = new String[length]; -822 System.arraycopy(args, index, monitorTargets, 0, length); -823 } -824 -825 if (this.regionServerMode) { -826 monitor = -827 new RegionServerMonitor(connection, monitorTargets, this.useRegExp, -828 (ExtendedSink) this.sink, this.executor, this.regionServerAllRegions, -829 this.treatFailureAsError); -830 } else if (this.zookeeperMode) { -831 monitor = -832 new ZookeeperMonitor(connection, monitorTargets, this.useRegExp, -833 (ZookeeperStdOutSink) this.sink, this.executor, this.treatFailureAsError); -834 } else { -835 monitor = -836 new RegionMonitor(connection, monitorTargets, this.useRegExp, this.sink, this.executor, -837 this.writeSniffing, this.writeTableName, this.treatFailureAsError); -838 } -839 return monitor; -840 } -841 -842 // a Monitor super-class can be extended by users -843 public static abstract class Monitor implements Runnable, Closeable { -844 -845 protected Connection connection; -846 protected Admin admin; -847 protected String[] targets; -848 protected boolean useRegExp; -849 protected boolean treatFailureAsError; -850 protected boolean initialized = false; -851 -852 protected boolean done = false; -853 protected int errorCode = 0; -854 protected Sink sink; -855 protected ExecutorService executor; -856 -857 public boolean isDone() { -858 return done; -859 } -860 -861 public boolean hasError() { -862 return errorCode != 0; -863 } -864 -865 public boolean finalCheckForErrors() { -866 if (errorCode != 0) { -867 return true; -868 } -869 if (treatFailureAsError && -870 (sink.getReadFailureCount() > 0 || sink.getWriteFailureCount() > 0)) { -871 errorCode = FAILURE_EXIT_CODE; -872 return true; -873 } -874 return false; -875 } -876 -877 @Override -878 public void close() throws IOException { -879 if (this.admin != null) this.admin.close(); -880 } -881 -882 protected Monitor(Connection connection, String[] monitorTargets, boolean useRegExp, Sink sink, -883 ExecutorService executor, boolean treatFailureAsError) { -884 if (null == connection) throw new IllegalArgumentException("connection shall not be null"); -885 -886 this.connection = connection; -887 this.targets = monitorTargets; -888 this.useRegExp = useRegExp; -889 this.treatFailureAsError = treatFailureAsError; -890 this.sink = sink; -891 this.executor = executor; -892 } -893 -894 @Override -895 public abstract void run(); -896 -897 protected boolean initAdmin() { -898 if (null == this.admin) { -899 try { -900 this.admin = this.connection.getAdmin(); -901 } catch (Exception e) { -902 LOG.error("Initial HBaseAdmin failed...", e); -903 this.errorCode = INIT_ERROR_EXIT_CODE; -904 } -905 } else if (admin.isAborted()) { -906 LOG.error("HBaseAdmin aborted"); -907 this.errorCode = INIT_ERROR_EXIT_CODE; -908 } -909 return !this.hasError(); -910 } -911 } -912 -913 // a monitor for region mode -914 private static class RegionMonitor extends Monitor { -915 // 10 minutes -916 private static final int DEFAULT_WRITE_TABLE_CHECK_PERIOD = 10 * 60 * 1000; -917 // 1 days -918 private static final int DEFAULT_WRITE_DATA_TTL = 24 * 60 * 60; -919 -920 private long lastCheckTime = -1; -921 private boolean writeSniffing; -922 private TableName writeTableName; -923 private int writeDataTTL; -924 private float regionsLowerLimit; -925 private float regionsUpperLimit; -926 private int checkPeriod; -927 private boolean rawScanEnabled; -928 -929 public RegionMonitor(Connection connection, String[] monitorTargets, boolean useRegExp, -930 Sink sink, ExecutorService executor, boolean writeSniffing, TableName writeTableName, -931 boolean treatFailureAsError) { -932 super(connection, monitorTargets, useRegExp, sink, executor, treatFailureAsError); -933 Configuration conf = connection.getConfiguration(); -934 this.writeSniffing = writeSniffing; -935 this.writeTableName = writeTableName; -936 this.writeDataTTL = -937 conf.getInt(HConstants.HBASE_CANARY_WRITE_DATA_TTL_KEY, DEFAULT_WRITE_DATA_TTL); -938 this.regionsLowerLimit = -939 conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_LOWERLIMIT_KEY, 1.0f); -940 this.regionsUpperLimit = -941 conf.getFloat(HConstants.HBASE_CANARY_WRITE_PERSERVER_REGIONS_UPPERLIMIT_KEY, 1.5f); -942 this.checkPeriod = -943 conf.getInt(HConstants.HBASE_CANARY_WRITE_TABLE_CHECK_PERIOD_KEY, -944 DEFAULT_WRITE_TABLE_CHECK_PERIOD); -945 this.rawScanEnabled = conf.getBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, false); -946 } -947 -948 @Override -949 public void run() { -950 if (this.initAdmin()) { -951 try { -952 List<Future<Void>> taskFutures = new LinkedList<>(); -953 if (this.targets != null && this.targets.length > 0) { -954 String[] tables = generateMonitorTables(this.targets); -955 this.initialized = true; -956 for (String table : tables) { -957 taskFutures.addAll(Canary.sniff(admin, sink, table, executor, TaskType.READ, -958 this.rawScanEnabled)); -959 } -960 } else { -961 taskFutures.addAll(sniff(TaskType.READ)); -962 } -963 -964 if (writeSniffing) { -965 if (EnvironmentEdgeManager.currentTime() - lastCheckTime > checkPeriod) { -966 try { -967 checkWriteTableDistribution(); -968 } catch (IOException e) { -969 LOG.error("Check canary table distribution failed!", e); -970 } -971 lastCheckTime = EnvironmentEdgeManager.currentTime(); -972 } -973 // sniff canary table with write operation -974 taskFutures.addAll(Canary.sniff(admin, sink, admin.getTableDescriptor(writeTableName), -975 executor, TaskType.WRITE, this.rawScanEnabled)); -976 } -977 -978 for (Future<Void> future : taskFutures) { -979 try { -980 future.get(); -981 } catch (ExecutionException e) { -982 LOG.error("Sniff region failed!", e); -983 } -984 } -985 } catch (Exception e) { -986 LOG.error("Run regionMonitor failed", e); -987 this.errorCode = ERROR_EXIT_CODE; -988 } -989 } -990 this.done = true; -991 } -992 -993 private String[] generateMonitorTables(String[] monitorTargets) throws IOException { -994 String[] returnTables = null; -995