From commits-return-79316-archive-asf-public=cust-asf.ponee.io@hbase.apache.org Wed Oct 17 16:53:30 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id CAA1B1807A5 for ; Wed, 17 Oct 2018 16:53:25 +0200 (CEST) Received: (qmail 76153 invoked by uid 500); 17 Oct 2018 14:53:24 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 75726 invoked by uid 99); 17 Oct 2018 14:53:24 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 17 Oct 2018 14:53:23 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 35FCEE11C3; Wed, 17 Oct 2018 14:53:23 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: git-site-role@apache.org To: commits@hbase.apache.org Date: Wed, 17 Oct 2018 14:53:35 -0000 Message-Id: <73ff78c8cc1e4e669c1a61dd6932add7@git.apache.org> In-Reply-To: <2db56e24a8bd4f429c1f853dd561abb7@git.apache.org> References: <2db56e24a8bd4f429c1f853dd561abb7@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [14/35] hbase-site git commit: Published site at 8cc56bd18c40ba9a7131336e97c74f8d97d8b2be. http://git-wip-us.apache.org/repos/asf/hbase-site/blob/713132a3/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionStdOutSink.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionStdOutSink.html b/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionStdOutSink.html index ce887a2..506bc5c 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionStdOutSink.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/tool/Canary.RegionStdOutSink.html @@ -98,1529 +98,1560 @@ 090import org.apache.hadoop.util.GenericOptionsParser; 091import org.apache.hadoop.util.Tool; 092import org.apache.hadoop.util.ToolRunner; -093import org.apache.yetus.audience.InterfaceAudience; -094import org.apache.zookeeper.KeeperException; -095import org.apache.zookeeper.ZooKeeper; -096import org.apache.zookeeper.client.ConnectStringParser; -097import org.apache.zookeeper.data.Stat; -098import org.slf4j.Logger; -099import org.slf4j.LoggerFactory; -100 -101import org.apache.hbase.thirdparty.com.google.common.collect.Lists; -102 -103/** -104 * HBase Canary Tool, that that can be used to do -105 * "canary monitoring" of a running HBase cluster. +093import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; +094import org.apache.yetus.audience.InterfaceAudience; +095import org.apache.zookeeper.KeeperException; +096import org.apache.zookeeper.ZooKeeper; +097import org.apache.zookeeper.client.ConnectStringParser; +098import org.apache.zookeeper.data.Stat; +099import org.slf4j.Logger; +100import org.slf4j.LoggerFactory; +101 +102import org.apache.hbase.thirdparty.com.google.common.collect.Lists; +103 +104/** +105 * HBase Canary Tool for "canary monitoring" of a running HBase cluster. 106 * -107 * Here are three modes -108 * 1. region mode - Foreach region tries to get one row per column family -109 * and outputs some information about failure or latency. -110 * -111 * 2. regionserver mode - Foreach regionserver tries to get one row from one table -112 * selected randomly and outputs some information about failure or latency. -113 * -114 * 3. zookeeper mode - for each zookeeper instance, selects a zNode and -115 * outputs some information about failure or latency. -116 */ -117@InterfaceAudience.Private -118public final class Canary implements Tool { -119 // Sink interface used by the canary to outputs information -120 public interface Sink { -121 public long getReadFailureCount(); -122 public long incReadFailureCount(); -123 public Map<String,String> getReadFailures(); -124 public void updateReadFailures(String regionName, String serverName); -125 public long getWriteFailureCount(); -126 public long incWriteFailureCount(); -127 public Map<String,String> getWriteFailures(); -128 public void updateWriteFailures(String regionName, String serverName); -129 } -130 -131 // Simple implementation of canary sink that allows to plot on -132 // file or standard output timings or failures. -133 public static class StdOutSink implements Sink { -134 private AtomicLong readFailureCount = new AtomicLong(0), -135 writeFailureCount = new AtomicLong(0); -136 -137 private Map<String, String> readFailures = new ConcurrentHashMap<>(); -138 private Map<String, String> writeFailures = new ConcurrentHashMap<>(); -139 -140 @Override -141 public long getReadFailureCount() { -142 return readFailureCount.get(); -143 } -144 -145 @Override -146 public long incReadFailureCount() { -147 return readFailureCount.incrementAndGet(); -148 } -149 -150 @Override -151 public Map<String, String> getReadFailures() { -152 return readFailures; -153 } -154 -155 @Override -156 public void updateReadFailures(String regionName, String serverName) { -157 readFailures.put(regionName, serverName); -158 } -159 -160 @Override -161 public long getWriteFailureCount() { -162 return writeFailureCount.get(); -163 } -164 -165 @Override -166 public long incWriteFailureCount() { -167 return writeFailureCount.incrementAndGet(); -168 } -169 -170 @Override -171 public Map<String, String> getWriteFailures() { -172 return writeFailures; -173 } -174 -175 @Override -176 public void updateWriteFailures(String regionName, String serverName) { -177 writeFailures.put(regionName, serverName); -178 } -179 } -180 -181 public static class RegionServerStdOutSink extends StdOutSink { -182 -183 public void publishReadFailure(String table, String server) { -184 incReadFailureCount(); -185 LOG.error(String.format("Read from table:%s on region server:%s", table, server)); -186 } +107 * There are three modes: +108 * <ol> +109 * <li>region mode (Default): For each region, try to get one row per column family outputting +110 * information on failure (ERROR) or else the latency. +111 * </li> +112 * +113 * <li>regionserver mode: For each regionserver try to get one row from one table selected +114 * randomly outputting information on failure (ERROR) or else the latency. +115 * </li> +116 * +117 * <li>zookeeper mode: for each zookeeper instance, selects a znode outputting information on +118 * failure (ERROR) or else the latency. +119 * </li> +120 * </ol> +121 */ +122@InterfaceAudience.Private +123public final class Canary implements Tool { +124 /** +125 * Sink interface used by the canary to output information +126 */ +127 public interface Sink { +128 long getReadFailureCount(); +129 long incReadFailureCount(); +130 Map<String,String> getReadFailures(); +131 void updateReadFailures(String regionName, String serverName); +132 long getWriteFailureCount(); +133 long incWriteFailureCount(); +134 Map<String,String> getWriteFailures(); +135 void updateWriteFailures(String regionName, String serverName); +136 } +137 +138 /** +139 * Simple implementation of canary sink that allows plotting to a file or standard output. +140 */ +141 public static class StdOutSink implements Sink { +142 private AtomicLong readFailureCount = new AtomicLong(0), +143 writeFailureCount = new AtomicLong(0); +144 private Map<String, String> readFailures = new ConcurrentHashMap<>(); +145 private Map<String, String> writeFailures = new ConcurrentHashMap<>(); +146 +147 @Override +148 public long getReadFailureCount() { +149 return readFailureCount.get(); +150 } +151 +152 @Override +153 public long incReadFailureCount() { +154 return readFailureCount.incrementAndGet(); +155 } +156 +157 @Override +158 public Map<String, String> getReadFailures() { +159 return readFailures; +160 } +161 +162 @Override +163 public void updateReadFailures(String regionName, String serverName) { +164 readFailures.put(regionName, serverName); +165 } +166 +167 @Override +168 public long getWriteFailureCount() { +169 return writeFailureCount.get(); +170 } +171 +172 @Override +173 public long incWriteFailureCount() { +174 return writeFailureCount.incrementAndGet(); +175 } +176 +177 @Override +178 public Map<String, String> getWriteFailures() { +179 return writeFailures; +180 } +181 +182 @Override +183 public void updateWriteFailures(String regionName, String serverName) { +184 writeFailures.put(regionName, serverName); +185 } +186 } 187 -188 public void publishReadTiming(String table, String server, long msTime) { -189 LOG.info(String.format("Read from table:%s on region server:%s in %dms", -190 table, server, msTime)); -191 } -192 } -193 -194 public static class ZookeeperStdOutSink extends StdOutSink { -195 -196 public void publishReadFailure(String zNode, String server) { -197 incReadFailureCount(); -198 LOG.error(String.format("Read from zNode:%s on zookeeper instance:%s", zNode, server)); +188 /** +189 * By RegionServer, for 'regionserver' mode. +190 */ +191 public static class RegionServerStdOutSink extends StdOutSink { +192 public void publishReadFailure(String table, String server) { +193 incReadFailureCount(); +194 LOG.error("Read from {} on {}", table, server); +195 } +196 +197 public void publishReadTiming(String table, String server, long msTime) { +198 LOG.info("Read from {} on {} in {}ms", table, server, msTime); 199 } -200 -201 public void publishReadTiming(String znode, String server, long msTime) { -202 LOG.info(String.format("Read from zNode:%s on zookeeper instance:%s in %dms", -203 znode, server, msTime)); -204 } -205 } -206 -207 public static class RegionStdOutSink extends StdOutSink { -208 -209 private Map<String, LongAdder> perTableReadLatency = new HashMap<>(); -210 private LongAdder writeLatency = new LongAdder(); -211 -212 public void publishReadFailure(ServerName serverName, RegionInfo region, Exception e) { -213 incReadFailureCount(); -214 LOG.error(String.format("read from region %s on regionserver %s failed", region.getRegionNameAsString(), serverName), e); -215 } -216 -217 public void publishReadFailure(ServerName serverName, RegionInfo region, ColumnFamilyDescriptor column, Exception e) { -218 incReadFailureCount(); -219 LOG.error(String.format("read from region %s on regionserver %s column family %s failed", -220 region.getRegionNameAsString(), serverName, column.getNameAsString()), e); -221 } +200 } +201 +202 /** +203 * Output for 'zookeeper' mode. +204 */ +205 public static class ZookeeperStdOutSink extends StdOutSink { +206 public void publishReadFailure(String znode, String server) { +207 incReadFailureCount(); +208 LOG.error("Read from {} on {}", znode, server); +209 } +210 +211 public void publishReadTiming(String znode, String server, long msTime) { +212 LOG.info("Read from {} on {} in {}ms", znode, server, msTime); +213 } +214 } +215 +216 /** +217 * By Region, for 'region' mode. +218 */ +219 public static class RegionStdOutSink extends StdOutSink { +220 private Map<String, LongAdder> perTableReadLatency = new HashMap<>(); +221 private LongAdder writeLatency = new LongAdder(); 222 -223 public void publishReadTiming(ServerName serverName, RegionInfo region, ColumnFamilyDescriptor column, long msTime) { -224 LOG.info(String.format("read from region %s on regionserver %s column family %s in %dms", -225 region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime)); +223 public void publishReadFailure(ServerName serverName, RegionInfo region, Exception e) { +224 incReadFailureCount(); +225 LOG.error("Read from {} on {} failed", region.getRegionNameAsString(), serverName, e); 226 } 227 -228 public void publishWriteFailure(ServerName serverName, RegionInfo region, Exception e) { -229 incWriteFailureCount(); -230 LOG.error(String.format("write to region %s on regionserver %s failed", region.getRegionNameAsString(), serverName), e); -231 } -232 -233 public void publishWriteFailure(ServerName serverName, RegionInfo region, ColumnFamilyDescriptor column, Exception e) { -234 incWriteFailureCount(); -235 LOG.error(String.format("write to region %s on regionserver %s column family %s failed", -236 region.getRegionNameAsString(), serverName, column.getNameAsString()), e); -237 } -238 -239 public void publishWriteTiming(ServerName serverName, RegionInfo region, ColumnFamilyDescriptor column, long msTime) { -240 LOG.info(String.format("write to region %s on regionserver %s column family %s in %dms", -241 region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime)); -242 } -243 -244 public Map<String, LongAdder> getReadLatencyMap() { -245 return this.perTableReadLatency; -246 } -247 -248 public LongAdder initializeAndGetReadLatencyForTable(String tableName) { -249 LongAdder initLatency = new LongAdder(); -250 this.perTableReadLatency.put(tableName, initLatency); -251 return initLatency; -252 } -253 -254 public void initializeWriteLatency() { -255 this.writeLatency.reset(); -256 } -257 -258 public LongAdder getWriteLatency() { -259 return this.writeLatency; -260 } -261 } +228 public void publishReadFailure(ServerName serverName, RegionInfo region, +229 ColumnFamilyDescriptor column, Exception e) { +230 incReadFailureCount(); +231 LOG.error("Read from {} on {} {} failed", region.getRegionNameAsString(), serverName, +232 column.getNameAsString(), e); +233 } +234 +235 public void publishReadTiming(ServerName serverName, RegionInfo region, +236 ColumnFamilyDescriptor column, long msTime) { +237 LOG.info("Read from {} on {} {} in {}ms", region.getRegionNameAsString(), serverName, +238 column.getNameAsString(), msTime); +239 } +240 +241 public void publishWriteFailure(ServerName serverName, RegionInfo region, Exception e) { +242 incWriteFailureCount(); +243 LOG.error("Write to {} on {} failed", region.getRegionNameAsString(), serverName, e); +244 } +245 +246 public void publishWriteFailure(ServerName serverName, RegionInfo region, +247 ColumnFamilyDescriptor column, Exception e) { +248 incWriteFailureCount(); +249 LOG.error("Write to {} on {} {} failed", region.getRegionNameAsString(), serverName, +250 column.getNameAsString(), e); +251 } +252 +253 public void publishWriteTiming(ServerName serverName, RegionInfo region, +254 ColumnFamilyDescriptor column, long msTime) { +255 LOG.info("Write to {} on {} {} in {}ms", +256 region.getRegionNameAsString(), serverName, column.getNameAsString(), msTime); +257 } +258 +259 public Map<String, LongAdder> getReadLatencyMap() { +260 return this.perTableReadLatency; +261 } 262 -263 static class ZookeeperTask implements Callable<Void> { -264 private final Connection connection; -265 private final String host; -266 private String znode; -267 private final int timeout; -268 private ZookeeperStdOutSink sink; -269 -270 public ZookeeperTask(Connection connection, String host, String znode, int timeout, -271 ZookeeperStdOutSink sink) { -272 this.connection = connection; -273 this.host = host; -274 this.znode = znode; -275 this.timeout = timeout; -276 this.sink = sink; -277 } -278 -279 @Override public Void call() throws Exception { -280 ZooKeeper zooKeeper = null; -281 try { -282 zooKeeper = new ZooKeeper(host, timeout, EmptyWatcher.instance); -283 Stat exists = zooKeeper.exists(znode, false); -284 StopWatch stopwatch = new StopWatch(); -285 stopwatch.start(); -286 zooKeeper.getData(znode, false, exists); -287 stopwatch.stop(); -288 sink.publishReadTiming(znode, host, stopwatch.getTime()); -289 } catch (KeeperException | InterruptedException e) { -290 sink.publishReadFailure(znode, host); -291 } finally { -292 if (zooKeeper != null) { -293 zooKeeper.close(); -294 } -295 } -296 return null; -297 } -298 } -299 -300 /** -301 * For each column family of the region tries to get one row and outputs the latency, or the -302 * failure. -303 */ -304 static class RegionTask implements Callable<Void> { -305 public enum TaskType{ -306 READ, WRITE -307 } -308 private Connection connection; -309 private RegionInfo region; -310 private RegionStdOutSink sink; -311 private TaskType taskType; -312 private boolean rawScanEnabled; -313 private ServerName serverName; -314 private LongAdder readWriteLatency; -315 -316 RegionTask(Connection connection, RegionInfo region, ServerName serverName, RegionStdOutSink sink, -317 TaskType taskType, boolean rawScanEnabled, LongAdder rwLatency) { -318 this.connection = connection; -319 this.region = region; -320 this.serverName = serverName; -321 this.sink = sink; -322 this.taskType = taskType; -323 this.rawScanEnabled = rawScanEnabled; -324 this.readWriteLatency = rwLatency; +263 public LongAdder initializeAndGetReadLatencyForTable(String tableName) { +264 LongAdder initLatency = new LongAdder(); +265 this.perTableReadLatency.put(tableName, initLatency); +266 return initLatency; +267 } +268 +269 public void initializeWriteLatency() { +270 this.writeLatency.reset(); +271 } +272 +273 public LongAdder getWriteLatency() { +274 return this.writeLatency; +275 } +276 } +277 +278 /** +279 * Run a single zookeeper Task and then exit. +280 */ +281 static class ZookeeperTask implements Callable<Void> { +282 private final Connection connection; +283 private final String host; +284 private String znode; +285 private final int timeout; +286 private ZookeeperStdOutSink sink; +287 +288 public ZookeeperTask(Connection connection, String host, String znode, int timeout, +289 ZookeeperStdOutSink sink) { +290 this.connection = connection; +291 this.host = host; +292 this.znode = znode; +293 this.timeout = timeout; +294 this.sink = sink; +295 } +296 +297 @Override public Void call() throws Exception { +298 ZooKeeper zooKeeper = null; +299 try { +300 zooKeeper = new ZooKeeper(host, timeout, EmptyWatcher.instance); +301 Stat exists = zooKeeper.exists(znode, false); +302 StopWatch stopwatch = new StopWatch(); +303 stopwatch.start(); +304 zooKeeper.getData(znode, false, exists); +305 stopwatch.stop(); +306 sink.publishReadTiming(znode, host, stopwatch.getTime()); +307 } catch (KeeperException | InterruptedException e) { +308 sink.publishReadFailure(znode, host); +309 } finally { +310 if (zooKeeper != null) { +311 zooKeeper.close(); +312 } +313 } +314 return null; +315 } +316 } +317 +318 /** +319 * Run a single Region Task and then exit. For each column family of the Region, get one row and +320 * output latency or failure. +321 */ +322 static class RegionTask implements Callable<Void> { +323 public enum TaskType{ +324 READ, WRITE 325 } -326 -327 @Override -328 public Void call() { -329 switch (taskType) { -330 case READ: -331 return read(); -332 case WRITE: -333 return write(); -334 default: -335 return read(); -336 } -337 } -338 -339 public Void read() { -340 Table table = null; -341 TableDescriptor tableDesc = null; -342 try { -343 if (LOG.isDebugEnabled()) { -344 LOG.debug(String.format("reading table descriptor for table %s", -345 region.getTable())); -346 } -347 table = connection.getTable(region.getTable()); -348 tableDesc = table.getDescriptor(); -349 } catch (IOException e) { -350 LOG.debug("sniffRegion failed", e); -351 sink.publishReadFailure(serverName, region, e); -352 if (table != null) { -353 try { -354 table.close(); -355 } catch (IOException ioe) { -356 LOG.error("Close table failed", e); -357 } -358 } -359 return null; -360 } -361 -362 byte[] startKey = null; -363 Get get = null; -364 Scan scan = null; -365 ResultScanner rs = null; -366 StopWatch stopWatch = new StopWatch(); -367 for (ColumnFamilyDescriptor column : tableDesc.getColumnFamilies()) { -368 stopWatch.reset(); -369 startKey = region.getStartKey(); -370 // Can't do a get on empty start row so do a Scan of first element if any instead. -371 if (startKey.length > 0) { -372 get = new Get(startKey); -373 get.setCacheBlocks(false); -374 get.setFilter(new FirstKeyOnlyFilter()); -375 get.addFamily(column.getName()); -376 } else { -377 scan = new Scan(); -378 if (LOG.isDebugEnabled()) { -379 LOG.debug(String.format("rawScan : %s for table: %s", rawScanEnabled, -380 tableDesc.getTableName())); -381 } -382 scan.setRaw(rawScanEnabled); -383 scan.setCaching(1); -384 scan.setCacheBlocks(false); -385 scan.setFilter(new FirstKeyOnlyFilter()); -386 scan.addFamily(column.getName()); -387 scan.setMaxResultSize(1L); -388 scan.setOneRowLimit(); -389 } -390 -391 if (LOG.isDebugEnabled()) { -392 LOG.debug(String.format("reading from table %s region %s column family %s and key %s", -393 tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(), -394 Bytes.toStringBinary(startKey))); -395 } -396 try { -397 stopWatch.start(); -398 if (startKey.length > 0) { -399 table.get(get); -400 } else { -401 rs = table.getScanner(scan); -402 rs.next(); -403 } -404 stopWatch.stop(); -405 this.readWriteLatency.add(stopWatch.getTime()); -406 sink.publishReadTiming(serverName, region, column, stopWatch.getTime()); -407 } catch (Exception e) { -408 sink.publishReadFailure(serverName, region, column, e); -409 sink.updateReadFailures(region.getRegionNameAsString(), serverName.getHostname()); -410 } finally { -411 if (rs != null) { -412 rs.close(); -413 } -414 scan = null; -415 get = null; -416 } -417 } -418 try { -419 table.close(); -420 } catch (IOException e) { -421 LOG.error("Close table failed", e); -422 } -423 return null; -424 } -425 -426 /** -427 * Check writes for the canary table -428 * @return -429 */ -430 private Void write() { -431 Table table = null; -432 TableDescriptor tableDesc = null; -433 try { -434 table = connection.getTable(region.getTable()); -435 tableDesc = table.getDescriptor(); -436 byte[] rowToCheck = region.getStartKey(); -437 if (rowToCheck.length == 0) { -438 rowToCheck = new byte[]{0x0}; -439 } -440 int writeValueSize = -441 connection.getConfiguration().getInt(HConstants.HBASE_CANARY_WRITE_VALUE_SIZE_KEY, 10); -442 for (ColumnFamilyDescriptor column : tableDesc.getColumnFamilies()) { -443 Put put = new Put(rowToCheck); -444 byte[] value = new byte[writeValueSize]; -445 Bytes.random(value); -446 put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value); -447 -448 if (LOG.isDebugEnabled()) { -449 LOG.debug(String.format("writing to table %s region %s column family %s and key %s", -450 tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(), -451 Bytes.toStringBinary(rowToCheck))); -452 } -453 try { -454 long startTime = System.currentTimeMillis(); -455 table.put(put); -456 long time = System.currentTimeMillis() - startTime; -457 this.readWriteLatency.add(time); -458 sink.publishWriteTiming(serverName, region, column, time); -459 } catch (Exception e) { -460 sink.publishWriteFailure(serverName, region, column, e); -461 } -462 } -463 table.close(); -464 } catch (IOException e) { -465 sink.publishWriteFailure(serverName, region, e); -466 sink.updateWriteFailures(region.getRegionNameAsString(), serverName.getHostname() ); -467 } -468 return null; -469 } -470 } -471 -472 /** -473 * Get one row from a region on the regionserver and outputs the latency, or the failure. -474 */ -475 static class RegionServerTask implements Callable<Void> { -476 private Connection connection; -477 private String serverName; -478 private RegionInfo region; -479 private RegionServerStdOutSink sink; -480 private AtomicLong successes; -481 -482 RegionServerTask(Connection connection, String serverName, RegionInfo region, -483 RegionServerStdOutSink sink, AtomicLong successes) { -484 this.connection = connection; -485 this.serverName = serverName; -486 this.region = region; -487 this.sink = sink; -488 this.successes = successes; -489 } -490 -491 @Override -492 public Void call() { -493 TableName tableName = null; -494 Table table = null; -495 Get get = null; -496 byte[] startKey = null; -497 Scan scan = null; -498 StopWatch stopWatch = new StopWatch(); -499 // monitor one region on every region server -500 stopWatch.reset(); -501 try { -502 tableName = region.getTable(); -503 table = connection.getTable(tableName); -504 startKey = region.getStartKey(); -505 // Can't do a get on empty start row so do a Scan of first element if any instead. -506 if (LOG.isDebugEnabled()) { -507 LOG.debug(String.format("reading from region server %s table %s region %s and key %s", -508 serverName, region.getTable(), region.getRegionNameAsString(), -509 Bytes.toStringBinary(startKey))); -510 } -511 if (startKey.length > 0) { -512 get = new Get(startKey); -513 get.setCacheBlocks(false); -514 get.setFilter(new FirstKeyOnlyFilter()); -515 stopWatch.start(); -516 table.get(get); -517 stopWatch.stop(); -518 } else { -519 scan = new Scan(); -520 scan.setCacheBlocks(false); -521 scan.setFilter(new FirstKeyOnlyFilter()); -522 scan.setCaching(1); -523 scan.setMaxResultSize(1L); -524 scan.setOneRowLimit(); -525 stopWatch.start(); -526 ResultScanner s = table.getScanner(scan); -527 s.next(); -528 s.close(); -529 stopWatch.stop(); -530 } -531 successes.incrementAndGet(); -532 sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime()); -533 } catch (TableNotFoundException tnfe) { -534 LOG.error("Table may be deleted", tnfe); -535 // This is ignored because it doesn't imply that the regionserver is dead -536 } catch (TableNotEnabledException tnee) { -537 // This is considered a success since we got a response. -538 successes.incrementAndGet(); -539 LOG.debug("The targeted table was disabled. Assuming success."); -540 } catch (DoNotRetryIOException dnrioe) { -541 sink.publishReadFailure(tableName.getNameAsString(), serverName); -542 LOG.error(dnrioe.toString(), dnrioe); -543 } catch (IOException e) { -544 sink.publishReadFailure(tableName.getNameAsString(), serverName); -545 LOG.error(e.toString(), e); -546 } finally { -547 if (table != null) { -548 try { -549 table.close(); -550 } catch (IOException e) {/* DO NOTHING */ -551 LOG.error("Close table failed", e); -552 } -553 } -554 scan = null; -555 get = null; -556 startKey = null; -557 } -558 return null; -559 } -560 } -561 -562 private static final int USAGE_EXIT_CODE = 1; -563 private static final int INIT_ERROR_EXIT_CODE = 2; -564 private static final int TIMEOUT_ERROR_EXIT_CODE = 3; -565 private static final int ERROR_EXIT_CODE = 4; -566 private static final int FAILURE_EXIT_CODE = 5; -567 -568 private static final long DEFAULT_INTERVAL = 60000; -569 -570 private static final long DEFAULT_TIMEOUT = 600000; // 10 mins -571 private static final int MAX_THREADS_NUM = 16; // #threads to contact regions +326 private Connection connection; +327 private RegionInfo region; +328 private RegionStdOutSink sink; +329 private TaskType taskType; +330 private boolean rawScanEnabled; +331 private ServerName serverName; +332 private LongAdder readWriteLatency; +333 +334 RegionTask(Connection connection, RegionInfo region, ServerName serverName, +335 RegionStdOutSink sink, TaskType taskType, boolean rawScanEnabled, LongAdder rwLatency) { +336 this.connection = connection; +337 this.region = region; +338 this.serverName = serverName; +339 this.sink = sink; +340 this.taskType = taskType; +341 this.rawScanEnabled = rawScanEnabled; +342 this.readWriteLatency = rwLatency; +343 } +344 +345 @Override +346 public Void call() { +347 switch (taskType) { +348 case READ: +349 return read(); +350 case WRITE: +351 return write(); +352 default: +353 return read(); +354 } +355 } +356 +357 public Void read() { +358 Table table = null; +359 TableDescriptor tableDesc = null; +360 try { +361 LOG.debug("Reading table descriptor for table {}", region.getTable()); +362 table = connection.getTable(region.getTable()); +363 tableDesc = table.getDescriptor(); +364 } catch (IOException e) { +365 LOG.debug("sniffRegion {} of {} failed", region.getEncodedName(), e); +366 sink.publishReadFailure(serverName, region, e); +367 if (table != null) { +368 try { +369 table.close(); +370 } catch (IOException ioe) { +371 LOG.error("Close table failed", e); +372 } +373 } +374 return null; +375 } +376 +377 byte[] startKey = null; +378 Get get = null; +379 Scan scan = null; +380 ResultScanner rs = null; +381 StopWatch stopWatch = new StopWatch(); +382 for (ColumnFamilyDescriptor column : tableDesc.getColumnFamilies()) { +383 stopWatch.reset(); +384 startKey = region.getStartKey(); +385 // Can't do a get on empty start row so do a Scan of first element if any instead. +386 if (startKey.length > 0) { +387 get = new Get(startKey); +388 get.setCacheBlocks(false); +389 get.setFilter(new FirstKeyOnlyFilter()); +390 get.addFamily(column.getName()); +391 } else { +392 scan = new Scan(); +393 LOG.debug("rawScan {} for {}", rawScanEnabled, tableDesc.getTableName()); +394 scan.setRaw(rawScanEnabled); +395 scan.setCaching(1); +396 scan.setCacheBlocks(false); +397 scan.setFilter(new FirstKeyOnlyFilter()); +398 scan.addFamily(column.getName()); +399 scan.setMaxResultSize(1L); +400 scan.setOneRowLimit(); +401 } +402 LOG.debug("Reading from {} {} {} {}", tableDesc.getTableName(), +403 region.getRegionNameAsString(), column.getNameAsString(), +404 Bytes.toStringBinary(startKey)); +405 try { +406 stopWatch.start(); +407 if (startKey.length > 0) { +408 table.get(get); +409 } else { +410 rs = table.getScanner(scan); +411 rs.next(); +412 } +413 stopWatch.stop(); +414 this.readWriteLatency.add(stopWatch.getTime()); +415 sink.publishReadTiming(serverName, region, column, stopWatch.getTime()); +416 } catch (Exception e) { +417 sink.publishReadFailure(serverName, region, column, e); +418 sink.updateReadFailures(region.getRegionNameAsString(), serverName.getHostname()); +419 } finally { +420 if (rs != null) { +421 rs.close(); +422 } +423 scan = null; +424 get = null; +425 } +426 } +427 try { +428 table.close(); +429 } catch (IOException e) { +430 LOG.error("Close table failed", e); +431 } +432 return null; +433 } +434 +435 /** +436 * Check writes for the canary table +437 */ +438 private Void write() { +439 Table table = null; +440 TableDescriptor tableDesc = null; +441 try { +442 table = connection.getTable(region.getTable()); +443 tableDesc = table.getDescriptor(); +444 byte[] rowToCheck = region.getStartKey(); +445 if (rowToCheck.length == 0) { +446 rowToCheck = new byte[]{0x0}; +447 } +448 int writeValueSize = +449 connection.getConfiguration().getInt(HConstants.HBASE_CANARY_WRITE_VALUE_SIZE_KEY, 10); +450 for (ColumnFamilyDescriptor column : tableDesc.getColumnFamilies()) { +451 Put put = new Put(rowToCheck); +452 byte[] value = new byte[writeValueSize]; +453 Bytes.random(value); +454 put.addColumn(column.getName(), HConstants.EMPTY_BYTE_ARRAY, value); +455 +456 LOG.debug("Writing to {} {} {} {}", +457 tableDesc.getTableName(), region.getRegionNameAsString(), column.getNameAsString(), +458 Bytes.toStringBinary(rowToCheck)); +459 try { +460 long startTime = System.currentTimeMillis(); +461 table.put(put); +462 long time = System.currentTimeMillis() - startTime; +463 this.readWriteLatency.add(time); +464 sink.publishWriteTiming(serverName, region, column, time); +465 } catch (Exception e) { +466 sink.publishWriteFailure(serverName, region, column, e); +467 } +468 } +469 table.close(); +470 } catch (IOException e) { +471 sink.publishWriteFailure(serverName, region, e); +472 sink.updateWriteFailures(region.getRegionNameAsString(), serverName.getHostname() ); +473 } +474 return null; +475 } +476 } +477 +478 /** +479 * Run a single RegionServer Task and then exit. +480 * Get one row from a region on the regionserver and output latency or the failure. +481 */ +482 static class RegionServerTask implements Callable<Void> { +483 private Connection connection; +484 private String serverName; +485 private RegionInfo region; +486 private RegionServerStdOutSink sink; +487 private AtomicLong successes; +488 +489 RegionServerTask(Connection connection, String serverName, RegionInfo region, +490 RegionServerStdOutSink sink, AtomicLong successes) { +491 this.connection = connection; +492 this.serverName = serverName; +493 this.region = region; +494 this.sink = sink; +495 this.successes = successes; +496 } +497 +498 @Override +499 public Void call() { +500 TableName tableName = null; +501 Table table = null; +502 Get get = null; +503 byte[] startKey = null; +504 Scan scan = null; +505 StopWatch stopWatch = new StopWatch(); +506 // monitor one region on every region server +507 stopWatch.reset(); +508 try { +509 tableName = region.getTable(); +510 table = connection.getTable(tableName); +511 startKey = region.getStartKey(); +512 // Can't do a get on empty start row so do a Scan of first element if any instead. +513 LOG.debug("Reading from {} {} {} {}", +514 serverName, region.getTable(), region.getRegionNameAsString(), +515 Bytes.toStringBinary(startKey)); +516 if (startKey.length > 0) { +517 get = new Get(startKey); +518 get.setCacheBlocks(false); +519 get.setFilter(new FirstKeyOnlyFilter()); +520 stopWatch.start(); +521 table.get(get); +522 stopWatch.stop(); +523 } else { +524 scan = new Scan(); +525 scan.setCacheBlocks(false); +526 scan.setFilter(new FirstKeyOnlyFilter()); +527 scan.setCaching(1); +528 scan.setMaxResultSize(1L); +529 scan.setOneRowLimit(); +530 stopWatch.start(); +531 ResultScanner s = table.getScanner(scan); +532 s.next(); +533 s.close(); +534 stopWatch.stop(); +535 } +536 successes.incrementAndGet(); +537 sink.publishReadTiming(tableName.getNameAsString(), serverName, stopWatch.getTime()); +538 } catch (TableNotFoundException tnfe) { +539 LOG.error("Table may be deleted", tnfe); +540 // This is ignored because it doesn't imply that the regionserver is dead +541 } catch (TableNotEnabledException tnee) { +542 // This is considered a success since we got a response. +543 successes.incrementAndGet();