Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 0294EF865 for ; Sat, 6 Apr 2013 06:07:19 +0000 (UTC) Received: (qmail 45067 invoked by uid 500); 6 Apr 2013 06:07:18 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 44999 invoked by uid 500); 6 Apr 2013 06:07:18 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 44856 invoked by uid 99); 6 Apr 2013 06:07:18 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 06 Apr 2013 06:07:18 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 06 Apr 2013 06:07:14 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 043732388ABA; Sat, 6 Apr 2013 06:06:16 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1465198 [9/41] - in /hbase/hbase.apache.org/trunk: ./ css/ hbase-assembly/ images/ xref-test/ xref-test/org/apache/hadoop/hbase/ xref-test/org/apache/hadoop/hbase/client/ xref-test/org/apache/hadoop/hbase/client/replication/ xref-test/org/... Date: Sat, 06 Apr 2013 06:06:09 -0000 To: commits@hbase.apache.org From: stack@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130406060616.043732388ABA@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IngestIntegrationTestBase.html URL: http://svn.apache.org/viewvc/hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IngestIntegrationTestBase.html?rev=1465198&r1=1465197&r2=1465198&view=diff ============================================================================== --- hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IngestIntegrationTestBase.html (original) +++ hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IngestIntegrationTestBase.html Sat Apr 6 06:06:07 2013 @@ -34,103 +34,108 @@ 24 25 import org.apache.commons.logging.Log; 26 import org.apache.commons.logging.LogFactory; -27 import org.apache.hadoop.hbase.util.Bytes; -28 import org.apache.hadoop.hbase.util.LoadTestTool; -29 -30 /** -31 * A base class for tests that do something with the cluster while running -32 * {@link LoadTestTool} to write and verify some data. -33 */ -34 public abstract class IngestIntegrationTestBase { -35 private static String tableName = null; -36 -37 /** A soft limit on how long we should run */ -38 private static final String RUN_TIME_KEY = "hbase.%s.runtime"; -39 -40 protected static final Log LOG = LogFactory.getLog(IngestIntegrationTestBase.class); -41 protected IntegrationTestingUtility util; -42 protected HBaseCluster cluster; -43 private LoadTestTool loadTool; -44 -45 protected void setUp(int numSlavesBase) throws Exception { -46 tableName = this.getClass().getSimpleName(); -47 util = new IntegrationTestingUtility(); -48 LOG.info("Initializing cluster with " + numSlavesBase + " servers"); -49 util.initializeCluster(numSlavesBase); -50 LOG.info("Done initializing cluster"); -51 cluster = util.getHBaseClusterInterface(); -52 deleteTableIfNecessary(); -53 loadTool = new LoadTestTool(); -54 loadTool.setConf(util.getConfiguration()); -55 // Initialize load test tool before we start breaking things; -56 // LoadTestTool init, even when it is a no-op, is very fragile. -57 int ret = loadTool.run(new String[] { "-tn", tableName, "-init_only" }); -58 Assert.assertEquals("Failed to initialize LoadTestTool", 0, ret); -59 } -60 -61 protected void tearDown() throws Exception { -62 LOG.info("Restoring the cluster"); -63 util.restoreCluster(); -64 LOG.info("Done restoring the cluster"); -65 } -66 -67 private void deleteTableIfNecessary() throws IOException { -68 if (util.getHBaseAdmin().tableExists(tableName)) { -69 util.deleteTable(Bytes.toBytes(tableName)); -70 } -71 } -72 -73 protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, -74 int colsPerKey, int recordSize, int writeThreads) throws Exception { -75 LOG.info("Running ingest"); -76 LOG.info("Cluster size:" + util.getHBaseClusterInterface().getClusterStatus().getServersSize()); +27 import org.apache.hadoop.conf.Configuration; +28 import org.apache.hadoop.hbase.util.Bytes; +29 import org.apache.hadoop.hbase.util.LoadTestTool; +30 +31 /** +32 * A base class for tests that do something with the cluster while running +33 * {@link LoadTestTool} to write and verify some data. +34 */ +35 public abstract class IngestIntegrationTestBase { +36 private static String tableName = null; +37 +38 /** A soft limit on how long we should run */ +39 private static final String RUN_TIME_KEY = "hbase.%s.runtime"; +40 +41 protected static final Log LOG = LogFactory.getLog(IngestIntegrationTestBase.class); +42 protected IntegrationTestingUtility util; +43 protected HBaseCluster cluster; +44 private LoadTestTool loadTool; +45 +46 protected void setUp(int numSlavesBase, Configuration conf) throws Exception { +47 tableName = this.getClass().getSimpleName(); +48 util = (conf == null) ? new IntegrationTestingUtility() : new IntegrationTestingUtility(conf); +49 LOG.info("Initializing cluster with " + numSlavesBase + " servers"); +50 util.initializeCluster(numSlavesBase); +51 LOG.info("Done initializing cluster"); +52 cluster = util.getHBaseClusterInterface(); +53 deleteTableIfNecessary(); +54 loadTool = new LoadTestTool(); +55 loadTool.setConf(util.getConfiguration()); +56 // Initialize load test tool before we start breaking things; +57 // LoadTestTool init, even when it is a no-op, is very fragile. +58 int ret = loadTool.run(new String[] { "-tn", tableName, "-init_only" }); +59 Assert.assertEquals("Failed to initialize LoadTestTool", 0, ret); +60 } +61 +62 protected void setUp(int numSlavesBase) throws Exception { +63 setUp(numSlavesBase, null); +64 } +65 +66 protected void tearDown() throws Exception { +67 LOG.info("Restoring the cluster"); +68 util.restoreCluster(); +69 LOG.info("Done restoring the cluster"); +70 } +71 +72 private void deleteTableIfNecessary() throws IOException { +73 if (util.getHBaseAdmin().tableExists(tableName)) { +74 util.deleteTable(Bytes.toBytes(tableName)); +75 } +76 } 77 -78 long start = System.currentTimeMillis(); -79 String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName()); -80 long runtime = util.getConfiguration().getLong(runtimeKey, defaultRunTime); -81 long startKey = 0; +78 protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, +79 int colsPerKey, int recordSize, int writeThreads) throws Exception { +80 LOG.info("Running ingest"); +81 LOG.info("Cluster size:" + util.getHBaseClusterInterface().getClusterStatus().getServersSize()); 82 -83 long numKeys = getNumKeys(keysPerServerPerIter); -84 while (System.currentTimeMillis() - start < 0.9 * runtime) { -85 LOG.info("Intended run time: " + (runtime/60000) + " min, left:" + -86 ((runtime - (System.currentTimeMillis() - start))/60000) + " min"); +83 long start = System.currentTimeMillis(); +84 String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName()); +85 long runtime = util.getConfiguration().getLong(runtimeKey, defaultRunTime); +86 long startKey = 0; 87 -88 int ret = loadTool.run(new String[] { -89 "-tn", tableName, -90 "-write", String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), -91 "-start_key", String.valueOf(startKey), -92 "-num_keys", String.valueOf(numKeys), -93 "-skip_init" -94 }); -95 if (0 != ret) { -96 String errorMsg = "Load failed with error code " + ret; -97 LOG.error(errorMsg); -98 Assert.fail(errorMsg); -99 } -100 -101 ret = loadTool.run(new String[] { -102 "-tn", tableName, -103 "-read", "100:20", -104 "-start_key", String.valueOf(startKey), -105 "-num_keys", String.valueOf(numKeys), -106 "-skip_init" -107 }); -108 if (0 != ret) { -109 String errorMsg = "Verification failed with error code " + ret; -110 LOG.error(errorMsg); -111 Assert.fail(errorMsg); -112 } -113 startKey += numKeys; -114 } -115 } -116 -117 /** Estimates a data size based on the cluster size */ -118 private long getNumKeys(int keysPerServer) -119 throws IOException { -120 int numRegionServers = cluster.getClusterStatus().getServersSize(); -121 return keysPerServer * numRegionServers; -122 } -123 } +88 long numKeys = getNumKeys(keysPerServerPerIter); +89 while (System.currentTimeMillis() - start < 0.9 * runtime) { +90 LOG.info("Intended run time: " + (runtime/60000) + " min, left:" + +91 ((runtime - (System.currentTimeMillis() - start))/60000) + " min"); +92 +93 int ret = loadTool.run(new String[] { +94 "-tn", tableName, +95 "-write", String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), +96 "-start_key", String.valueOf(startKey), +97 "-num_keys", String.valueOf(numKeys), +98 "-skip_init" +99 }); +100 if (0 != ret) { +101 String errorMsg = "Load failed with error code " + ret; +102 LOG.error(errorMsg); +103 Assert.fail(errorMsg); +104 } +105 +106 ret = loadTool.run(new String[] { +107 "-tn", tableName, +108 "-read", "100:20", +109 "-start_key", String.valueOf(startKey), +110 "-num_keys", String.valueOf(numKeys), +111 "-skip_init" +112 }); +113 if (0 != ret) { +114 String errorMsg = "Verification failed with error code " + ret; +115 LOG.error(errorMsg); +116 Assert.fail(errorMsg); +117 } +118 startKey += numKeys; +119 } +120 } +121 +122 /** Estimates a data size based on the cluster size */ +123 private long getNumKeys(int keysPerServer) +124 throws IOException { +125 int numRegionServers = cluster.getClusterStatus().getServersSize(); +126 return keysPerServer * numRegionServers; +127 } +128 }
Modified: hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.html URL: http://svn.apache.org/viewvc/hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.html?rev=1465198&r1=1465197&r2=1465198&view=diff ============================================================================== --- hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.html (original) +++ hbase/hbase.apache.org/trunk/xref-test/org/apache/hadoop/hbase/IntegrationTestRebalanceAndKillServersTargeted.html Sat Apr 6 06:06:07 2013 @@ -37,102 +37,106 @@ 27 28 import org.apache.commons.logging.Log; 29 import org.apache.commons.logging.LogFactory; -30 import org.apache.hadoop.hbase.util.Bytes; -31 import org.apache.hadoop.hbase.util.ChaosMonkey; -32 import org.apache.hadoop.hbase.util.Pair; -33 import org.apache.hadoop.hbase.util.ChaosMonkey.Action; -34 import org.junit.After; -35 import org.junit.Before; -36 import org.junit.Test; -37 import org.junit.Ignore; -38 import org.junit.experimental.categories.Category; -39 -40 /** -41 * A system test which does large data ingestion and verify using {@link LoadTestTool}, -42 * while introducing chaos by hoarding many regions into few servers (unbalancing), then -43 * killing some of these servers, and triggering balancer. -44 * It's configured using a set of constants on top, which cover this scenario and are -45 * reasonable for minicluster. See constants if you want to tweak the test. -46 * You can configure how long the test should run by using -47 * "hbase.IntegrationTestRebalanceAndKillServersTargeted.runtime" configuration parameter, -48 * which is probably most useful on cluster. -49 */ -50 @Category(IntegrationTests.class) -51 public class IntegrationTestRebalanceAndKillServersTargeted extends IngestIntegrationTestBase { -52 private static final int NUM_SLAVES_BASE = 4; // number of slaves for the smallest cluster -53 private static final long DEFAULT_RUN_TIME = 5 * 60 * 1000; // run for 5 min by default -54 -55 /** How often to introduce the chaos. If too frequent, sequence of kills on minicluster -56 * can cause test to fail when Put runs out of retries. */ -57 private static final long CHAOS_EVERY_MS = 65 * 1000; -58 -59 private ChaosMonkey monkey; +30 import org.apache.hadoop.conf.Configuration; +31 import org.apache.hadoop.hbase.client.HConnectionManager; +32 import org.apache.hadoop.hbase.util.Bytes; +33 import org.apache.hadoop.hbase.util.ChaosMonkey; +34 import org.apache.hadoop.hbase.util.Pair; +35 import org.apache.hadoop.hbase.util.ChaosMonkey.Action; +36 import org.junit.After; +37 import org.junit.Before; +38 import org.junit.Test; +39 import org.junit.Ignore; +40 import org.junit.experimental.categories.Category; +41 +42 /** +43 * A system test which does large data ingestion and verify using {@link LoadTestTool}, +44 * while introducing chaos by hoarding many regions into few servers (unbalancing), then +45 * killing some of these servers, and triggering balancer. +46 * It's configured using a set of constants on top, which cover this scenario and are +47 * reasonable for minicluster. See constants if you want to tweak the test. +48 * You can configure how long the test should run by using +49 * "hbase.IntegrationTestRebalanceAndKillServersTargeted.runtime" configuration parameter, +50 * which is probably most useful on cluster. +51 */ +52 @Category(IntegrationTests.class) +53 public class IntegrationTestRebalanceAndKillServersTargeted extends IngestIntegrationTestBase { +54 private static final int NUM_SLAVES_BASE = 4; // number of slaves for the smallest cluster +55 private static final long DEFAULT_RUN_TIME = 5 * 60 * 1000; // run for 5 min by default +56 +57 /** How often to introduce the chaos. If too frequent, sequence of kills on minicluster +58 * can cause test to fail when Put runs out of retries. */ +59 private static final long CHAOS_EVERY_MS = 65 * 1000; 60 -61 /** This action is too specific to put in ChaosMonkey; put it here */ -62 static class UnbalanceKillAndRebalanceAction extends ChaosMonkey.Action { -63 /** Fractions of servers to get regions and live and die respectively; from all other -64 * servers, HOARD_FRC_OF_REGIONS will be removed to the above randomly */ -65 private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1; -66 private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1; -67 private static final double HOARD_FRC_OF_REGIONS = 0.8; -68 /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance -69 * and restarting the servers; to make sure these events have time to impact the cluster. */ -70 private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000; -71 private static final long WAIT_FOR_KILLS_MS = 2 * 1000; -72 private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000; -73 -74 @Override -75 protected void perform() throws Exception { -76 ClusterStatus status = this.cluster.getClusterStatus(); -77 List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers()); -78 int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size()); -79 int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size()); -80 Assert.assertTrue((liveCount + deadCount) < victimServers.size()); -81 List<ServerName> targetServers = new ArrayList<ServerName>(liveCount); -82 for (int i = 0; i < liveCount + deadCount; ++i) { -83 int victimIx = random.nextInt(victimServers.size()); -84 targetServers.add(victimServers.remove(victimIx)); -85 } -86 unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS); -87 Thread.sleep(WAIT_FOR_UNBALANCE_MS); -88 for (int i = 0; i < liveCount; ++i) { -89 killRs(targetServers.get(i)); -90 } -91 Thread.sleep(WAIT_FOR_KILLS_MS); -92 forceBalancer(); -93 Thread.sleep(WAIT_AFTER_BALANCE_MS); -94 for (int i = 0; i < liveCount; ++i) { -95 startRs(targetServers.get(i)); -96 } -97 } -98 } -99 -100 @Before -101 @SuppressWarnings("unchecked") -102 public void setUp() throws Exception { -103 super.setUp(NUM_SLAVES_BASE); -104 -105 ChaosMonkey.Policy chaosPolicy = new ChaosMonkey.PeriodicRandomActionPolicy( -106 CHAOS_EVERY_MS, new UnbalanceKillAndRebalanceAction()); -107 monkey = new ChaosMonkey(util, chaosPolicy); -108 monkey.start(); -109 } -110 -111 @After -112 public void tearDown() throws Exception { -113 if (monkey != null) { -114 monkey.stop("tearDown"); -115 monkey.waitForStop(); -116 } -117 super.tearDown(); -118 } -119 -120 // Disabled until we fix hbase-7520 -121 @Test -122 public void testDataIngest() throws Exception { -123 runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 100, 20); -124 } -125 } +61 private ChaosMonkey monkey; +62 +63 /** This action is too specific to put in ChaosMonkey; put it here */ +64 static class UnbalanceKillAndRebalanceAction extends ChaosMonkey.Action { +65 /** Fractions of servers to get regions and live and die respectively; from all other +66 * servers, HOARD_FRC_OF_REGIONS will be removed to the above randomly */ +67 private static final double FRC_SERVERS_THAT_HOARD_AND_LIVE = 0.1; +68 private static final double FRC_SERVERS_THAT_HOARD_AND_DIE = 0.1; +69 private static final double HOARD_FRC_OF_REGIONS = 0.8; +70 /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance +71 * and restarting the servers; to make sure these events have time to impact the cluster. */ +72 private static final long WAIT_FOR_UNBALANCE_MS = 2 * 1000; +73 private static final long WAIT_FOR_KILLS_MS = 2 * 1000; +74 private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000; +75 +76 @Override +77 protected void perform() throws Exception { +78 ClusterStatus status = this.cluster.getClusterStatus(); +79 List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers()); +80 int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size()); +81 int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size()); +82 Assert.assertTrue((liveCount + deadCount) < victimServers.size()); +83 List<ServerName> targetServers = new ArrayList<ServerName>(liveCount); +84 for (int i = 0; i < liveCount + deadCount; ++i) { +85 int victimIx = random.nextInt(victimServers.size()); +86 targetServers.add(victimServers.remove(victimIx)); +87 } +88 unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS); +89 Thread.sleep(WAIT_FOR_UNBALANCE_MS); +90 for (int i = 0; i < liveCount; ++i) { +91 killRs(targetServers.get(i)); +92 } +93 Thread.sleep(WAIT_FOR_KILLS_MS); +94 forceBalancer(); +95 Thread.sleep(WAIT_AFTER_BALANCE_MS); +96 for (int i = 0; i < liveCount; ++i) { +97 startRs(targetServers.get(i)); +98 } +99 } +100 } +101 +102 @Before +103 @SuppressWarnings("unchecked") +104 public void setUp() throws Exception { +105 Configuration conf = HBaseConfiguration.create(); +106 conf.set(HConnectionManager.RETRIES_BY_SERVER, "true"); +107 super.setUp(NUM_SLAVES_BASE, conf); +108 +109 ChaosMonkey.Policy chaosPolicy = new ChaosMonkey.PeriodicRandomActionPolicy( +110 CHAOS_EVERY_MS, new UnbalanceKillAndRebalanceAction()); +111 monkey = new ChaosMonkey(util, chaosPolicy); +112 monkey.start(); +113 } +114 +115 @After +116 public void tearDown() throws Exception { +117 if (monkey != null) { +118 monkey.stop("tearDown"); +119 monkey.waitForStop(); +120 } +121 super.tearDown(); +122 } +123 +124 // Disabled until we fix hbase-7520 +125 @Test +126 public void testDataIngest() throws Exception { +127 runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 100, 20); +128 } +129 }