Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 96EBF99FA for ; Fri, 25 May 2012 16:54:32 +0000 (UTC) Received: (qmail 25742 invoked by uid 500); 25 May 2012 16:54:32 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 25707 invoked by uid 500); 25 May 2012 16:54:32 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 25699 invoked by uid 99); 25 May 2012 16:54:32 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 25 May 2012 16:54:32 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 25 May 2012 16:54:30 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 174F22388865 for ; Fri, 25 May 2012 16:54:10 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1342724 - in /hbase/trunk/src: main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/master/handler/ main/java/org/apache/hadoop/hbase/protobuf/ test/java/org/apache/hadoop/hbase/master/ Date: Fri, 25 May 2012 16:54:09 -0000 To: commits@hbase.apache.org From: ramkrishna@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120525165410.174F22388865@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: ramkrishna Date: Fri May 25 16:54:09 2012 New Revision: 1342724 URL: http://svn.apache.org/viewvc?rev=1342724&view=rev Log: HBASE-6070 AM.nodeDeleted and SSH races creating problems for regions under SPLIT (Ramkrishna) Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1342724&r1=1342723&r2=1342724&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Fri May 25 16:54:09 2012 @@ -1145,7 +1145,7 @@ public class AssignmentManager extends Z RegionState rs = this.regionsInTransition.get(regionName); if (rs != null) { HRegionInfo regionInfo = rs.getRegion(); - if (rs.isSplitting() || rs.isSplit()) { + if (rs.isSplit()) { LOG.debug("Ephemeral node deleted, regionserver crashed?, " + "clearing from RIT; rs=" + rs); regionOffline(rs.getRegion()); Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1342724&r1=1342723&r2=1342724&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri May 25 16:54:09 2012 @@ -294,7 +294,7 @@ public class ServerShutdownHandler exten this.server.getCatalogTracker())) { ServerName addressFromAM = this.services.getAssignmentManager() .getRegionServerOfRegion(e.getKey()); - if (rit != null && !rit.isClosing() && !rit.isPendingClose()) { + if (rit != null && !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) { // Skip regions that were in transition unless CLOSING or // PENDING_CLOSE LOG.info("Skip assigning region " + rit.toString()); @@ -307,6 +307,16 @@ public class ServerShutdownHandler exten } else { toAssignRegions.add(e.getKey()); } + } else if (rit != null && (rit.isSplitting() || rit.isSplit())) { + // This will happen when the RS went down and the call back for the SPLIITING or SPLIT + // has not yet happened for node Deleted event. In that case if the region was actually + // split + // but the RS had gone down before completing the split process then will not try to + // assign the parent region again. In that case we should make the region offline and + // also delete the region from RIT. + HRegionInfo region = rit.getRegion(); + AssignmentManager am = this.services.getAssignmentManager(); + am.regionOffline(region); } // If the table was partially disabled and the RS went down, we should clear the RIT // and remove the node for the region. Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java?rev=1342724&r1=1342723&r2=1342724&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java Fri May 25 16:54:09 2012 @@ -1039,6 +1039,7 @@ public final class ProtobufUtil { RequestConverter.buildGetRequest(regionName, get); try { GetResponse response = client.get(null, request); + if (response == null) return null; return toResult(response.getResult()); } catch (ServiceException se) { throw getRemoteException(se); Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java?rev=1342724&r1=1342723&r2=1342724&view=diff ============================================================================== --- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java (original) +++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/Mocking.java Fri May 25 16:54:09 2012 @@ -67,6 +67,22 @@ public class Mocking { Bytes.toBytes(sn.getStartcode()))); return new Result(kvs); } + + + /** + * @param sn + * ServerName to use making startcode and server in meta + * @param hri + * Region to serialize into HRegionInfo + * @return A mocked up Result that fakes a Get on a row in the .META. table. + * @throws IOException + */ + static Result getMetaTableRowResultAsSplitRegion(final HRegionInfo hri, final ServerName sn) + throws IOException { + hri.setOffline(true); + hri.setSplit(true); + return getMetaTableRowResult(hri, sn); + } /** * Fakes the regionserver-side zk transitions of a region open. Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java?rev=1342724&r1=1342723&r2=1342724&view=diff ============================================================================== --- hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (original) +++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java Fri May 25 16:54:09 2012 @@ -405,7 +405,7 @@ public class TestAssignmentManager { AssignmentManager am = new AssignmentManager(this.server, this.serverManager, ct, balancer, executor, null); try { - processServerShutdownHandler(ct, am); + processServerShutdownHandler(ct, am, false); } finally { executor.shutdown(); am.shutdown(); @@ -429,6 +429,70 @@ public class TestAssignmentManager { testCaseWithPartiallyDisabledState(Table.State.DISABLING); testCaseWithPartiallyDisabledState(Table.State.DISABLED); } + + + /** + * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS + * has actually completed the splitting in META but went down. See HBASE-6070 and also HBASE-5806 + * + * @throws KeeperException + * @throws IOException + */ + @Test + public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception { + // true indicates the region is split but still in RIT + testCaseWithSplitRegionPartial(true); + // false indicate the region is not split + testCaseWithSplitRegionPartial(false); + } + + private void testCaseWithSplitRegionPartial(boolean regionSplitDone) throws KeeperException, + IOException, NodeExistsException, InterruptedException, ServiceException { + // Create and startup an executor. This is used by AssignmentManager + // handling zk callbacks. + ExecutorService executor = startupMasterExecutor("testSSHWhenSplitRegionInProgress"); + + // We need a mocked catalog tracker. + CatalogTracker ct = Mockito.mock(CatalogTracker.class); + LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server.getConfiguration()); + // Create an AM. + AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server, + this.serverManager); + // adding region to regions and servers maps. + am.regionOnline(REGIONINFO, SERVERNAME_A); + // adding region in pending close. + am.regionsInTransition.put(REGIONINFO.getEncodedName(), new RegionState(REGIONINFO, + State.SPLITTING, System.currentTimeMillis(), SERVERNAME_A)); + am.getZKTable().setEnabledTable(REGIONINFO.getTableNameAsString()); + RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING, + REGIONINFO.getRegionName(), SERVERNAME_A); + String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName()); + // create znode in M_ZK_REGION_CLOSING state. + ZKUtil.createAndWatch(this.watcher, node, data.toByteArray()); + + try { + processServerShutdownHandler(ct, am, regionSplitDone); + // check znode deleted or not. + // In both cases the znode should be deleted. + + if (regionSplitDone) { + assertTrue("Region state of region in SPLITTING should be removed from rit.", + am.regionsInTransition.isEmpty()); + } else { + while (!am.assignInvoked) { + Thread.sleep(1); + } + assertTrue("Assign should be invoked.", am.assignInvoked); + } + } finally { + REGIONINFO.setOffline(false); + REGIONINFO.setSplit(false); + executor.shutdown(); + am.shutdown(); + // Clean up all znodes + ZKAssign.deleteAllNodes(this.watcher); + } + } private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException, IOException, NodeExistsException, ServiceException { @@ -460,7 +524,7 @@ public class TestAssignmentManager { // create znode in M_ZK_REGION_CLOSING state. ZKUtil.createAndWatch(this.watcher, node, data.toByteArray()); try { - processServerShutdownHandler(ct, am); + processServerShutdownHandler(ct, am, false); // check znode deleted or not. // In both cases the znode should be deleted. assertTrue("The znode should be deleted.", ZKUtil.checkExists(this.watcher, node) == -1); @@ -480,7 +544,7 @@ public class TestAssignmentManager { } } - private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am) + private void processServerShutdownHandler(CatalogTracker ct, AssignmentManager am, boolean splitRegion) throws IOException, ServiceException { // Make sure our new AM gets callbacks; once registered, can't unregister. // Thats ok because we make a new zk watcher for each test. @@ -490,7 +554,14 @@ public class TestAssignmentManager { // Make an RS Interface implementation. Make it so a scanner can go against it. ClientProtocol implementation = Mockito.mock(ClientProtocol.class); // Get a meta row result that has region up on SERVERNAME_A - Result r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A); + + Result r = null; + if (splitRegion) { + r = Mocking.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A); + } else { + r = Mocking.getMetaTableRowResult(REGIONINFO, SERVERNAME_A); + } + ScanResponse.Builder builder = ScanResponse.newBuilder(); builder.setMoreResults(true); builder.addResult(ProtobufUtil.toResult(r)); @@ -830,6 +901,7 @@ public class TestAssignmentManager { // Ditto for ct private final CatalogTracker ct; boolean processRITInvoked = false; + boolean assignInvoked = false; AtomicBoolean gate = new AtomicBoolean(true); public AssignmentManagerWithExtrasForTesting(final Server master, @@ -865,6 +937,17 @@ public class TestAssignmentManager { super.assign(region, setOfflineInZK, forceNewPlan, hijack); this.gate.set(true); } + + @Override + public ServerName getRegionServerOfRegion(HRegionInfo hri) { + return SERVERNAME_A; + } + + @Override + public void assign(java.util.List regions, java.util.List servers) + { + assignInvoked = true; + }; /** reset the watcher */ void setWatcher(ZooKeeperWatcher watcher) { this.watcher = watcher;