Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A18FA9411 for ; Tue, 17 Jan 2012 17:35:09 +0000 (UTC) Received: (qmail 18998 invoked by uid 500); 17 Jan 2012 17:35:09 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 18936 invoked by uid 500); 17 Jan 2012 17:35:08 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 18929 invoked by uid 99); 17 Jan 2012 17:35:08 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 17 Jan 2012 17:35:08 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 17 Jan 2012 17:35:05 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id D41B723888FD for ; Tue, 17 Jan 2012 17:34:43 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1232503 - in /hbase/branches/0.90: ./ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/master/handler/ src/main/java/org/apache/hadoop/hbase/regionserver/ Date: Tue, 17 Jan 2012 17:34:43 -0000 To: commits@hbase.apache.org From: tedyu@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120117173443.D41B723888FD@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tedyu Date: Tue Jan 17 17:34:43 2012 New Revision: 1232503 URL: http://svn.apache.org/viewvc?rev=1232503&view=rev Log: HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang) Modified: hbase/branches/0.90/CHANGES.txt hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java Modified: hbase/branches/0.90/CHANGES.txt URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1232503&r1=1232502&r2=1232503&view=diff ============================================================================== --- hbase/branches/0.90/CHANGES.txt (original) +++ hbase/branches/0.90/CHANGES.txt Tue Jan 17 17:34:43 2012 @@ -1,7 +1,6 @@ HBase Change Log Release 0.90.7 - Unreleased BUG FIXES - HBASE-5153 Add retry logic in HConnectionImplementation#resetZooKeeperTrackers (Jieshan) Release 0.90.6 - Unreleased BUG FIXES @@ -31,6 +30,9 @@ Release 0.90.6 - Unreleased HBASE-5192 Backport HBASE-4236 Don't lock the stream while serializing the response (Ram) HBASE-5155 ServerShutDownHandler And Disable/Delete should not happen parallely leading to recreation of regions that were deleted (Ram) + HBASE-5153 Add retry logic in HConnectionImplementation#resetZooKeeperTrackers (Jieshan) + HBASE-5196 Failure in region split after PONR could cause region hole (Jimmy Xiang) + IMPROVEMENT HBASE-5102 Change the default value of the property "hbase.connection.per.config" to false in hbase-default.xml Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java?rev=1232503&r1=1232502&r2=1232503&view=diff ============================================================================== --- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (original) +++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Tue Jan 17 17:34:43 2012 @@ -25,6 +25,7 @@ import java.lang.reflect.InvocationTarge import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; @@ -67,6 +68,7 @@ import org.apache.hadoop.hbase.master.ha import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; import org.apache.hadoop.hbase.master.handler.ModifyTableHandler; +import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.TableAddFamilyHandler; import org.apache.hadoop.hbase.master.handler.TableDeleteFamilyHandler; import org.apache.hadoop.hbase.master.handler.TableModifyFamilyHandler; @@ -396,6 +398,9 @@ implements HMasterInterface, HMasterRegi this.assignmentManager.processFailover(); } + // Fixing up missing daughters if any + fixupDaughters(); + // Start balancer and meta catalog janitor after meta and regions have // been assigned. this.balancerChore = getAndStartBalancerChore(this); @@ -460,6 +465,37 @@ implements HMasterInterface, HMasterRegi } } + void fixupDaughters() throws IOException { + final Map offlineSplitParents = + new HashMap(); + // This visitor collects offline split parents in the .META. table + MetaReader.Visitor visitor = new MetaReader.Visitor() { + @Override + public boolean visit(Result r) throws IOException { + if (r == null || r.isEmpty()) return true; + HRegionInfo info = CatalogJanitor.getHRegionInfo(r); + if (info == null) return true; // Keep scanning + if (info.isOffline() && info.isSplit()) { + offlineSplitParents.put(info, r); + } + // Returning true means "keep scanning" + return true; + } + }; + // Run full scan of .META. catalog table passing in our custom visitor + MetaReader.fullScan(this.catalogTracker, visitor); + // Now work on our list of found parents. See if any we can clean up. + int fixups = 0; + for (Map.Entry e : offlineSplitParents.entrySet()) { + fixups += ServerShutdownHandler.fixupDaughters( + e.getValue(), assignmentManager, catalogTracker); + } + if (fixups != 0) { + LOG.info("Scanned the catalog and fixed up " + fixups + + " missing daughter region(s)"); + } + } + /* * @return This masters' address. * @throws UnknownHostException Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1232503&r1=1232502&r2=1232503&view=diff ============================================================================== --- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original) +++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Tue Jan 17 17:34:43 2012 @@ -286,30 +286,33 @@ public class ServerShutdownHandler exten * Check that daughter regions are up in .META. and if not, add them. * @param hris All regions for this server in meta. * @param result The contents of the parent row in .META. + * @return the number of daughters missing and fixed * @throws IOException */ - static void fixupDaughters(final Result result, + public static int fixupDaughters(final Result result, final AssignmentManager assignmentManager, final CatalogTracker catalogTracker) throws IOException { - fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager, - catalogTracker); - fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager, - catalogTracker); + int fixedA = fixupDaughter(result, HConstants.SPLITA_QUALIFIER, + assignmentManager, catalogTracker); + int fixedB = fixupDaughter(result, HConstants.SPLITB_QUALIFIER, + assignmentManager, catalogTracker); + return fixedA + fixedB; } /** * Check individual daughter is up in .META.; fixup if its not. * @param result The contents of the parent row in .META. * @param qualifier Which daughter to check for. + * @return 1 if the daughter is missing and fixed. Otherwise 0 * @throws IOException */ - static void fixupDaughter(final Result result, final byte [] qualifier, + static int fixupDaughter(final Result result, final byte [] qualifier, final AssignmentManager assignmentManager, final CatalogTracker catalogTracker) throws IOException { HRegionInfo daughter = getHRegionInfo(result, qualifier); - if (daughter == null) return; + if (daughter == null) return 0; if (isDaughterMissing(catalogTracker, daughter)) { LOG.info("Fixup; missing daughter " + daughter.getRegionNameAsString()); MetaEditor.addDaughter(catalogTracker, daughter, null); @@ -320,9 +323,11 @@ public class ServerShutdownHandler exten // And assign it. assignmentManager.assign(daughter, true); + return 1; } else { LOG.debug("Daughter " + daughter.getRegionNameAsString() + " present"); } + return 0; } /** Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java?rev=1232503&r1=1232502&r2=1232503&view=diff ============================================================================== --- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java (original) +++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java Tue Jan 17 17:34:43 2012 @@ -157,7 +157,7 @@ public class CompactSplitThread extends } catch (IOException ioe) { try { LOG.info("Running rollback/cleanup of failed split of " - + parent.getRegionNameAsString() + "; " + ioe.getMessage()); + + parent.getRegionNameAsString() + "; " + ioe.getMessage(), ioe); if (st.rollback(this.server, this.server)) { LOG.info("Successful rollback of failed split of " + parent.getRegionNameAsString());