Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 96027C507 for ; Mon, 30 Apr 2012 06:34:16 +0000 (UTC) Received: (qmail 58269 invoked by uid 500); 30 Apr 2012 06:34:16 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 57996 invoked by uid 500); 30 Apr 2012 06:34:11 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 57969 invoked by uid 99); 30 Apr 2012 06:34:10 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 Apr 2012 06:34:10 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 30 Apr 2012 06:34:06 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 378FE23889BB for ; Mon, 30 Apr 2012 06:33:45 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1332069 - in /hbase/branches/0.90: CHANGES.txt src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Date: Mon, 30 Apr 2012 06:33:45 -0000 To: commits@hbase.apache.org From: jmhsieh@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120430063345.378FE23889BB@eris.apache.org> Author: jmhsieh Date: Mon Apr 30 06:33:44 2012 New Revision: 1332069 URL: http://svn.apache.org/viewvc?rev=1332069&view=rev Log: HBASE-5712 Parallelize load of .regioninfo files in diagnostic/repair portion of hbck Modified: hbase/branches/0.90/CHANGES.txt hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Modified: hbase/branches/0.90/CHANGES.txt URL: http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1332069&r1=1332068&r2=1332069&view=diff ============================================================================== --- hbase/branches/0.90/CHANGES.txt (original) +++ hbase/branches/0.90/CHANGES.txt Mon Apr 30 06:33:44 2012 @@ -30,6 +30,7 @@ Release 0.90.7 - Unreleased HBASE-5589 Add of the offline call to the Master Interface HBASE-5734 Change hbck sideline root (Jimmy Xiang) HBASE-5801 [hbck] Hbck should handle case where some regions have different HTD settings in .regioninfo files (0.90 specific) (Jimmy Xiang) + HBASE-5712 Parallelize load of .regioninfo files in diagnostic/repair portion of hbck NEW FEATURE HBASE-5128 [uber hbck] Online automated repair of table integrity and region consistency problems Modified: hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1332069&r1=1332068&r2=1332069&view=diff ============================================================================== --- hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original) +++ hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Mon Apr 30 06:33:44 2012 @@ -28,9 +28,11 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -194,12 +196,12 @@ public class HBaseFsck { * detect table consistency problems (holes, dupes, overlaps). It is sorted * to prevent dupes. */ - private TreeMap tablesInfo = new TreeMap(); + private SortedMap tablesInfo = new ConcurrentSkipListMap(); /** * When initially looking at HDFS, we attempt to find any orphaned data. */ - private List orphanHdfsDirs = new ArrayList(); + private List orphanHdfsDirs = Collections.synchronizedList(new ArrayList()); /** * Constructor @@ -396,6 +398,11 @@ public class HBaseFsck { private void adoptHdfsOrphan(FileSystem fs, HbckInfo hi) throws IOException { Path p = hi.getHdfsRegionDir(); FileStatus[] dirs = fs.listStatus(p); + if (dirs == null) { + LOG.warn("Attempt to adopt ophan hdfs region skipped becuase no files present in " + + p + ". This dir could probably be deleted."); + return ; + } String tableName = Bytes.toString(hi.getTableName()); TableInfo tableInfo = tablesInfo.get(tableName); @@ -566,6 +573,12 @@ public class HBaseFsck { LOG.warn("No HDFS region dir found: " + hbi + " meta=" + hbi.metaEntry); return; } + + if (hbi.hdfsEntry.hri != null) { + // already loaded data + return; + } + Path regioninfo = new Path(regionDir, HRegion.REGIONINFO_FILE); FileSystem fs = FileSystem.get(conf); @@ -592,27 +605,37 @@ public class HBaseFsck { /** * Populate hbi's from regionInfos loaded from file system. */ - private TreeMap loadHdfsRegionInfos() throws IOException { + private SortedMap loadHdfsRegionInfos() throws IOException, InterruptedException { tablesInfo.clear(); // regenerating the data // generate region split structure - for (HbckInfo hbi : regionInfoMap.values()) { + Collection hbckInfos = regionInfoMap.values(); - // only load entries that haven't been loaded yet. - if (hbi.getHdfsHRI() == null) { - try { - loadHdfsRegioninfo(hbi); - } catch (IOException ioe) { - String msg = "Orphan region in HDFS: Unable to load .regioninfo from table " - + Bytes.toString(hbi.getTableName()) + " in hdfs dir " - + hbi.getHdfsRegionDir() - + "! It may be an invalid format or version file. Treating as " - + "an orphaned regiondir."; - errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg); - debugLsr(hbi.getHdfsRegionDir()); - orphanHdfsDirs.add(hbi); - continue; + // Parallelized read of .regioninfo files. + WorkItemHdfsRegionInfo[] hbis = new WorkItemHdfsRegionInfo[hbckInfos.size()]; + int num = 0; + for (HbckInfo hbi : hbckInfos) { + hbis[num] = new WorkItemHdfsRegionInfo(hbi, this, errors); + executor.execute(hbis[num]); + num++; + } + + for (int i=0; i < num; i++) { + WorkItemHdfsRegionInfo hbi = hbis[i]; + synchronized(hbi) { + while (!hbi.isDone()) { + hbi.wait(); } } + } + + // serialized table info gathering. + for (HbckInfo hbi: hbckInfos) { + + if (hbi.getHdfsHRI() == null) { + // was an orphan + continue; + } + // get table name from hdfs, populate various HBaseFsck tables. String tableName = Bytes.toString(hbi.getTableName()); @@ -664,7 +687,7 @@ public class HBaseFsck { * * @return An array list of puts to do in bulk, null if tables have problems */ - private ArrayList generatePuts(TreeMap tablesInfo) throws IOException { + private ArrayList generatePuts(SortedMap tablesInfo) throws IOException { ArrayList puts = new ArrayList(); boolean hasProblems = false; for (Entry e : tablesInfo.entrySet()) { @@ -704,7 +727,7 @@ public class HBaseFsck { /** * Suggest fixes for each table */ - private void suggestFixes(TreeMap tablesInfo) throws IOException { + private void suggestFixes(SortedMap tablesInfo) throws IOException { for (TableInfo tInfo : tablesInfo.values()) { TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors); tInfo.checkRegionChain(handler); @@ -775,7 +798,7 @@ public class HBaseFsck { return true; } - private TreeMap checkHdfsIntegrity(boolean fixHoles, + private SortedMap checkHdfsIntegrity(boolean fixHoles, boolean fixOverlaps) throws IOException { LOG.info("Checking HBase region split map from HDFS data..."); for (TableInfo tInfo : tablesInfo.values()) { @@ -1402,7 +1425,7 @@ public class HBaseFsck { * Collects all the pieces for each table and checks if there are missing, * repeated or overlapping ones. */ - TreeMap checkIntegrity() throws IOException { + SortedMap checkIntegrity() throws IOException { tablesInfo = new TreeMap (); List noHDFSRegionInfos = new ArrayList(); LOG.debug("There are " + regionInfoMap.size() + " region info entries"); @@ -2463,7 +2486,7 @@ public class HBaseFsck { /** * Prints summary of all tables found on the system. */ - private void printTableSummary(TreeMap tablesInfo) { + private void printTableSummary(SortedMap tablesInfo) { System.out.println("Summary:"); if (isMultiTableDescFound()) { System.out.println(" Multiple table descriptors were found.\n" @@ -2769,6 +2792,58 @@ public class HBaseFsck { } /** + * Contact hdfs and get all information about specified table directory into + * regioninfo list. + */ + static class WorkItemHdfsRegionInfo implements Runnable { + private HbckInfo hbi; + private HBaseFsck hbck; + private ErrorReporter errors; + private boolean done; + + WorkItemHdfsRegionInfo(HbckInfo hbi, HBaseFsck hbck, ErrorReporter errors) { + this.hbi = hbi; + this.hbck = hbck; + this.errors = errors; + this.done = false; + } + + synchronized boolean isDone() { + return done; + } + + @Override + public synchronized void run() { + try { + // only load entries that haven't been loaded yet. + if (hbi.getHdfsHRI() == null) { + try { + hbck.loadHdfsRegioninfo(hbi); + } catch (IOException ioe) { + String msg = "Orphan region in HDFS: Unable to load .regioninfo from table " + + Bytes.toString(hbi.getTableName()) + " in hdfs dir " + + hbi.getHdfsRegionDir() + + "! It may be an invalid format or version file. Treating as " + + "an orphaned regiondir."; + errors.reportError(ERROR_CODE.ORPHAN_HDFS_REGION, msg); + try { + hbck.debugLsr(hbi.getHdfsRegionDir()); + } catch (IOException ioe2) { + LOG.error("Unable to read directory " + hbi.getHdfsRegionDir(), ioe2); + return; // TODO convert this in to a future + } + hbck.orphanHdfsDirs.add(hbi); + return; + } + } + } finally { + done = true; + notifyAll(); + } + } + }; + + /** * Display the full report from fsck. This displays all live and dead region * servers, and all known regions. */ Modified: hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java URL: http://svn.apache.org/viewvc/hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1332069&r1=1332068&r2=1332069&view=diff ============================================================================== --- hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original) +++ hbase/branches/0.90/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Mon Apr 30 06:33:44 2012 @@ -817,10 +817,10 @@ public class TestHBaseFsck { * the region is not deployed when the table is disabled. */ @Test - public void testRegionShouldNotDeployed() throws Exception { - String table = "tableRegionShouldNotDeployed"; + public void testRegionShouldNotBeDeployed() throws Exception { + String table = "tableRegionShouldNotBeDeployed"; try { - LOG.info("Starting testRegionShouldNotDeployed."); + LOG.info("Starting testRegionShouldNotBeDeployed."); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); assertTrue(cluster.waitForActiveAndReadyMaster());