From: jxiang@apache.org
To: commits@hbase.apache.org
Subject: svn commit: r1413800 - in /hbase/branches/0.94/src: main/java/org/apache/hadoop/hbase/util/HBaseFsck.java test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
Date: Mon, 26 Nov 2012 19:40:32 -0000

Author: jxiang
Date: Mon Nov 26 19:40:31 2012
New Revision: 1413800

URL: http://svn.apache.org/viewvc?rev=1413800&view=rev
Log:
HBASE-7190 Add an option to hbck to check only meta and assignment

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java?rev=1413800&r1=1413799&r2=1413800&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java Mon Nov 26 19:40:31 2012
@@ -176,6 +176,7 @@ public class HBaseFsck {
   private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
   private boolean fixAssignments = false; // fix assignment errors?
   private boolean fixMeta = false; // fix meta errors?
+  private boolean checkHdfs = true; // load and check fs consistency?
   private boolean fixHdfsHoles = false; // fix fs holes?
   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
   private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
@@ -333,8 +334,8 @@ public class HBaseFsck {
    */
   public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
     // Initial pass to fix orphans.
-    if (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
-        || shouldFixHdfsOverlaps() || shouldFixTableOrphans()) {
+    if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
+        || shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
       LOG.info("Loading regioninfos HDFS");
       // if nothing is happening this should always complete in two iterations.
       int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
@@ -391,8 +392,10 @@ public class HBaseFsck {
     loadDeployedRegions();
 
     // load regiondirs and regioninfos from HDFS
-    loadHdfsRegionDirs();
-    loadHdfsRegionInfos();
+    if (shouldCheckHdfs()) {
+      loadHdfsRegionDirs();
+      loadHdfsRegionInfos();
+    }
 
     // Empty cells in .META.?
     reportEmptyMetaCells();
@@ -791,7 +794,7 @@ public class HBaseFsck {
     List<String> tmpList = new ArrayList<String>();
     tmpList.addAll(orphanTableDirs.keySet());
     HTableDescriptor[] htds = getHTableDescriptors(tmpList);
-    Iterator iter = orphanTableDirs.entrySet().iterator();
+    Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
     int j = 0;
     int numFailedCase = 0;
     while (iter.hasNext()) {
@@ -1492,8 +1495,12 @@ public class HBaseFsck {
       errors.print(msg);
       undeployRegions(hbi);
       setShouldRerun();
-      HBaseFsckRepair.fixUnassigned(admin, hbi.getHdfsHRI());
-      HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
+      HRegionInfo hri = hbi.getHdfsHRI();
+      if (hri == null) {
+        hri = hbi.metaEntry;
+      }
+      HBaseFsckRepair.fixUnassigned(admin, hri);
+      HBaseFsckRepair.waitUntilAssigned(admin, hri);
     }
   }
 
@@ -1505,7 +1512,8 @@ public class HBaseFsck {
     String descriptiveName = hbi.toString();
 
     boolean inMeta = hbi.metaEntry != null;
-    boolean inHdfs = hbi.getHdfsRegionDir()!= null;
+    // In case not checking HDFS, assume the region is on HDFS
+    boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
     boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
     boolean isDeployed = !hbi.deployedOn.isEmpty();
     boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
@@ -1515,7 +1523,7 @@ public class HBaseFsck {
     boolean splitParent =
       (hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
     boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
-    boolean recentlyModified = hbi.getHdfsRegionDir() != null &&
+    boolean recentlyModified = inHdfs &&
       hbi.getModTime() + timelag > System.currentTimeMillis();
 
     // ========== First the healthy cases =============
@@ -3128,6 +3136,14 @@ public class HBaseFsck {
     return fixMeta;
   }
 
+  public void setCheckHdfs(boolean checking) {
+    checkHdfs = checking;
+  }
+
+  boolean shouldCheckHdfs() {
+    return checkHdfs;
+  }
+
   public void setFixHdfsHoles(boolean shouldFix) {
     fixHdfsHoles = shouldFix;
   }
@@ -3283,6 +3299,8 @@ public class HBaseFsck {
     System.err.println("   -fix              Try to fix region assignments. This is for backwards compatiblity");
     System.err.println("   -fixAssignments   Try to fix region assignments. Replaces the old -fix");
     System.err.println("   -fixMeta          Try to fix meta problems. This assumes HDFS region info is good.");
+    System.err.println("   -noHdfsChecking   Don't load/check region info from HDFS."
+        + " Assumes META region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
     System.err.println("   -fixHdfsHoles     Try to fix region holes in hdfs.");
     System.err.println("   -fixHdfsOrphans   Try to fix region dirs with no .regioninfo file in hdfs");
     System.err.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
@@ -3386,6 +3404,8 @@ public class HBaseFsck {
         setFixAssignments(true);
       } else if (cmd.equals("-fixMeta")) {
         setFixMeta(true);
+      } else if (cmd.equals("-noHdfsChecking")) {
+        setCheckHdfs(false);
       } else if (cmd.equals("-fixHdfsHoles")) {
         setFixHdfsHoles(true);
       } else if (cmd.equals("-fixHdfsOrphans")) {
@@ -3417,6 +3437,7 @@ public class HBaseFsck {
         setFixVersionFile(true);
         setSidelineBigOverlaps(true);
         setFixSplitParents(false);
+        setCheckHdfs(true);
       } else if (cmd.equals("-repairHoles")) {
         // this will make all missing hdfs regions available but may lose data
         setFixHdfsHoles(true);
@@ -3426,6 +3447,7 @@ public class HBaseFsck {
         setFixHdfsOverlaps(false);
         setSidelineBigOverlaps(false);
         setFixSplitParents(false);
+        setCheckHdfs(true);
       } else if (cmd.equals("-maxOverlapsToSideline")) {
         if (i == args.length - 1) {
           System.err.println("-maxOverlapsToSideline needs a numeric value argument.");
@@ -3537,7 +3559,7 @@ public class HBaseFsck {
    * ls -r for debugging purposes
    */
   public static void debugLsr(Configuration conf, Path p) throws IOException {
-    if (!LOG.isDebugEnabled()) {
+    if (!LOG.isDebugEnabled() || p == null) {
       return;
     }
     FileSystem fs = p.getFileSystem(conf);

Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java?rev=1413800&r1=1413799&r2=1413800&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java Mon Nov 26 19:40:31 2012
@@ -1313,6 +1313,160 @@ public class TestHBaseFsck {
   }
 
   /**
+   * Test -noHdfsChecking option can detect and fix assignments issue.
+   */
+  @Test
+  public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
+    String table = "testFixAssignmentsAndNoHdfsChecking";
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by closing a region
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
+        Bytes.toBytes("B"), true, false, false, false);
+
+      // verify there is no other errors
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {
+        ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that noHdfsChecking report the same errors
+      HBaseFsck fsck = new HBaseFsck(conf);
+      fsck.connect();
+      fsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck, new ERROR_CODE[] {
+        ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that fixAssignments works fine with noHdfsChecking
+      fsck = new HBaseFsck(conf);
+      fsck.connect();
+      fsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixAssignments(true);
+      fsck.onlineHbck();
+      assertTrue(fsck.shouldRerun());
+      fsck.onlineHbck();
+      assertNoErrors(fsck);
+
+      assertEquals(ROWKEYS.length, countRows());
+    } finally {
+      deleteTable(table);
+    }
+  }
+
+  /**
+   * Test -noHdfsChecking option can detect region is not in meta but deployed.
+   * However, it can not fix it without checking Hdfs because we need to get
+   * the region info from Hdfs in this case, then to patch the meta.
+   */
+  @Test
+  public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
+    String table = "testFixMetaNotWorkingWithNoHdfsChecking";
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by deleting a region from the metadata
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
+        Bytes.toBytes("B"), false, true, false, false);
+
+      // verify there is no other errors
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {
+        ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that noHdfsChecking report the same errors
+      HBaseFsck fsck = new HBaseFsck(conf);
+      fsck.connect();
+      fsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck, new ERROR_CODE[] {
+        ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that fixMeta doesn't work with noHdfsChecking
+      fsck = new HBaseFsck(conf);
+      fsck.connect();
+      fsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixAssignments(true);
+      fsck.setFixMeta(true);
+      fsck.onlineHbck();
+      assertFalse(fsck.shouldRerun());
+      assertErrors(fsck, new ERROR_CODE[] {
+        ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
+    } finally {
+      deleteTable(table);
+    }
+  }
+
+  /**
+   * Test -fixHdfsHoles doesn't work with -noHdfsChecking option,
+   * and -noHdfsChecking can't detect orphan Hdfs region.
+   */
+  @Test
+  public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
+    String table = "testFixHdfsHolesNotWorkingWithNoHdfsChecking";
+    try {
+      setupTable(table);
+      assertEquals(ROWKEYS.length, countRows());
+
+      // Mess it up by creating an overlap in the metadata
+      TEST_UTIL.getHBaseAdmin().disableTable(table);
+      deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
+        Bytes.toBytes("B"), true, true, false, true);
+      TEST_UTIL.getHBaseAdmin().enableTable(table);
+
+      HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
+        Bytes.toBytes("A2"), Bytes.toBytes("B"));
+      TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
+        .waitForAssignment(hriOverlap);
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertErrors(hbck, new ERROR_CODE[] {
+        ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
+        ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that noHdfsChecking can't detect ORPHAN_HDFS_REGION
+      HBaseFsck fsck = new HBaseFsck(conf);
+      fsck.connect();
+      fsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.onlineHbck();
+      assertErrors(fsck, new ERROR_CODE[] {
+        ERROR_CODE.HOLE_IN_REGION_CHAIN});
+
+      // verify that fixHdfsHoles doesn't work with noHdfsChecking
+      fsck = new HBaseFsck(conf);
+      fsck.connect();
+      fsck.setDisplayFullReport(); // i.e. -details
+      fsck.setTimeLag(0);
+      fsck.setCheckHdfs(false);
+      fsck.setFixHdfsHoles(true);
+      fsck.setFixHdfsOverlaps(true);
+      fsck.setFixHdfsOrphans(true);
+      fsck.onlineHbck();
+      assertFalse(fsck.shouldRerun());
+      assertErrors(fsck, new ERROR_CODE[] {
+        ERROR_CODE.HOLE_IN_REGION_CHAIN});
+    } finally {
+      if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
+        TEST_UTIL.getHBaseAdmin().enableTable(table);
+      }
+      deleteTable(table);
+    }
+  }
+
+  /**
    * We don't have an easy way to verify that a flush completed, so we loop until we find a
    * legitimate hfile and return it.
    * @param fs
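
For context, the new -noHdfsChecking mode added by this commit can be driven from the
command line (e.g. running hbck with -details -noHdfsChecking -fixAssignments, per the
usage text above) or through the same setters the new tests call. Below is only a minimal
illustrative sketch, not part of r1413800: the class name is made up, and it is placed in
the org.apache.hadoop.hbase.util package purely so it can reach the same package-visible
methods the tests rely on.

    package org.apache.hadoop.hbase.util;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    // Hypothetical example class (not in the commit): runs hbck against META and
    // region assignments only, mirroring testFixAssignmentsAndNoHdfsChecking above.
    public class MetaOnlyHbckSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseFsck fsck = new HBaseFsck(conf);
        fsck.connect();
        fsck.setDisplayFullReport();  // equivalent of passing -details
        fsck.setTimeLag(0);
        fsck.setCheckHdfs(false);     // the new switch: skip loading/checking HDFS region info
        fsck.setFixAssignments(true); // repair assignment problems found from META
        fsck.onlineHbck();
        if (fsck.shouldRerun()) {
          // the tests above run a second pass to confirm the fixes took effect
          fsck.onlineHbck();
        }
      }
    }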