From: appy@apache.org
To: commits@hbase.apache.org
Subject: hbase git commit: HBASE-11625 - Verifies data before building HFileBlock. - Adds HFileBlock.Header class which contains information about location of fields. Testing: Adds CorruptedFSReaderImpl to TestChecksum.
Date: Mon, 11 Jul 2016 19:44:21 +0000 (UTC)

Repository: hbase
Updated Branches:
  refs/heads/branch-1.1 25c7dee21 -> 79b77e354

HBASE-11625 - Verifies data before building HFileBlock.
- Adds HFileBlock.Header class which contains information about location of fields.
Testing: Adds CorruptedFSReaderImpl to TestChecksum.
Change-Id: I107e9636b28abb6b15ec329e885f1e31b1b1b988 Signed-off-by: stack Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/79b77e35 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/79b77e35 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/79b77e35 Branch: refs/heads/branch-1.1 Commit: 79b77e3542a93d362c5565c98ce8d8e1f0044337 Parents: 25c7dee Author: Apekshit Authored: Thu May 5 17:05:17 2016 -0700 Committer: Apekshit Sharma Committed: Mon Jul 11 11:39:48 2016 -0700 ---------------------------------------------------------------------- .../hadoop/hbase/io/hfile/ChecksumUtil.java | 80 ++++---- .../hadoop/hbase/io/hfile/HFileBlock.java | 187 ++++++++++--------- .../hadoop/hbase/io/hfile/TestChecksum.java | 60 ++++-- 3 files changed, 190 insertions(+), 137 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/79b77e35/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java index 0e03a42..ecf5f00 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java @@ -22,11 +22,13 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.zip.Checksum; +import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ChecksumType; +import org.apache.hadoop.util.DataChecksum; /** * Utility methods to compute and validate checksums. @@ -90,41 +92,37 @@ public class ChecksumUtil { } /** - * Validates that the data in the specified HFileBlock matches the - * checksum. Generates the checksum for the data and - * then validate that it matches the value stored in the header. - * If there is a checksum mismatch, then return false. Otherwise - * return true. - * The header is extracted from the specified HFileBlock while the - * data-to-be-verified is extracted from 'data'. + * Validates that the data in the specified HFileBlock matches the checksum. Generates the + * checksums for the data and then validate that it matches those stored in the end of the data. + * @param buffer Contains the data in following order: HFileBlock header, data, checksums. + * @param path Path of the HFile to which the {@code data} belongs. Only used for logging. + * @param offset offset of the data being validated. Only used for logging. + * @param hdrSize Size of the block header in {@code data}. Only used for logging. + * @return True if checksum matches, else false. */ - static boolean validateBlockChecksum(Path path, HFileBlock block, - byte[] data, int hdrSize) throws IOException { - - // If this is an older version of the block that does not have - // checksums, then return false indicating that checksum verification - // did not succeed. Actually, this methiod should never be called - // when the minorVersion is 0, thus this is a defensive check for a - // cannot-happen case. 
Since this is a cannot-happen case, it is - // better to return false to indicate a checksum validation failure. - if (!block.getHFileContext().isUseHBaseChecksum()) { - return false; - } - - // Get a checksum object based on the type of checksum that is - // set in the HFileBlock header. A ChecksumType.NULL indicates that - // the caller is not interested in validating checksums, so we - // always return true. - ChecksumType cktype = ChecksumType.codeToType(block.getChecksumType()); + static boolean validateChecksum(ByteBuffer buffer, Path path, long offset, int hdrSize) + throws IOException { + // A ChecksumType.NULL indicates that the caller is not interested in validating checksums, + // so we always return true. + ChecksumType cktype = + ChecksumType.codeToType(buffer.get(HFileBlock.Header.CHECKSUM_TYPE_INDEX)); if (cktype == ChecksumType.NULL) { return true; // No checkums validations needed for this block. } - Checksum checksumObject = cktype.getChecksumObject(); - checksumObject.reset(); - // read in the stored value of the checksum size from the header. - int bytesPerChecksum = block.getBytesPerChecksum(); - + int bytesPerChecksum = buffer.getInt(HFileBlock.Header.BYTES_PER_CHECKSUM_INDEX); + int onDiskDataSizeWithHeader = + buffer.getInt(HFileBlock.Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX); + + if (HFile.LOG.isTraceEnabled()) { + HFile.LOG.info("dataLength=" + buffer.capacity() + + ", sizeWithHeader=" + onDiskDataSizeWithHeader + + ", checksumType=" + cktype.getName() + + ", file=" + path.toString() + + ", offset=" + offset + + ", headerSize=" + hdrSize + + ", bytesPerChecksum=" + bytesPerChecksum); + } // bytesPerChecksum is always larger than the size of the header if (bytesPerChecksum < hdrSize) { String msg = "Unsupported value of bytesPerChecksum. " + @@ -133,19 +131,23 @@ public class ChecksumUtil { HFile.LOG.warn(msg); return false; // cannot happen case, unable to verify checksum } - // Extract the header and compute checksum for the header. - ByteBuffer hdr = block.getBufferWithHeader(); - if (hdr.hasArray()) { - checksumObject.update(hdr.array(), hdr.arrayOffset(), hdrSize); + byte[] data; + if (buffer.hasArray()) { + data = buffer.array(); } else { - checksumObject.update(ByteBufferUtils.toBytes(hdr, 0, hdrSize), 0, hdrSize); + data = ByteBufferUtils.toBytes(buffer, 0); } + Checksum checksumObject = cktype.getChecksumObject(); + checksumObject.reset(); + // Extract the header and compute checksum for the header. 
+ checksumObject.update(data, 0, hdrSize); + int off = hdrSize; int consumed = hdrSize; - int bytesLeft = block.getOnDiskDataSizeWithHeader() - off; - int cksumOffset = block.getOnDiskDataSizeWithHeader(); - + int cksumOffset = onDiskDataSizeWithHeader; + int bytesLeft = cksumOffset - off; + // validate each chunk while (bytesLeft > 0) { int thisChunkSize = bytesPerChecksum - consumed; @@ -161,7 +163,7 @@ public class ChecksumUtil { checksumObject.getValue() + ", total data size " + data.length + " Checksum data range offset " + off + " len " + count + - HFileBlock.toStringHeader(block.getBufferReadOnly()); + HFileBlock.toStringHeader(buffer); HFile.LOG.warn(msg); if (generateExceptions) { throw new IOException(msg); // this is only for unit tests http://git-wip-us.apache.org/repos/asf/hbase/blob/79b77e35/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index 0a95888..08bc87e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -156,6 +156,26 @@ public class HFileBlock implements Cacheable { .registerDeserializer(blockDeserializer); } + // Todo: encapsulate Header related logic in this inner class. + static class Header { + // Format of header is: + // 8 bytes - block magic + // 4 bytes int - onDiskSizeWithoutHeader + // 4 bytes int - uncompressedSizeWithoutHeader + // 8 bytes long - prevBlockOffset + // The following 3 are only present if header contains checksum information + // 1 byte - checksum type + // 4 byte int - bytes per checksum + // 4 byte int - onDiskDataSizeWithHeader + static int BLOCK_MAGIC_INDEX = 0; + static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8; + static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12; + static int PREV_BLOCK_OFFSET_INDEX = 16; + static int CHECKSUM_TYPE_INDEX = 24; + static int BYTES_PER_CHECKSUM_INDEX = 25; + static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29; + } + /** Type of block. Header field 0. 
*/ private BlockType blockType; @@ -251,15 +271,15 @@ public class HFileBlock implements Cacheable { HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException { b.rewind(); blockType = BlockType.read(b); - onDiskSizeWithoutHeader = b.getInt(); - uncompressedSizeWithoutHeader = b.getInt(); - prevBlockOffset = b.getLong(); + onDiskSizeWithoutHeader = b.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX); + uncompressedSizeWithoutHeader = b.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX); + prevBlockOffset = b.getLong(Header.PREV_BLOCK_OFFSET_INDEX); HFileContextBuilder contextBuilder = new HFileContextBuilder(); contextBuilder.withHBaseCheckSum(usesHBaseChecksum); if (usesHBaseChecksum) { - contextBuilder.withChecksumType(ChecksumType.codeToType(b.get())); - contextBuilder.withBytesPerCheckSum(b.getInt()); - this.onDiskDataSizeWithHeader = b.getInt(); + contextBuilder.withChecksumType(ChecksumType.codeToType(b.get(Header.CHECKSUM_TYPE_INDEX))); + contextBuilder.withBytesPerCheckSum(b.getInt(Header.BYTES_PER_CHECKSUM_INDEX)); + this.onDiskDataSizeWithHeader = b.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX); } else { contextBuilder.withChecksumType(ChecksumType.NULL); contextBuilder.withBytesPerCheckSum(0); @@ -484,23 +504,21 @@ public class HFileBlock implements Cacheable { /** * Called after reading a block with provided onDiskSizeWithHeader. */ - private void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader) - throws IOException { - if (onDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) { + private static void validateOnDiskSizeWithoutHeader(int expectedOnDiskSizeWithoutHeader, + int actualOnDiskSizeWithoutHeader, ByteBuffer buf, long offset) throws IOException { + if (actualOnDiskSizeWithoutHeader != expectedOnDiskSizeWithoutHeader) { + // We make the read-only copy here instead of when passing the parameter to this function + // to make duplicates only in failure cases, instead of every single time. + ByteBuffer bufReadOnly = buf.asReadOnlyBuffer(); String dataBegin = null; - if (buf.hasArray()) { - dataBegin = Bytes.toStringBinary(buf.array(), buf.arrayOffset(), Math.min(32, buf.limit())); - } else { - ByteBuffer bufDup = getBufferReadOnly(); - byte[] dataBeginBytes = new byte[Math.min(32, bufDup.limit() - bufDup.position())]; - bufDup.get(dataBeginBytes); - dataBegin = Bytes.toStringBinary(dataBeginBytes); - } + byte[] dataBeginBytes = new byte[Math.min(32, bufReadOnly.limit() - bufReadOnly.position())]; + bufReadOnly.get(dataBeginBytes); + dataBegin = Bytes.toStringBinary(dataBeginBytes); String blockInfoMsg = "Block offset: " + offset + ", data starts with: " + dataBegin; throw new IOException("On-disk size without header provided is " + expectedOnDiskSizeWithoutHeader + ", but block " - + "header contains " + onDiskSizeWithoutHeader + ". " + + + "header contains " + actualOnDiskSizeWithoutHeader + ". " + blockInfoMsg); } } @@ -595,13 +613,23 @@ public class HFileBlock implements Cacheable { } /** An additional sanity-check in case no compression or encryption is being used. 
*/ - public void assumeUncompressed() throws IOException { - if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader + - totalChecksumBytes()) { + public static void verifyUncompressed(ByteBuffer buf, boolean useHBaseChecksum) + throws IOException { + int onDiskSizeWithoutHeader = buf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX); + int uncompressedSizeWithoutHeader = buf.getInt(Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX); + int onDiskDataSizeWithHeader; + int checksumBytes = 0; + if (useHBaseChecksum) { + onDiskDataSizeWithHeader = buf.getInt(Header.ON_DISK_DATA_SIZE_WITH_HEADER_INDEX); + checksumBytes = (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader, + buf.getInt(Header.BYTES_PER_CHECKSUM_INDEX)); + } + + if (onDiskSizeWithoutHeader != uncompressedSizeWithoutHeader + checksumBytes) { throw new IOException("Using no compression but " + "onDiskSizeWithoutHeader=" + onDiskSizeWithoutHeader + ", " + "uncompressedSizeWithoutHeader=" + uncompressedSizeWithoutHeader - + ", numChecksumbytes=" + totalChecksumBytes()); + + ", numChecksumbytes=" + checksumBytes); } } @@ -1378,10 +1406,8 @@ public class HFileBlock implements Cacheable { * -1 if it could not be determined * @throws IOException */ - protected int readAtOffset(FSDataInputStream istream, - byte[] dest, int destOffset, int size, - boolean peekIntoNextBlock, long fileOffset, boolean pread) - throws IOException { + protected int readAtOffset(FSDataInputStream istream, byte[] dest, int destOffset, int size, + boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException { if (peekIntoNextBlock && destOffset + size + hdrSize > dest.length) { // We are asked to read the next block's header as well, but there is @@ -1501,10 +1527,8 @@ public class HFileBlock implements Cacheable { boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum(); FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum); - HFileBlock blk = readBlockDataInternal(is, offset, - onDiskSizeWithHeaderL, - uncompressedSize, pread, - doVerificationThruHBaseChecksum); + HFileBlock blk = readBlockDataInternal(is, offset, (int) onDiskSizeWithHeaderL, + uncompressedSize, pread, doVerificationThruHBaseChecksum); if (blk == null) { HFile.LOG.warn("HBase checksum verification failed for file " + path + " at offset " + @@ -1530,9 +1554,8 @@ public class HFileBlock implements Cacheable { // a few more than precisely this number. is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD); doVerificationThruHBaseChecksum = false; - blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, - uncompressedSize, pread, - doVerificationThruHBaseChecksum); + blk = readBlockDataInternal(is, offset, (int) onDiskSizeWithHeaderL, uncompressedSize, + pread, doVerificationThruHBaseChecksum); if (blk != null) { HFile.LOG.warn("HDFS checksum verification suceeded for file " + path + " at offset " + @@ -1562,7 +1585,7 @@ public class HFileBlock implements Cacheable { * Reads a version 2 block. * * @param offset the offset in the stream to read at - * @param onDiskSizeWithHeaderL the on-disk size of the block, including + * @param onDiskSizeWithHeader the on-disk size of the block, including * the header, or -1 if unknown * @param uncompressedSize the uncompressed size of the the block. Always * expected to be -1. This parameter is only used in version 1. @@ -1571,13 +1594,13 @@ public class HFileBlock implements Cacheable { * If HBase checksum is switched off, then use HDFS checksum. 
* @return the HFileBlock or null if there is a HBase checksum mismatch */ - private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, - long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread, + protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, + int onDiskSizeWithHeader, int uncompressedSize, boolean pread, boolean verifyChecksum) throws IOException { if (offset < 0) { throw new IOException("Invalid offset=" + offset + " trying to read " - + "block (onDiskSize=" + onDiskSizeWithHeaderL + + "block (onDiskSize=" + onDiskSizeWithHeader + ", uncompressedSize=" + uncompressedSize + ")"); } @@ -1586,15 +1609,14 @@ public class HFileBlock implements Cacheable { "the uncompressed size parameter"); } - if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1) - || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) { - throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL + if ((onDiskSizeWithHeader < hdrSize && onDiskSizeWithHeader != -1) + || onDiskSizeWithHeader >= Integer.MAX_VALUE) { + throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeader + ": expected to be at least " + hdrSize + " and at most " + Integer.MAX_VALUE + ", or -1 (offset=" + offset + ", uncompressedSize=" + uncompressedSize + ")"); } - int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL; // See if we can avoid reading the header. This is desirable, because // we will not incur a backward seek operation if we have already // read this block's header as part of the previous read's look-ahead. @@ -1609,7 +1631,6 @@ public class HFileBlock implements Cacheable { int nextBlockOnDiskSize = 0; byte[] onDiskBlock = null; - HFileBlock b = null; if (onDiskSizeWithHeader > 0) { // We know the total on-disk size. Read the entire block into memory, // then parse the header. This code path is used when @@ -1635,28 +1656,12 @@ public class HFileBlock implements Cacheable { } else { headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize); } - // We know the total on-disk size but not the uncompressed size. Parse the header. - try { - // TODO: FIX!!! Expensive parse just to get a length - b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum()); - } catch (IOException ex) { - // Seen in load testing. Provide comprehensive debug info. - throw new IOException("Failed to read compressed block at " - + offset - + ", onDiskSizeWithoutHeader=" - + onDiskSizeWithHeader - + ", preReadHeaderSize=" - + hdrSize - + ", header.length=" - + prefetchedHeader.header.length - + ", header bytes: " - + Bytes.toStringBinary(prefetchedHeader.header, 0, - hdrSize), ex); - } // if the caller specifies a onDiskSizeWithHeader, validate it. - int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize; - assert onDiskSizeWithoutHeader >= 0; - b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader); + int expectedOnDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize; + int actualOnDiskSizeWithoutHeader = + headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX); + validateOnDiskSizeWithoutHeader(expectedOnDiskSizeWithoutHeader, + actualOnDiskSizeWithoutHeader, headerBuf, offset); } else { // Check headerBuf to see if we have read this block's header as part of // reading the previous block. This is an optimization of peeking into @@ -1677,22 +1682,21 @@ public class HFileBlock implements Cacheable { readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(), hdrSize, false, offset, pread); } - // TODO: FIX!!! 
Expensive parse just to get a length - b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum()); - onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize]; - // headerBuf is HBB + int onDiskSizeWithoutHeader = headerBuf.getInt(Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX); + onDiskSizeWithHeader = onDiskSizeWithoutHeader + hdrSize; + onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize); nextBlockOnDiskSize = - readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader() - - hdrSize, true, offset + hdrSize, pread); - onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize; + readAtOffset(is, onDiskBlock, hdrSize, onDiskSizeWithHeader - hdrSize, true, + offset + hdrSize, pread); } + ByteBuffer onDiskBlockByteBuffer = ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader); if (!fileContext.isCompressedOrEncrypted()) { - b.assumeUncompressed(); + verifyUncompressed(headerBuf, fileContext.isUseHBaseChecksum()); } - if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) { + if (verifyChecksum && !validateChecksum(offset, onDiskBlockByteBuffer, hdrSize)) { return null; // checksum mismatch } @@ -1700,8 +1704,7 @@ public class HFileBlock implements Cacheable { // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already // contains the header of next block, so no need to set next // block's header in it. - b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader), - this.fileContext.isUseHBaseChecksum()); + HFileBlock b = new HFileBlock(onDiskBlockByteBuffer, this.fileContext.isUseHBaseChecksum()); b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize; @@ -1736,14 +1739,21 @@ public class HFileBlock implements Cacheable { } /** - * Generates the checksum for the header as well as the data and - * then validates that it matches the value stored in the header. - * If there is a checksum mismatch, then return false. Otherwise - * return true. + * Generates the checksum for the header as well as the data and then validates it. + * If the block doesn't uses checksum, returns false. + * @return True if checksum matches, else false. */ - protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize) + protected boolean validateChecksum(long offset, ByteBuffer data, int hdrSize) throws IOException { - return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize); + // If this is an older version of the block that does not have checksums, then return false + // indicating that checksum verification did not succeed. Actually, this method should never + // be called when the minorVersion is 0, thus this is a defensive check for a cannot-happen + // case. Since this is a cannot-happen case, it is better to return false to indicate a + // checksum validation failure. + if (!fileContext.isUseHBaseChecksum()) { + return false; + } + return ChecksumUtil.validateChecksum(data, path, offset, hdrSize); } @Override @@ -1846,19 +1856,23 @@ public class HFileBlock implements Cacheable { } /** - * Calcuate the number of bytes required to store all the checksums - * for this block. Each checksum value is a 4 byte integer. + * Calculate the number of bytes required to store all the checksums for this block. Each + * checksum value is a 4 byte integer ({@link HFileBlock#CHECKSUM_SIZE}). 
*/ int totalChecksumBytes() { + return HFileBlock.totalChecksumBytes(this.fileContext, onDiskDataSizeWithHeader); + } + + private static int totalChecksumBytes(HFileContext fileContext, int onDiskDataSizeWithHeader) { // If the hfile block has minorVersion 0, then there are no checksum // data to validate. Similarly, a zero value in this.bytesPerChecksum // indicates that cached blocks do not have checksum data because // checksums were already validated when the block was read from disk. - if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) { + if (!fileContext.isUseHBaseChecksum() || fileContext.getBytesPerChecksum() == 0) { return 0; } return (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader, - this.fileContext.getBytesPerChecksum()); + fileContext.getBytesPerChecksum()); } /** @@ -1908,10 +1922,9 @@ public class HFileBlock implements Cacheable { * This is mostly helpful for debugging. This assumes that the block * has minor version > 0. */ - static String toStringHeader(ByteBuffer buf) throws IOException { + public static String toStringHeader(ByteBuffer buf) throws IOException { byte[] magicBuf = new byte[Math.min(buf.limit() - buf.position(), BlockType.MAGIC_LENGTH)]; buf.get(magicBuf); - BlockType bt = BlockType.parse(magicBuf, 0, BlockType.MAGIC_LENGTH); int compressedBlockSizeNoHeader = buf.getInt(); int uncompressedBlockSizeNoHeader = buf.getInt(); long prevBlockOffset = buf.getLong(); @@ -1919,7 +1932,7 @@ public class HFileBlock implements Cacheable { long bytesPerChecksum = buf.getInt(); long onDiskDataSizeWithHeader = buf.getInt(); return " Header dump: magic: " + Bytes.toString(magicBuf) + - " blockType " + bt + + " blockType " + magicBuf + " compressedBlockSizeNoHeader " + compressedBlockSizeNoHeader + " uncompressedBlockSizeNoHeader " + http://git-wip-us.apache.org/repos/asf/hbase/blob/79b77e35/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java index 011ddbf..8596bf5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java @@ -65,6 +65,7 @@ public class TestChecksum { public void setUp() throws Exception { fs = HFileSystem.get(TEST_UTIL.getConfiguration()); hfs = (HFileSystem)fs; + ChecksumUtil.generateExceptionForChecksumFailureForTest(false); } /** @@ -113,7 +114,7 @@ public class TestChecksum { .withIncludesTags(useTags) .withHBaseCheckSum(true) .build(); - HFileBlock.FSReader hbr = new FSReaderImplTest(is, totalSize, fs, path, meta); + HFileBlock.FSReader hbr = new CorruptedFSReaderImpl(is, totalSize, fs, path, meta); HFileBlock b = hbr.readBlockData(0, -1, -1, pread); b.sanityCheck(); assertEquals(4936, b.getUncompressedSizeWithoutHeader()); @@ -155,7 +156,7 @@ public class TestChecksum { HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false); assertEquals(false, newfs.useHBaseChecksum()); is = new FSDataInputStreamWrapper(newfs, path); - hbr = new FSReaderImplTest(is, totalSize, newfs, path, meta); + hbr = new CorruptedFSReaderImpl(is, totalSize, newfs, path, meta); b = hbr.readBlockData(0, -1, -1, pread); is.close(); b.sanityCheck(); @@ -176,7 +177,7 @@ public class TestChecksum { } } - /** + /** * Test different values of 
bytesPerChecksum */ @Test @@ -274,20 +275,57 @@ public class TestChecksum { } /** - * A class that introduces hbase-checksum failures while - * reading data from hfiles. This should trigger the hdfs level - * checksum validations. + * This class is to test checksum behavior when data is corrupted. It mimics the following + * behavior: + * - When fs checksum is disabled, hbase may get corrupted data from hdfs. If verifyChecksum + * is true, it means hbase checksum is on and fs checksum is off, so we corrupt the data. + * - When fs checksum is enabled, hdfs will get a different copy from another node, and will + * always return correct data. So we don't corrupt the data when verifyChecksum for hbase is + * off. */ - static private class FSReaderImplTest extends HFileBlock.FSReaderImpl { - public FSReaderImplTest(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs, + static private class CorruptedFSReaderImpl extends HFileBlock.FSReaderImpl { + /** + * If set to true, corrupt reads using readAtOffset(...). + */ + boolean corruptDataStream = false; + + public CorruptedFSReaderImpl(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs, Path path, HFileContext meta) throws IOException { super(istream, fileSize, (HFileSystem) fs, path, meta); } @Override - protected boolean validateBlockChecksum(HFileBlock block, - byte[] data, int hdrSize) throws IOException { - return false; // checksum validation failure + protected HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, + int onDiskSizeWithHeaderL, int uncompressedSize, boolean pread, boolean verifyChecksum) + throws IOException { + if (verifyChecksum) { + corruptDataStream = true; + } + HFileBlock b = super.readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, + uncompressedSize, pread, verifyChecksum); + corruptDataStream = false; + return b; + } + + @Override + protected int readAtOffset(FSDataInputStream istream, byte [] dest, int destOffset, int size, + boolean peekIntoNextBlock, long fileOffset, boolean pread) throws IOException { + int returnValue = super.readAtOffset(istream, dest, destOffset, size, peekIntoNextBlock, + fileOffset, pread); + if (!corruptDataStream) { + return returnValue; + } + // Corrupt 3rd character of block magic of next block's header. + if (peekIntoNextBlock) { + dest[destOffset + size + 3] = 0b00000000; + } + // We might be reading this block's header too, corrupt it. + dest[destOffset + 1] = 0b00000000; + // Corrupt non header data + if (size > hdrSize) { + dest[destOffset + hdrSize + 1] = 0b00000000; + } + return returnValue; } } }
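
----------------------------------------------------------------------
Reading aid (not part of the patch): the self-contained Java sketch below illustrates the per-chunk verification that the reworked ChecksumUtil.validateChecksum performs, driven only by the new HFileBlock.Header offsets. The class and method names are hypothetical, and java.util.zip.CRC32 stands in for HBase's ChecksumType machinery; treat it as a sketch under those assumptions, not HBase code.

import java.nio.ByteBuffer;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

/**
 * Sketch of per-chunk HFile block checksum verification.
 * Assumes {@code block} holds header + data + trailing checksums, i.e. it is at
 * least onDiskDataSizeWithHeader bytes of checksummed content followed by one
 * 4-byte checksum per chunk.
 */
public class BlockChecksumSketch {
  // Header offsets copied from the HFileBlock.Header class added by this patch.
  static final int BYTES_PER_CHECKSUM_INDEX = 25;
  static final int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;

  /** @return true if every stored checksum matches the recomputed one. */
  static boolean verify(byte[] block) {
    ByteBuffer buf = ByteBuffer.wrap(block);
    // Absolute gets leave the buffer position untouched, so no HFileBlock object
    // has to be built just to learn these two values.
    int bytesPerChecksum = buf.getInt(BYTES_PER_CHECKSUM_INDEX);
    int onDiskDataSizeWithHeader = buf.getInt(ON_DISK_DATA_SIZE_WITH_HEADER_INDEX);

    Checksum crc = new CRC32();                  // stand-in for ChecksumType.getChecksumObject()
    int off = 0;                                 // header is checksummed together with the data
    int cksumOffset = onDiskDataSizeWithHeader;  // checksums are appended after the data
    while (off < onDiskDataSizeWithHeader) {
      // Each chunk covers bytesPerChecksum bytes, except possibly the last one.
      int chunkLen = Math.min(bytesPerChecksum, onDiskDataSizeWithHeader - off);
      crc.reset();
      crc.update(block, off, chunkLen);
      int stored = buf.getInt(cksumOffset);
      if (stored != (int) crc.getValue()) {
        return false;                            // mismatch: caller can fall back to HDFS checksums
      }
      off += chunkLen;
      cksumOffset += 4;                          // each stored checksum is a 4-byte int
    }
    return true;
  }
}

Reading bytesPerChecksum and onDiskDataSizeWithHeader with absolute ByteBuffer gets is what lets the patch drop the "expensive parse just to get a length" HFileBlock construction from the read path.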