Return-Path: X-Original-To: apmail-hbase-commits-archive@www.apache.org Delivered-To: apmail-hbase-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 7EA7E1852C for ; Thu, 4 Feb 2016 17:39:03 +0000 (UTC) Received: (qmail 95089 invoked by uid 500); 4 Feb 2016 17:31:22 -0000 Delivered-To: apmail-hbase-commits-archive@hbase.apache.org Received: (qmail 94993 invoked by uid 500); 4 Feb 2016 17:31:22 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 94262 invoked by uid 99); 4 Feb 2016 17:31:22 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 04 Feb 2016 17:31:22 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id F3855E3927; Thu, 4 Feb 2016 17:31:21 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: misty@apache.org To: commits@hbase.apache.org Date: Thu, 04 Feb 2016 17:31:33 -0000 Message-Id: <9920fc727d1247f0a07942480a7426fc@git.apache.org> In-Reply-To: <8101ec9177374b809db23308cabb10df@git.apache.org> References: <8101ec9177374b809db23308cabb10df@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [13/23] hbase-site git commit: Published site at 2cf8af5bf1d501156cbb3b421cf75c1051ead7d9. http://git-wip-us.apache.org/repos/asf/hbase-site/blob/32672884/devapidocs/src-html/org/apache/hadoop/hbase/io/hfile/HFileBlock.PrefetchedHeader.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/io/hfile/HFileBlock.PrefetchedHeader.html b/devapidocs/src-html/org/apache/hadoop/hbase/io/hfile/HFileBlock.PrefetchedHeader.html index 745e703..1587e5e 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/io/hfile/HFileBlock.PrefetchedHeader.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/io/hfile/HFileBlock.PrefetchedHeader.html @@ -1361,609 +1361,610 @@ 1353 /** The filesystem used to access data */ 1354 protected HFileSystem hfs; 1355 -1356 /** The path (if any) where this data is coming from */ -1357 protected Path path; -1358 -1359 private final Lock streamLock = new ReentrantLock(); +1356 private final Lock streamLock = new ReentrantLock(); +1357 +1358 /** The default buffer size for our buffered streams */ +1359 public static final int DEFAULT_BUFFER_SIZE = 1 << 20; 1360 -1361 /** The default buffer size for our buffered streams */ -1362 public static final int DEFAULT_BUFFER_SIZE = 1 << 20; -1363 -1364 protected HFileContext fileContext; -1365 -1366 public FSReaderImpl(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path, -1367 HFileContext fileContext) throws IOException { -1368 this.fileSize = fileSize; -1369 this.hfs = hfs; -1370 this.path = path; -1371 this.fileContext = fileContext; -1372 this.hdrSize = headerSize(fileContext.isUseHBaseChecksum()); -1373 -1374 this.streamWrapper = stream; -1375 // Older versions of HBase didn't support checksum. 
-1376 this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum()); -1377 defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext); -1378 encodedBlockDecodingCtx = defaultDecodingCtx; -1379 } -1380 -1381 /** -1382 * A constructor that reads files with the latest minor version. -1383 * This is used by unit tests only. -1384 */ -1385 FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext) -1386 throws IOException { -1387 this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext); -1388 } -1389 -1390 public BlockIterator blockRange(final long startOffset, final long endOffset) { -1391 final FSReader owner = this; // handle for inner class -1392 return new BlockIterator() { -1393 private long offset = startOffset; -1394 -1395 @Override -1396 public HFileBlock nextBlock() throws IOException { -1397 if (offset >= endOffset) -1398 return null; -1399 HFileBlock b = readBlockData(offset, -1, -1, false); -1400 offset += b.getOnDiskSizeWithHeader(); -1401 return b.unpack(fileContext, owner); -1402 } -1403 -1404 @Override -1405 public HFileBlock nextBlockWithBlockType(BlockType blockType) -1406 throws IOException { -1407 HFileBlock blk = nextBlock(); -1408 if (blk.getBlockType() != blockType) { -1409 throw new IOException("Expected block of type " + blockType -1410 + " but found " + blk.getBlockType()); -1411 } -1412 return blk; -1413 } -1414 }; -1415 } -1416 -1417 /** -1418 * Does a positional read or a seek and read into the given buffer. Returns -1419 * the on-disk size of the next block, or -1 if it could not be determined. -1420 * -1421 * @param dest destination buffer -1422 * @param destOffset offset in the destination buffer -1423 * @param size size of the block to be read -1424 * @param peekIntoNextBlock whether to read the next block's on-disk size -1425 * @param fileOffset position in the stream to read at -1426 * @param pread whether we should do a positional read -1427 * @param istream The input source of data -1428 * @return the on-disk size of the next block with header size included, or -1429 * -1 if it could not be determined -1430 * @throws IOException -1431 */ -1432 protected int readAtOffset(FSDataInputStream istream, -1433 byte[] dest, int destOffset, int size, -1434 boolean peekIntoNextBlock, long fileOffset, boolean pread) -1435 throws IOException { -1436 if (peekIntoNextBlock && -1437 destOffset + size + hdrSize > dest.length) { -1438 // We are asked to read the next block's header as well, but there is -1439 // not enough room in the array. -1440 throw new IOException("Attempted to read " + size + " bytes and " + -1441 hdrSize + " bytes of next header into a " + dest.length + -1442 "-byte array at offset " + destOffset); -1443 } -1444 -1445 if (!pread && streamLock.tryLock()) { -1446 // Seek + read. Better for scanning. -1447 try { -1448 istream.seek(fileOffset); -1449 -1450 long realOffset = istream.getPos(); -1451 if (realOffset != fileOffset) { -1452 throw new IOException("Tried to seek to " + fileOffset + " to " -1453 + "read " + size + " bytes, but pos=" + realOffset -1454 + " after seek"); -1455 } -1456 -1457 if (!peekIntoNextBlock) { -1458 IOUtils.readFully(istream, dest, destOffset, size); -1459 return -1; -1460 } -1461 -1462 // Try to read the next block header. -1463 if (!readWithExtra(istream, dest, destOffset, size, hdrSize)) -1464 return -1; -1465 } finally { -1466 streamLock.unlock(); -1467 } -1468 } else { -1469 // Positional read. 
Better for random reads; or when the streamLock is already locked. -1470 int extraSize = peekIntoNextBlock ? hdrSize : 0; -1471 if (!positionalReadWithExtra(istream, fileOffset, dest, destOffset, -1472 size, extraSize)) { -1473 return -1; -1474 } -1475 } -1476 -1477 assert peekIntoNextBlock; -1478 return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize; -1479 } -1480 -1481 /** -1482 * Reads a version 2 block (version 1 blocks not supported and not expected). Tries to do as -1483 * little memory allocation as possible, using the provided on-disk size. -1484 * -1485 * @param offset the offset in the stream to read at -1486 * @param onDiskSizeWithHeaderL the on-disk size of the block, including -1487 * the header, or -1 if unknown -1488 * @param uncompressedSize the uncompressed size of the the block. Always -1489 * expected to be -1. This parameter is only used in version 1. -1490 * @param pread whether to use a positional read -1491 */ -1492 @Override -1493 public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL, -1494 int uncompressedSize, boolean pread) -1495 throws IOException { -1496 -1497 // get a copy of the current state of whether to validate -1498 // hbase checksums or not for this read call. This is not -1499 // thread-safe but the one constaint is that if we decide -1500 // to skip hbase checksum verification then we are -1501 // guaranteed to use hdfs checksum verification. -1502 boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum(); -1503 FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum); -1504 -1505 HFileBlock blk = readBlockDataInternal(is, offset, -1506 onDiskSizeWithHeaderL, -1507 uncompressedSize, pread, -1508 doVerificationThruHBaseChecksum); -1509 if (blk == null) { -1510 HFile.LOG.warn("HBase checksum verification failed for file " + -1511 path + " at offset " + -1512 offset + " filesize " + fileSize + -1513 ". Retrying read with HDFS checksums turned on..."); -1514 -1515 if (!doVerificationThruHBaseChecksum) { -1516 String msg = "HBase checksum verification failed for file " + -1517 path + " at offset " + -1518 offset + " filesize " + fileSize + -1519 " but this cannot happen because doVerify is " + -1520 doVerificationThruHBaseChecksum; -1521 HFile.LOG.warn(msg); -1522 throw new IOException(msg); // cannot happen case here -1523 } -1524 HFile.checksumFailures.incrementAndGet(); // update metrics -1525 -1526 // If we have a checksum failure, we fall back into a mode where -1527 // the next few reads use HDFS level checksums. We aim to make the -1528 // next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads avoid -1529 // hbase checksum verification, but since this value is set without -1530 // holding any locks, it can so happen that we might actually do -1531 // a few more than precisely this number. 
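The retry logic described in the comments above can be pictured with a small, self-contained sketch. The names below (ChecksumFallbackSketch, BlockSource, readOnce, switchToFsChecksums) are hypothetical stand-ins rather than the actual HBase API: read once with HBase-level checksum validation, and on a mismatch switch the stream to filesystem-level checksums for a bounded number of follow-up reads before retrying exactly once.

    import java.io.IOException;

    // Illustrative only: models the "verify with HBase checksums, fall back to
    // HDFS checksums on a mismatch" pattern from FSReaderImpl.readBlockData.
    public class ChecksumFallbackSketch {
      static final int CHECKSUM_VERIFICATION_IO_THRESHOLD = 3;  // assumed threshold

      interface Block {}

      interface BlockSource {
        // Returns null to signal an HBase checksum mismatch, mirroring readBlockDataInternal.
        Block readOnce(long offset, boolean useHBaseChecksum) throws IOException;
        // Route roughly the next N reads through filesystem-level checksums.
        void switchToFsChecksums(int nextReads);
      }

      static Block readWithFallback(BlockSource src, long offset) throws IOException {
        Block b = src.readOnce(offset, true);        // first attempt: HBase checksums on
        if (b == null) {
          src.switchToFsChecksums(CHECKSUM_VERIFICATION_IO_THRESHOLD);
          b = src.readOnce(offset, false);           // retry with HDFS-level checksums only
          if (b == null) {
            throw new IOException("read failed even with filesystem checksums, offset=" + offset);
          }
        }
        return b;
      }
    }
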
-1532 is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD); -1533 doVerificationThruHBaseChecksum = false; -1534 blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, -1535 uncompressedSize, pread, -1536 doVerificationThruHBaseChecksum); -1537 if (blk != null) { -1538 HFile.LOG.warn("HDFS checksum verification suceeded for file " + -1539 path + " at offset " + -1540 offset + " filesize " + fileSize); -1541 } -1542 } -1543 if (blk == null && !doVerificationThruHBaseChecksum) { -1544 String msg = "readBlockData failed, possibly due to " + -1545 "checksum verification failed for file " + path + -1546 " at offset " + offset + " filesize " + fileSize; -1547 HFile.LOG.warn(msg); -1548 throw new IOException(msg); -1549 } -1550 -1551 // If there is a checksum mismatch earlier, then retry with -1552 // HBase checksums switched off and use HDFS checksum verification. -1553 // This triggers HDFS to detect and fix corrupt replicas. The -1554 // next checksumOffCount read requests will use HDFS checksums. -1555 // The decrementing of this.checksumOffCount is not thread-safe, -1556 // but it is harmless because eventually checksumOffCount will be -1557 // a negative number. -1558 streamWrapper.checksumOk(); -1559 return blk; -1560 } -1561 -1562 /** -1563 * Reads a version 2 block. -1564 * -1565 * @param offset the offset in the stream to read at -1566 * @param onDiskSizeWithHeaderL the on-disk size of the block, including -1567 * the header, or -1 if unknown -1568 * @param uncompressedSize the uncompressed size of the the block. Always -1569 * expected to be -1. This parameter is only used in version 1. -1570 * @param pread whether to use a positional read -1571 * @param verifyChecksum Whether to use HBase checksums. -1572 * If HBase checksum is switched off, then use HDFS checksum. -1573 * @return the HFileBlock or null if there is a HBase checksum mismatch -1574 */ -1575 private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, -1576 long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread, -1577 boolean verifyChecksum) -1578 throws IOException { -1579 if (offset < 0) { -1580 throw new IOException("Invalid offset=" + offset + " trying to read " -1581 + "block (onDiskSize=" + onDiskSizeWithHeaderL -1582 + ", uncompressedSize=" + uncompressedSize + ")"); -1583 } -1584 -1585 if (uncompressedSize != -1) { -1586 throw new IOException("Version 2 block reader API does not need " + -1587 "the uncompressed size parameter"); -1588 } -1589 -1590 if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1) -1591 || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) { -1592 throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL -1593 + ": expected to be at least " + hdrSize -1594 + " and at most " + Integer.MAX_VALUE + ", or -1 (offset=" -1595 + offset + ", uncompressedSize=" + uncompressedSize + ")"); -1596 } -1597 -1598 int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL; -1599 // See if we can avoid reading the header. This is desirable, because -1600 // we will not incur a backward seek operation if we have already -1601 // read this block's header as part of the previous read's look-ahead. -1602 // And we also want to skip reading the header again if it has already -1603 // been read. -1604 // TODO: How often does this optimization fire? Has to be same thread so the thread local -1605 // is pertinent and we have to be reading next block as in a big scan. 
-1606 PrefetchedHeader prefetchedHeader = prefetchedHeaderForThread.get(); -1607 ByteBuffer headerBuf = prefetchedHeader.offset == offset? prefetchedHeader.buf: null; -1608 -1609 // Allocate enough space to fit the next block's header too. -1610 int nextBlockOnDiskSize = 0; -1611 byte[] onDiskBlock = null; -1612 -1613 HFileBlock b = null; -1614 if (onDiskSizeWithHeader > 0) { -1615 // We know the total on-disk size. Read the entire block into memory, -1616 // then parse the header. This code path is used when -1617 // doing a random read operation relying on the block index, as well as -1618 // when the client knows the on-disk size from peeking into the next -1619 // block's header (e.g. this block's header) when reading the previous -1620 // block. This is the faster and more preferable case. -1621 -1622 // Size that we have to skip in case we have already read the header. -1623 int preReadHeaderSize = headerBuf == null ? 0 : hdrSize; -1624 onDiskBlock = new byte[onDiskSizeWithHeader + hdrSize]; // room for this block plus the -1625 // next block's header -1626 nextBlockOnDiskSize = readAtOffset(is, onDiskBlock, -1627 preReadHeaderSize, onDiskSizeWithHeader - preReadHeaderSize, -1628 true, offset + preReadHeaderSize, pread); -1629 if (headerBuf != null) { -1630 // the header has been read when reading the previous block, copy -1631 // to this block's header -1632 // headerBuf is HBB -1633 assert headerBuf.hasArray(); -1634 System.arraycopy(headerBuf.array(), -1635 headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize); -1636 } else { -1637 headerBuf = ByteBuffer.wrap(onDiskBlock, 0, hdrSize); -1638 } -1639 // We know the total on-disk size but not the uncompressed size. Parse the header. -1640 try { -1641 // TODO: FIX!!! Expensive parse just to get a length -1642 b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum()); -1643 } catch (IOException ex) { -1644 // Seen in load testing. Provide comprehensive debug info. -1645 throw new IOException("Failed to read compressed block at " -1646 + offset -1647 + ", onDiskSizeWithoutHeader=" -1648 + onDiskSizeWithHeader -1649 + ", preReadHeaderSize=" -1650 + hdrSize -1651 + ", header.length=" -1652 + prefetchedHeader.header.length -1653 + ", header bytes: " -1654 + Bytes.toStringBinary(prefetchedHeader.header, 0, -1655 hdrSize), ex); -1656 } -1657 // if the caller specifies a onDiskSizeWithHeader, validate it. -1658 int onDiskSizeWithoutHeader = onDiskSizeWithHeader - hdrSize; -1659 assert onDiskSizeWithoutHeader >= 0; -1660 b.validateOnDiskSizeWithoutHeader(onDiskSizeWithoutHeader); -1661 } else { -1662 // Check headerBuf to see if we have read this block's header as part of -1663 // reading the previous block. This is an optimization of peeking into -1664 // the next block's header (e.g.this block's header) when reading the -1665 // previous block. This is the faster and more preferable case. If the -1666 // header is already there, don't read the header again. -1667 -1668 // Unfortunately, we still have to do a separate read operation to -1669 // read the header. -1670 if (headerBuf == null) { -1671 // From the header, determine the on-disk size of the given hfile -1672 // block, and read the remaining data, thereby incurring two read -1673 // operations. This might happen when we are doing the first read -1674 // in a series of reads or a random read, and we don't have access -1675 // to the block index. This is costly and should happen very rarely. 
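The thread-local header prefetch that this code relies on can be sketched roughly as follows. The names here (HeaderPrefetchSketch, cachedHeaderFor, remember) are illustrative only, and the 33-byte header size is an assumption for the example: when a read also pulls in the next block's header, it is cached keyed by that next block's offset, and a later read at exactly that offset can reuse it instead of issuing an extra small read.

    import java.nio.ByteBuffer;

    // Illustrative sketch of the PrefetchedHeader idea; not the HBase classes themselves.
    public class HeaderPrefetchSketch {
      static final int HDR_SIZE = 33;  // assumed header size, just for the example

      static class CachedHeader {
        long offset = -1;                         // offset the cached header belongs to
        final byte[] header = new byte[HDR_SIZE];
      }

      // One small cache per reading thread, as with prefetchedHeaderForThread above.
      static final ThreadLocal<CachedHeader> CACHE = ThreadLocal.withInitial(CachedHeader::new);

      // Returns the cached header bytes if they were prefetched for exactly this offset.
      static ByteBuffer cachedHeaderFor(long offset) {
        CachedHeader c = CACHE.get();
        return c.offset == offset ? ByteBuffer.wrap(c.header) : null;
      }

      // Remember the next block's header that came along with the current block's read.
      static void remember(long nextBlockOffset, byte[] onDiskBlock, int headerStart) {
        CachedHeader c = CACHE.get();
        c.offset = nextBlockOffset;
        System.arraycopy(onDiskBlock, headerStart, c.header, 0, HDR_SIZE);
      }
    }
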
-1676 headerBuf = ByteBuffer.allocate(hdrSize); -1677 // headerBuf is HBB -1678 readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(), -1679 hdrSize, false, offset, pread); -1680 } -1681 // TODO: FIX!!! Expensive parse just to get a length -1682 b = new HFileBlock(headerBuf, fileContext.isUseHBaseChecksum()); -1683 onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize]; -1684 // headerBuf is HBB -1685 System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize); -1686 nextBlockOnDiskSize = -1687 readAtOffset(is, onDiskBlock, hdrSize, b.getOnDiskSizeWithHeader() -1688 - hdrSize, true, offset + hdrSize, pread); -1689 onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize; -1690 } -1691 -1692 if (!fileContext.isCompressedOrEncrypted()) { -1693 b.assumeUncompressed(); -1694 } -1695 -1696 if (verifyChecksum && !validateBlockChecksum(b, onDiskBlock, hdrSize)) { -1697 return null; // checksum mismatch -1698 } -1699 -1700 // The onDiskBlock will become the headerAndDataBuffer for this block. -1701 // If nextBlockOnDiskSizeWithHeader is not zero, the onDiskBlock already -1702 // contains the header of next block, so no need to set next -1703 // block's header in it. -1704 b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, onDiskSizeWithHeader), -1705 this.fileContext.isUseHBaseChecksum()); -1706 -1707 b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize; -1708 -1709 // Set prefetched header -1710 if (b.hasNextBlockHeader()) { -1711 prefetchedHeader.offset = offset + b.getOnDiskSizeWithHeader(); -1712 System.arraycopy(onDiskBlock, onDiskSizeWithHeader, prefetchedHeader.header, 0, hdrSize); -1713 } -1714 -1715 b.offset = offset; -1716 b.fileContext.setIncludesTags(this.fileContext.isIncludesTags()); -1717 b.fileContext.setIncludesMvcc(this.fileContext.isIncludesMvcc()); -1718 return b; -1719 } -1720 -1721 public void setIncludesMemstoreTS(boolean includesMemstoreTS) { -1722 this.fileContext.setIncludesMvcc(includesMemstoreTS); -1723 } -1724 -1725 public void setDataBlockEncoder(HFileDataBlockEncoder encoder) { -1726 encodedBlockDecodingCtx = encoder.newDataBlockDecodingContext(this.fileContext); -1727 } -1728 -1729 @Override -1730 public HFileBlockDecodingContext getBlockDecodingContext() { -1731 return this.encodedBlockDecodingCtx; -1732 } -1733 -1734 @Override -1735 public HFileBlockDecodingContext getDefaultBlockDecodingContext() { -1736 return this.defaultDecodingCtx; -1737 } -1738 -1739 /** -1740 * Generates the checksum for the header as well as the data and -1741 * then validates that it matches the value stored in the header. -1742 * If there is a checksum mismatch, then return false. Otherwise -1743 * return true. -1744 */ -1745 protected boolean validateBlockChecksum(HFileBlock block, byte[] data, int hdrSize) -1746 throws IOException { -1747 return ChecksumUtil.validateBlockChecksum(path, block, data, hdrSize); -1748 } -1749 -1750 @Override -1751 public void closeStreams() throws IOException { -1752 streamWrapper.close(); -1753 } -1754 -1755 @Override -1756 public String toString() { -1757 return "hfs=" + hfs + ", path=" + path + ", fileContext=" + fileContext; -1758 } -1759 } -1760 -1761 @Override -1762 public int getSerializedLength() { -1763 if (buf != null) { -1764 // include extra bytes for the next header when it's available. -1765 int extraSpace = hasNextBlockHeader() ? 
headerSize() : 0; -1766 return this.buf.limit() + extraSpace + HFileBlock.EXTRA_SERIALIZATION_SPACE; -1767 } -1768 return 0; -1769 } -1770 -1771 @Override -1772 public void serialize(ByteBuffer destination) { -1773 this.buf.get(destination, 0, getSerializedLength() -1774 - EXTRA_SERIALIZATION_SPACE); -1775 serializeExtraInfo(destination); -1776 } -1777 -1778 public void serializeExtraInfo(ByteBuffer destination) { -1779 destination.put(this.fileContext.isUseHBaseChecksum() ? (byte) 1 : (byte) 0); -1780 destination.putLong(this.offset); -1781 destination.putInt(this.nextBlockOnDiskSizeWithHeader); -1782 destination.rewind(); -1783 } -1784 -1785 @Override -1786 public CacheableDeserializer<Cacheable> getDeserializer() { -1787 return HFileBlock.blockDeserializer; -1788 } -1789 -1790 @Override -1791 public int hashCode() { -1792 int result = 1; -1793 result = result * 31 + blockType.hashCode(); -1794 result = result * 31 + nextBlockOnDiskSizeWithHeader; -1795 result = result * 31 + (int) (offset ^ (offset >>> 32)); -1796 result = result * 31 + onDiskSizeWithoutHeader; -1797 result = result * 31 + (int) (prevBlockOffset ^ (prevBlockOffset >>> 32)); -1798 result = result * 31 + uncompressedSizeWithoutHeader; -1799 result = result * 31 + buf.hashCode(); -1800 return result; -1801 } -1802 -1803 @Override -1804 public boolean equals(Object comparison) { -1805 if (this == comparison) { -1806 return true; -1807 } -1808 if (comparison == null) { -1809 return false; -1810 } -1811 if (comparison.getClass() != this.getClass()) { -1812 return false; -1813 } -1814 -1815 HFileBlock castedComparison = (HFileBlock) comparison; -1816 -1817 if (castedComparison.blockType != this.blockType) { -1818 return false; -1819 } -1820 if (castedComparison.nextBlockOnDiskSizeWithHeader != this.nextBlockOnDiskSizeWithHeader) { -1821 return false; -1822 } -1823 if (castedComparison.offset != this.offset) { -1824 return false; -1825 } -1826 if (castedComparison.onDiskSizeWithoutHeader != this.onDiskSizeWithoutHeader) { -1827 return false; -1828 } -1829 if (castedComparison.prevBlockOffset != this.prevBlockOffset) { -1830 return false; -1831 } -1832 if (castedComparison.uncompressedSizeWithoutHeader != this.uncompressedSizeWithoutHeader) { -1833 return false; -1834 } -1835 if (ByteBuff.compareTo(this.buf, 0, this.buf.limit(), castedComparison.buf, 0, -1836 castedComparison.buf.limit()) != 0) { -1837 return false; -1838 } -1839 return true; -1840 } -1841 -1842 public DataBlockEncoding getDataBlockEncoding() { -1843 if (blockType == BlockType.ENCODED_DATA) { -1844 return DataBlockEncoding.getEncodingById(getDataBlockEncodingId()); -1845 } -1846 return DataBlockEncoding.NONE; -1847 } -1848 -1849 byte getChecksumType() { -1850 return this.fileContext.getChecksumType().getCode(); -1851 } -1852 -1853 int getBytesPerChecksum() { -1854 return this.fileContext.getBytesPerChecksum(); -1855 } -1856 -1857 /** @return the size of data on disk + header. Excludes checksum. */ -1858 int getOnDiskDataSizeWithHeader() { -1859 return this.onDiskDataSizeWithHeader; -1860 } -1861 -1862 /** -1863 * Calcuate the number of bytes required to store all the checksums -1864 * for this block. Each checksum value is a 4 byte integer. -1865 */ -1866 int totalChecksumBytes() { -1867 // If the hfile block has minorVersion 0, then there are no checksum -1868 // data to validate. 
Similarly, a zero value in this.bytesPerChecksum -1869 // indicates that cached blocks do not have checksum data because -1870 // checksums were already validated when the block was read from disk. -1871 if (!fileContext.isUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) { -1872 return 0; -1873 } -1874 return (int) ChecksumUtil.numBytes(onDiskDataSizeWithHeader, -1875 this.fileContext.getBytesPerChecksum()); -1876 } -1877 -1878 /** -1879 * Returns the size of this block header. -1880 */ -1881 public int headerSize() { -1882 return headerSize(this.fileContext.isUseHBaseChecksum()); -1883 } -1884 -1885 /** -1886 * Maps a minor version to the size of the header. -1887 */ -1888 public static int headerSize(boolean usesHBaseChecksum) { -1889 if (usesHBaseChecksum) { -1890 return HConstants.HFILEBLOCK_HEADER_SIZE; -1891 } -1892 return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; -1893 } -1894 -1895 /** -1896 * Return the appropriate DUMMY_HEADER for the minor version -1897 */ -1898 public byte[] getDummyHeaderForVersion() { -1899 return getDummyHeaderForVersion(this.fileContext.isUseHBaseChecksum()); -1900 } -1901 -1902 /** -1903 * Return the appropriate DUMMY_HEADER for the minor version -1904 */ -1905 static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) { -1906 if (usesHBaseChecksum) { -1907 return HConstants.HFILEBLOCK_DUMMY_HEADER; -1908 } -1909 return DUMMY_HEADER_NO_CHECKSUM; -1910 } -1911 -1912 /** -1913 * @return the HFileContext used to create this HFileBlock. Not necessary the -1914 * fileContext for the file from which this block's data was originally read. -1915 */ -1916 public HFileContext getHFileContext() { -1917 return this.fileContext; -1918 } -1919 -1920 @Override -1921 public MemoryType getMemoryType() { -1922 return this.memType; -1923 } -1924 -1925 /** -1926 * @return true if this block is backed by a shared memory area(such as that of a BucketCache). -1927 */ -1928 public boolean usesSharedMemory() { -1929 return this.memType == MemoryType.SHARED; -1930 } -1931 -1932 /** -1933 * Convert the contents of the block header into a human readable string. -1934 * This is mostly helpful for debugging. This assumes that the block -1935 * has minor version > 0. 
-1936 */ -1937 static String toStringHeader(ByteBuff buf) throws IOException { -1938 byte[] magicBuf = new byte[Math.min(buf.limit() - buf.position(), BlockType.MAGIC_LENGTH)]; -1939 buf.get(magicBuf); -1940 BlockType bt = BlockType.parse(magicBuf, 0, BlockType.MAGIC_LENGTH); -1941 int compressedBlockSizeNoHeader = buf.getInt(); -1942 int uncompressedBlockSizeNoHeader = buf.getInt(); -1943 long prevBlockOffset = buf.getLong(); -1944 byte cksumtype = buf.get(); -1945 long bytesPerChecksum = buf.getInt(); -1946 long onDiskDataSizeWithHeader = buf.getInt(); -1947 return " Header dump: magic: " + Bytes.toString(magicBuf) + -1948 " blockType " + bt + -1949 " compressedBlockSizeNoHeader " + -1950 compressedBlockSizeNoHeader + -1951 " uncompressedBlockSizeNoHeader " + -1952 uncompressedBlockSizeNoHeader + -1953 " prevBlockOffset " + prevBlockOffset + -1954 " checksumType " + ChecksumType.codeToType(cksumtype) + -1955 " bytesPerChecksum " + bytesPerChecksum + -1956 " onDiskDataSizeWithHeader " + onDiskDataSizeWithHeader; -1957 } -1958} +1361 protected HFileContext fileContext; +1362 // Cache the fileName +1363 protected String pathName; +1364 +1365 public FSReaderImpl(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path, +1366 HFileContext fileContext) throws IOException { +1367 this.fileSize = fileSize; +1368 this.hfs = hfs; +1369 if (path != null) { +1370 this.pathName = path.toString(); +1371 } +1372 this.fileContext = fileContext; +1373 this.hdrSize = headerSize(fileContext.isUseHBaseChecksum()); +1374 +1375 this.streamWrapper = stream; +1376 // Older versions of HBase didn't support checksum. +1377 this.streamWrapper.prepareForBlockReader(!fileContext.isUseHBaseChecksum()); +1378 defaultDecodingCtx = new HFileBlockDefaultDecodingContext(fileContext); +1379 encodedBlockDecodingCtx = defaultDecodingCtx; +1380 } +1381 +1382 /** +1383 * A constructor that reads files with the latest minor version. +1384 * This is used by unit tests only. +1385 */ +1386 FSReaderImpl(FSDataInputStream istream, long fileSize, HFileContext fileContext) +1387 throws IOException { +1388 this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext); +1389 } +1390 +1391 public BlockIterator blockRange(final long startOffset, final long endOffset) { +1392 final FSReader owner = this; // handle for inner class +1393 return new BlockIterator() { +1394 private long offset = startOffset; +1395 +1396 @Override +1397 public HFileBlock nextBlock() throws IOException { +1398 if (offset >= endOffset) +1399 return null; +1400 HFileBlock b = readBlockData(offset, -1, -1, false); +1401 offset += b.getOnDiskSizeWithHeader(); +1402 return b.unpack(fileContext, owner); +1403 } +1404 +1405 @Override +1406 public HFileBlock nextBlockWithBlockType(BlockType blockType) +1407 throws IOException { +1408 HFileBlock blk = nextBlock(); +1409 if (blk.getBlockType() != blockType) { +1410 throw new IOException("Expected block of type " + blockType +1411 + " but found " + blk.getBlockType()); +1412 } +1413 return blk; +1414 } +1415 }; +1416 } +1417 +1418 /** +1419 * Does a positional read or a seek and read into the given buffer. Returns +1420 * the on-disk size of the next block, or -1 if it could not be determined. 
+1421 * +1422 * @param dest destination buffer +1423 * @param destOffset offset in the destination buffer +1424 * @param size size of the block to be read +1425 * @param peekIntoNextBlock whether to read the next block's on-disk size +1426 * @param fileOffset position in the stream to read at +1427 * @param pread whether we should do a positional read +1428 * @param istream The input source of data +1429 * @return the on-disk size of the next block with header size included, or +1430 * -1 if it could not be determined +1431 * @throws IOException +1432 */ +1433 protected int readAtOffset(FSDataInputStream istream, +1434 byte[] dest, int destOffset, int size, +1435 boolean peekIntoNextBlock, long fileOffset, boolean pread) +1436 throws IOException { +1437 if (peekIntoNextBlock && +1438 destOffset + size + hdrSize > dest.length) { +1439 // We are asked to read the next block's header as well, but there is +1440 // not enough room in the array. +1441 throw new IOException("Attempted to read " + size + " bytes and " + +1442 hdrSize + " bytes of next header into a " + dest.length + +1443 "-byte array at offset " + destOffset); +1444 } +1445 +1446 if (!pread && streamLock.tryLock()) { +1447 // Seek + read. Better for scanning. +1448 try { +1449 istream.seek(fileOffset); +1450 +1451 long realOffset = istream.getPos(); +1452 if (realOffset != fileOffset) { +1453 throw new IOException("Tried to seek to " + fileOffset + " to " +1454 + "read " + size + " bytes, but pos=" + realOffset +1455 + " after seek"); +1456 } +1457 +1458 if (!peekIntoNextBlock) { +1459 IOUtils.readFully(istream, dest, destOffset, size); +1460 return -1; +1461 } +1462 +1463 // Try to read the next block header. +1464 if (!readWithExtra(istream, dest, destOffset, size, hdrSize)) +1465 return -1; +1466 } finally { +1467 streamLock.unlock(); +1468 } +1469 } else { +1470 // Positional read. Better for random reads; or when the streamLock is already locked. +1471 int extraSize = peekIntoNextBlock ? hdrSize : 0; +1472 if (!positionalReadWithExtra(istream, fileOffset, dest, destOffset, +1473 size, extraSize)) { +1474 return -1; +1475 } +1476 } +1477 +1478 assert peekIntoNextBlock; +1479 return Bytes.toInt(dest, destOffset + size + BlockType.MAGIC_LENGTH) + hdrSize; +1480 } +1481 +1482 /** +1483 * Reads a version 2 block (version 1 blocks not supported and not expected). Tries to do as +1484 * little memory allocation as possible, using the provided on-disk size. +1485 * +1486 * @param offset the offset in the stream to read at +1487 * @param onDiskSizeWithHeaderL the on-disk size of the block, including +1488 * the header, or -1 if unknown +1489 * @param uncompressedSize the uncompressed size of the the block. Always +1490 * expected to be -1. This parameter is only used in version 1. +1491 * @param pread whether to use a positional read +1492 */ +1493 @Override +1494 public HFileBlock readBlockData(long offset, long onDiskSizeWithHeaderL, +1495 int uncompressedSize, boolean pread) +1496 throws IOException { +1497 +1498 // get a copy of the current state of whether to validate +1499 // hbase checksums or not for this read call. This is not +1500 // thread-safe but the one constaint is that if we decide +1501 // to skip hbase checksum verification then we are +1502 // guaranteed to use hdfs checksum verification. 
+1503 boolean doVerificationThruHBaseChecksum = streamWrapper.shouldUseHBaseChecksum(); +1504 FSDataInputStream is = streamWrapper.getStream(doVerificationThruHBaseChecksum); +1505 +1506 HFileBlock blk = readBlockDataInternal(is, offset, +1507 onDiskSizeWithHeaderL, +1508 uncompressedSize, pread, +1509 doVerificationThruHBaseChecksum); +1510 if (blk == null) { +1511 HFile.LOG.warn("HBase checksum verification failed for file " + +1512 pathName + " at offset " + +1513 offset + " filesize " + fileSize + +1514 ". Retrying read with HDFS checksums turned on..."); +1515 +1516 if (!doVerificationThruHBaseChecksum) { +1517 String msg = "HBase checksum verification failed for file " + +1518 pathName + " at offset " + +1519 offset + " filesize " + fileSize + +1520 " but this cannot happen because doVerify is " + +1521 doVerificationThruHBaseChecksum; +1522 HFile.LOG.warn(msg); +1523 throw new IOException(msg); // cannot happen case here +1524 } +1525 HFile.checksumFailures.incrementAndGet(); // update metrics +1526 +1527 // If we have a checksum failure, we fall back into a mode where +1528 // the next few reads use HDFS level checksums. We aim to make the +1529 // next CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD reads avoid +1530 // hbase checksum verification, but since this value is set without +1531 // holding any locks, it can so happen that we might actually do +1532 // a few more than precisely this number. +1533 is = this.streamWrapper.fallbackToFsChecksum(CHECKSUM_VERIFICATION_NUM_IO_THRESHOLD); +1534 doVerificationThruHBaseChecksum = false; +1535 blk = readBlockDataInternal(is, offset, onDiskSizeWithHeaderL, +1536 uncompressedSize, pread, +1537 doVerificationThruHBaseChecksum); +1538 if (blk != null) { +1539 HFile.LOG.warn("HDFS checksum verification suceeded for file " + +1540 pathName + " at offset " + +1541 offset + " filesize " + fileSize); +1542 } +1543 } +1544 if (blk == null && !doVerificationThruHBaseChecksum) { +1545 String msg = "readBlockData failed, possibly due to " + +1546 "checksum verification failed for file " + pathName + +1547 " at offset " + offset + " filesize " + fileSize; +1548 HFile.LOG.warn(msg); +1549 throw new IOException(msg); +1550 } +1551 +1552 // If there is a checksum mismatch earlier, then retry with +1553 // HBase checksums switched off and use HDFS checksum verification. +1554 // This triggers HDFS to detect and fix corrupt replicas. The +1555 // next checksumOffCount read requests will use HDFS checksums. +1556 // The decrementing of this.checksumOffCount is not thread-safe, +1557 // but it is harmless because eventually checksumOffCount will be +1558 // a negative number. +1559 streamWrapper.checksumOk(); +1560 return blk; +1561 } +1562 +1563 /** +1564 * Reads a version 2 block. +1565 * +1566 * @param offset the offset in the stream to read at +1567 * @param onDiskSizeWithHeaderL the on-disk size of the block, including +1568 * the header, or -1 if unknown +1569 * @param uncompressedSize the uncompressed size of the the block. Always +1570 * expected to be -1. This parameter is only used in version 1. +1571 * @param pread whether to use a positional read +1572 * @param verifyChecksum Whether to use HBase checksums. +1573 * If HBase checksum is switched off, then use HDFS checksum. 
+1574 * @return the HFileBlock or null if there is a HBase checksum mismatch +1575 */ +1576 private HFileBlock readBlockDataInternal(FSDataInputStream is, long offset, +1577 long onDiskSizeWithHeaderL, int uncompressedSize, boolean pread, +1578 boolean verifyChecksum) +1579 throws IOException { +1580 if (offset < 0) { +1581 throw new IOException("Invalid offset=" + offset + " trying to read " +1582 + "block (onDiskSize=" + onDiskSizeWithHeaderL +1583 + ", uncompressedSize=" + uncompressedSize + ")"); +1584 } +1585 +1586 if (uncompressedSize != -1) { +1587 throw new IOException("Version 2 block reader API does not need " + +1588 "the uncompressed size parameter"); +1589 } +1590 +1591 if ((onDiskSizeWithHeaderL < hdrSize && onDiskSizeWithHeaderL != -1) +1592 || onDiskSizeWithHeaderL >= Integer.MAX_VALUE) { +1593 throw new IOException("Invalid onDisksize=" + onDiskSizeWithHeaderL +1594 + ": expected to be at least " + hdrSize +1595 + " and at most " + Integer.MAX_VALUE + ", or -1 (offset=" +1596 + offset + ", uncompressedSize=" + uncompressedSize + ")"); +1597 } +1598 +1599 int onDiskSizeWithHeader = (int) onDiskSizeWithHeaderL; +1600 // See if we can avoid reading the header. This is desirable, because