Subject: svn commit: r1331057 - in /hbase/branches/0.94/src: main/java/org/apache/hadoop/hbase/io/hfile/ test/java/org/apache/hadoop/hbase/io/hfile/
Date: Thu, 26 Apr 2012 20:06:34 -0000
To: commits@hbase.apache.org
From: larsh@apache.org
Reply-To: dev@hbase.apache.org
Message-Id: <20120426200634.D77322388962@eris.apache.org>

Author: larsh
Date: Thu Apr 26 20:06:34 2012
New Revision: 1331057

URL: http://svn.apache.org/viewvc?rev=1331057&view=rev
Log:
HBASE-5864 Error while reading from hfile in 0.94 (Ram)

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
    hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java?rev=1331057&r1=1331056&r2=1331057&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java Thu Apr 26 20:06:34 2012
@@ -1189,10 +1189,9 @@ public class HFileBlock extends SchemaCo
     /**
      * Similar to {@link #nextBlock()} but checks block type, throws an
-     * exception if incorrect, and returns the data portion of the block as
-     * an input stream.
+     * exception if incorrect, and returns the HFile block
      */
-    DataInputStream nextBlockAsStream(BlockType blockType) throws IOException;
+    HFileBlock nextBlockWithBlockType(BlockType blockType) throws IOException;
   }

   /** A full-fledged reader with iteration ability.
    */

@@ -1290,14 +1289,14 @@ public class HFileBlock extends SchemaCo
       }

       @Override
-      public DataInputStream nextBlockAsStream(BlockType blockType)
+      public HFileBlock nextBlockWithBlockType(BlockType blockType)
           throws IOException {
         HFileBlock blk = nextBlock();
         if (blk.getBlockType() != blockType) {
           throw new IOException("Expected block of type " + blockType
               + " but found " + blk.getBlockType());
         }
-        return blk.getByteStream();
+        return blk;
       }
     };
   }

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java?rev=1331057&r1=1331056&r2=1331057&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java Thu Apr 26 20:06:34 2012
@@ -531,24 +531,43 @@ public class HFileBlockIndex {
         }
       }
     }
+
+    /**
+     * Read in the root-level index from the given input stream. Must match
+     * what was written into the root level by
+     * {@link BlockIndexWriter#writeIndexBlocks(FSDataOutputStream)} at the
+     * offset that function returned.
+     *
+     * @param blk the HFile block
+     * @param numEntries the number of root-level index entries
+     * @return the buffered input stream or wrapped byte input stream
+     * @throws IOException
+     */
+    public DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
+      DataInputStream in = blk.getByteStream();
+      readRootIndex(in, numEntries);
+      return in;
+    }

     /**
      * Read the root-level metadata of a multi-level block index. Based on
      * {@link #readRootIndex(DataInput, int)}, but also reads metadata
      * necessary to compute the mid-key in a multi-level index.
      *
-     * @param in the buffered or byte input stream to read from
+     * @param blk the HFile block
      * @param numEntries the number of root-level index entries
      * @throws IOException
      */
-    public void readMultiLevelIndexRoot(DataInputStream in,
+    public void readMultiLevelIndexRoot(HFileBlock blk,
         final int numEntries) throws IOException {
-      readRootIndex(in, numEntries);
-      if (in.available() < MID_KEY_METADATA_SIZE) {
+      DataInputStream in = readRootIndex(blk, numEntries);
+      // after reading the root index the checksum bytes have to
+      // be subtracted to know if the mid key exists.
+      int checkSumBytes = blk.totalChecksumBytes();
+      if ((in.available() - checkSumBytes) < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
         return;
       }
-
       midLeafBlockOffset = in.readLong();
       midLeafBlockOnDiskSize = in.readInt();
       midKeyEntry = in.readInt();
@@ -761,7 +780,7 @@ public class HFileBlockIndex {
       if (LOG.isTraceEnabled()) {
         LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
-            + out.getPos() + ", " + rootChunk.getNumEntries()
+            + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
             + " root-level entries, " + totalNumEntries + " total entries, "
             + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) +
             " on-disk size, "

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java?rev=1331057&r1=1331056&r2=1331057&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java Thu Apr 26 20:06:34 2012
@@ -124,17 +124,17 @@ public class HFileReaderV2 extends Abstr
     // Data index. We also read statistics about the block index written after
     // the root level.
     dataBlockIndexReader.readMultiLevelIndexRoot(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());

     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getMetaIndexCount());

     // File info
     fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     lastKey = fileInfo.get(FileInfo.LASTKEY);
     avgKeyLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_KEY_LEN));
     avgValueLen = Bytes.toInt(fileInfo.get(FileInfo.AVG_VALUE_LEN));

Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1331057&r1=1331056&r2=1331057&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Thu Apr 26 20:06:34 2012
@@ -176,7 +176,7 @@ public class TestHFileBlockIndex {
         Bytes.BYTES_RAWCOMPARATOR, numLevels, brw);

     indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset,
-        fileSize).nextBlockAsStream(BlockType.ROOT_INDEX), numRootEntries);
+        fileSize).nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

     long prevOffset = -1;
     int i = 0;

Modified: hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java?rev=1331057&r1=1331056&r2=1331057&view=diff
==============================================================================
--- hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java (original)
+++ hbase/branches/0.94/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java Thu Apr 26 20:06:34 2012
@@ -37,8 +37,7 @@ import org.apache.hadoop.fs.FSDataInputS
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.RawComparator;
@@ -72,14 +71,30 @@ public class TestHFileWriterV2 {
   @Test
   public void testHFileFormatV2() throws IOException {
     Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
-        "testHFileFormatV2");
+        "testHFileFormatV2");
+    final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ;
+    final int entryCount = 10000;
+    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false);
+  }
+
+  @Test
+  public void testMidKeyInHFile() throws IOException{
+    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+        "testMidKeyInHFile");
+    Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
+    int entryCount = 50000;
+    writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true);
+  }
+
+  private void writeDataAndReadFromHFile(Path hfilePath,
+      Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException {

-    final Compression.Algorithm COMPRESS_ALGO = Compression.Algorithm.GZ;
     HFileWriterV2 writer = (HFileWriterV2)
         new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf))
             .withPath(fs, hfilePath)
             .withBlockSize(4096)
-            .withCompression(COMPRESS_ALGO)
+            .withCompression(compressAlgo)
             .withComparator(KeyValue.KEY_COMPARATOR)
             .create();

@@ -88,11 +103,10 @@ public class TestHFileWriterV2 {

     Random rand = new Random(9713312); // Just a fixed seed.

-    final int ENTRY_COUNT = 10000;
     List<byte[]> keys = new ArrayList<byte[]>();
     List<byte[]> values = new ArrayList<byte[]>();

-    for (int i = 0; i < ENTRY_COUNT; ++i) {
+    for (int i = 0; i < entryCount; ++i) {
       byte[] keyBytes = randomOrderedKey(rand, i);

       // A random-length random value.
@@ -113,6 +127,7 @@ public class TestHFileWriterV2 {
     writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris"));

     writer.close();
+

     FSDataInputStream fsdis = fs.open(hfilePath);

@@ -124,10 +139,10 @@ public class TestHFileWriterV2 {
         FixedFileTrailer.readFromStream(fsdis, fileSize);

     assertEquals(2, trailer.getMajorVersion());
-    assertEquals(ENTRY_COUNT, trailer.getEntryCount());
+    assertEquals(entryCount, trailer.getEntryCount());

     HFileBlock.FSReader blockReader =
-        new HFileBlock.FSReaderV2(fsdis, COMPRESS_ALGO, fileSize);
+        new HFileBlock.FSReaderV2(fsdis, compressAlgo, fileSize);
     // Comparator class name is stored in the trailer in version 2.
     RawComparator<byte[]> comparator = trailer.createComparator();
     HFileBlockIndex.BlockIndexReader dataBlockIndexReader =
@@ -143,16 +158,21 @@ public class TestHFileWriterV2 {
     // Data index. We also read statistics about the block index written after
     // the root level.
     dataBlockIndexReader.readMultiLevelIndexRoot(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX),
         trailer.getDataIndexCount());
-
+
+    if (findMidKey) {
+      byte[] midkey = dataBlockIndexReader.midkey();
+      assertNotNull("Midkey should not be null", midkey);
+    }
+
     // Meta index.
     metaBlockIndexReader.readRootIndex(
-        blockIter.nextBlockAsStream(BlockType.ROOT_INDEX),
+        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(),
         trailer.getMetaIndexCount());

     // File info
     FileInfo fileInfo = new FileInfo();
-    fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO));
+    fileInfo.readFields(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
     byte [] keyValueFormatVersion = fileInfo.get(
         HFileWriterV2.KEY_VALUE_VERSION);
     boolean includeMemstoreTS = keyValueFormatVersion != null &&
@@ -200,7 +220,7 @@ public class TestHFileWriterV2 {
     }

     LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead="
         + blocksRead);
-    assertEquals(ENTRY_COUNT, entriesRead);
+    assertEquals(entryCount, entriesRead);

     // Meta blocks. We can scan until the load-on-open data offset (which is
     // the root block index offset in version 2) because we are not testing
@@ -226,6 +246,7 @@ public class TestHFileWriterV2 {

     fsdis.close();
   }
+

   // Static stuff used by various HFile v2 unit tests

   private static final String COLUMN_FAMILY_NAME = "_-myColumnFamily-_";
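
A note on the fix itself: in 0.94 an HFile block's in-memory buffer can carry trailing checksum bytes, so in.available() on the block's byte stream reports more than the index payload actually left to read. The old test, in.available() < MID_KEY_METADATA_SIZE, could therefore conclude that mid-key metadata was present when only checksum bytes remained, and misread those bytes as the mid-leaf offset and sizes; subtracting blk.totalChecksumBytes() restores the correct decision. The following is a minimal, self-contained sketch of that corrected check, not HBase code: MID_KEY_METADATA_SIZE mirrors the patch (one long plus two ints), while the class and helper names are hypothetical.

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

public class MidKeyCheckSketch {

  // One long offset plus two ints, matching the patch's MID_KEY_METADATA_SIZE.
  static final int MID_KEY_METADATA_SIZE = 8 + 4 + 4;

  // Hypothetical stand-in for the fixed check: 'checksumBytes' plays the
  // role of blk.totalChecksumBytes(), i.e. trailing bytes that belong to
  // the block's checksums rather than to the index payload.
  static boolean hasMidKeyMetadata(DataInputStream in, int checksumBytes)
      throws IOException {
    return (in.available() - checksumBytes) >= MID_KEY_METADATA_SIZE;
  }

  public static void main(String[] args) throws IOException {
    // 20 bytes remain in the stream, but the last 8 are checksum bytes.
    DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(new byte[20]));

    // Old check: 20 >= 16, so it would wrongly try to read "mid-key
    // metadata" out of checksum bytes (prints true).
    System.out.println(in.available() >= MID_KEY_METADATA_SIZE);

    // Fixed check: only 12 payload bytes remain, so no mid-key (prints false).
    System.out.println(hasMidKeyMetadata(in, 8));
  }
}

The write-side change in HFileBlockIndex is of the same flavor: the trace message now logs rootLevelIndexPos, the offset where the root level was actually written, instead of out.getPos(), which by the time of logging has already advanced past it.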