Reply-To: dev@hbase.apache.org
Subject: svn commit: r1245291 [6/7] - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/client/ main/java/org/apache/hadoop/hbase/io/ main/java/org/apache/hadoop/hbase/io/encoding/ main/java/org/apache/hadoop/h...
Date: Fri, 17 Feb 2012 01:56:35 -0000
To: commits@hbase.apache.org
From: mbautin@apache.org
X-Mailer: svnmailer-1.0.8-patched
Message-Id: <20120217015640.1CFE42388B6C@eris.apache.org>

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java?rev=1245291&r1=1245290&r2=1245291&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java Fri Feb 17 01:56:33 2012
@@ -38,9 +38,9 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.regionserver.CreateRandomStoreFile;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
@@ -72,10 +72,13 @@ public class TestCacheOnWrite {
   private FileSystem fs;
   private Random rand = new Random(12983177L);
   private Path storeFilePath;
-  private Compression.Algorithm compress;
-  private CacheOnWriteType cowType;
   private BlockCache blockCache;
-  private String testName;
+  private String testDescription;
+
+  private final CacheOnWriteType cowType;
+  private final Compression.Algorithm compress;
+  private final BlockEncoderTestType encoderType;
+  private final HFileDataBlockEncoder encoder;

   private static final int DATA_BLOCK_SIZE =
2048; private static final int NUM_KV = 25000; @@ -84,49 +87,87 @@ public class TestCacheOnWrite { private static final BloomType BLOOM_TYPE = StoreFile.BloomType.ROWCOL; private static enum CacheOnWriteType { - DATA_BLOCKS(BlockType.DATA, CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY), - BLOOM_BLOCKS(BlockType.BLOOM_CHUNK, - CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY), - INDEX_BLOCKS(BlockType.LEAF_INDEX, - CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY); + DATA_BLOCKS(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, + BlockType.DATA, BlockType.ENCODED_DATA), + BLOOM_BLOCKS(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY, + BlockType.BLOOM_CHUNK), + INDEX_BLOCKS(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, + BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX); private final String confKey; - private final BlockType inlineBlockType; + private final BlockType blockType1; + private final BlockType blockType2; + + private CacheOnWriteType(String confKey, BlockType blockType) { + this(confKey, blockType, blockType); + } - private CacheOnWriteType(BlockType inlineBlockType, String confKey) { - this.inlineBlockType = inlineBlockType; + private CacheOnWriteType(String confKey, BlockType blockType1, + BlockType blockType2) { + this.blockType1 = blockType1; + this.blockType2 = blockType2; this.confKey = confKey; } public boolean shouldBeCached(BlockType blockType) { - return blockType == inlineBlockType - || blockType == BlockType.INTERMEDIATE_INDEX - && inlineBlockType == BlockType.LEAF_INDEX; + return blockType == blockType1 || blockType == blockType2; } public void modifyConf(Configuration conf) { - for (CacheOnWriteType cowType : CacheOnWriteType.values()) + for (CacheOnWriteType cowType : CacheOnWriteType.values()) { conf.setBoolean(cowType.confKey, cowType == this); + } } } + private static final DataBlockEncoding ENCODING_ALGO = + DataBlockEncoding.PREFIX; + + /** Provides fancy names for three combinations of two booleans */ + private static enum BlockEncoderTestType { + NO_BLOCK_ENCODING(false, false), + BLOCK_ENCODING_IN_CACHE_ONLY(false, true), + BLOCK_ENCODING_EVERYWHERE(true, true); + + private final boolean encodeOnDisk; + private final boolean encodeInCache; + + BlockEncoderTestType(boolean encodeOnDisk, boolean encodeInCache) { + this.encodeOnDisk = encodeOnDisk; + this.encodeInCache = encodeInCache; + } + + public HFileDataBlockEncoder getEncoder() { + return new HFileDataBlockEncoderImpl( + encodeOnDisk ? ENCODING_ALGO : DataBlockEncoding.NONE, + encodeInCache ? 
ENCODING_ALGO : DataBlockEncoding.NONE); + } + } + public TestCacheOnWrite(CacheOnWriteType cowType, - Compression.Algorithm compress) { + Compression.Algorithm compress, BlockEncoderTestType encoderType) { this.cowType = cowType; this.compress = compress; - testName = "[cacheOnWrite=" + cowType + ", compress=" + compress + "]"; - System.out.println(testName); + this.encoderType = encoderType; + this.encoder = encoderType.getEncoder(); + testDescription = "[cacheOnWrite=" + cowType + ", compress=" + compress + + ", encoderType=" + encoderType + "]"; + System.out.println(testDescription); } @Parameters public static Collection getParameters() { List cowTypes = new ArrayList(); - for (CacheOnWriteType cowType : CacheOnWriteType.values()) + for (CacheOnWriteType cowType : CacheOnWriteType.values()) { for (Compression.Algorithm compress : HBaseTestingUtility.COMPRESSION_ALGORITHMS) { - cowTypes.add(new Object[] { cowType, compress }); + for (BlockEncoderTestType encoderType : + BlockEncoderTestType.values()) { + cowTypes.add(new Object[] { cowType, compress, encoderType }); + } } + } return cowTypes; } @@ -147,7 +188,6 @@ public class TestCacheOnWrite { fs = FileSystem.get(conf); cacheConf = new CacheConfig(conf); blockCache = cacheConf.getBlockCache(); - System.out.println("setUp()"); } @After @@ -163,29 +203,43 @@ public class TestCacheOnWrite { } private void readStoreFile() throws IOException { - HFileReaderV2 reader = (HFileReaderV2) HFile.createReader(fs, - storeFilePath, cacheConf); + HFileReaderV2 reader = (HFileReaderV2) HFile.createReaderWithEncoding(fs, + storeFilePath, cacheConf, encoder.getEncodingInCache()); LOG.info("HFile information: " + reader); - HFileScanner scanner = reader.getScanner(false, false); - assertTrue(testName, scanner.seekTo()); + final boolean cacheBlocks = false; + final boolean pread = false; + HFileScanner scanner = reader.getScanner(cacheBlocks, pread); + assertTrue(testDescription, scanner.seekTo()); long offset = 0; HFileBlock prevBlock = null; EnumMap blockCountByType = new EnumMap(BlockType.class); + DataBlockEncoding encodingInCache = + encoderType.getEncoder().getEncodingInCache(); while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) { long onDiskSize = -1; if (prevBlock != null) { onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader(); } // Flags: don't cache the block, use pread, this is not a compaction. + // Also, pass null for expected block type to avoid checking it. 
HFileBlock block = reader.readBlock(offset, onDiskSize, false, true, - false); - BlockCacheKey blockCacheKey = HFile.getBlockCacheKey(reader.getName(), offset); + false, null); + BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(), + offset, encodingInCache, block.getBlockType()); boolean isCached = blockCache.getBlock(blockCacheKey, true) != null; boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType()); - assertEquals(testName + " " + block, shouldBeCached, isCached); + if (shouldBeCached != isCached) { + throw new AssertionError( + "shouldBeCached: " + shouldBeCached+ "\n" + + "isCached: " + isCached + "\n" + + "Test description: " + testDescription + "\n" + + "block: " + block + "\n" + + "encodingInCache: " + encodingInCache + "\n" + + "blockCacheKey: " + blockCacheKey); + } prevBlock = block; offset += block.getOnDiskSizeWithHeader(); BlockType bt = block.getBlockType(); @@ -195,8 +249,10 @@ public class TestCacheOnWrite { LOG.info("Block count by type: " + blockCountByType); String countByType = blockCountByType.toString(); - assertEquals( - "{DATA=1379, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=24}", + BlockType cachedDataBlockType = + encoderType.encodeInCache ? BlockType.ENCODED_DATA : BlockType.DATA; + assertEquals("{" + cachedDataBlockType + + "=1379, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=24}", countByType); reader.close(); @@ -206,8 +262,9 @@ public class TestCacheOnWrite { Path storeFileParentDir = new Path(HBaseTestingUtility.getTestDir(), "test_cache_on_write"); StoreFile.Writer sfw = StoreFile.createWriter(fs, storeFileParentDir, - DATA_BLOCK_SIZE, compress, KeyValue.COMPARATOR, conf, - cacheConf, BLOOM_TYPE, NUM_KV); + DATA_BLOCK_SIZE, compress, encoder, KeyValue.COMPARATOR, conf, + cacheConf, BLOOM_TYPE, BloomFilterFactory.getErrorRate(conf), NUM_KV, + null); final int rowLen = 32; for (int i = 0; i < NUM_KV; ++i) { @@ -238,7 +295,9 @@ public class TestCacheOnWrite { final byte[] cfBytes = Bytes.toBytes(cf); final int maxVersions = 3; HRegion region = TEST_UTIL.createTestRegion(table, cf, compress, - BLOOM_TYPE, maxVersions, HFile.DEFAULT_BLOCKSIZE); + BLOOM_TYPE, maxVersions, HFile.DEFAULT_BLOCKSIZE, + encoder.getEncodingInCache(), + encoder.getEncodingOnDisk() != DataBlockEncoding.NONE); int rowIdx = 0; long ts = EnvironmentEdgeManager.currentTimeMillis(); for (int iFile = 0; iFile < 5; ++iFile) { Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java?rev=1245291&r1=1245290&r2=1245291&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java Fri Feb 17 01:56:33 2012 @@ -156,7 +156,7 @@ public class TestHFile extends HBaseTest writeRecords(writer); fout.close(); FSDataInputStream fin = fs.open(ncTFile); - Reader reader = HFile.createReader(ncTFile, fs.open(ncTFile), + Reader reader = HFile.createReaderFromStream(ncTFile, fs.open(ncTFile), fs.getFileStatus(ncTFile).getLen(), cacheConf); System.out.println(cacheConf.toString()); // Load up the index. 
@@ -234,7 +234,7 @@ public class TestHFile extends HBaseTest writer.close(); fout.close(); FSDataInputStream fin = fs.open(mFile); - Reader reader = HFile.createReader(mFile, fs.open(mFile), + Reader reader = HFile.createReaderFromStream(mFile, fs.open(mFile), this.fs.getFileStatus(mFile).getLen(), cacheConf); reader.loadFileInfo(); // No data -- this should return false. Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java?rev=1245291&r1=1245290&r2=1245291&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java Fri Feb 17 01:56:33 2012 @@ -19,7 +19,11 @@ */ package org.apache.hadoop.hbase.io.hfile; -import static org.junit.Assert.*; +import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.GZ; +import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.NONE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; @@ -27,6 +31,8 @@ import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -44,15 +50,20 @@ import org.apache.hadoop.fs.FSDataOutput import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.DoubleOutputStream; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; +import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.compress.Compressor; - -import static org.apache.hadoop.hbase.io.hfile.Compression.Algorithm.*; import org.junit.Before; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +@RunWith(Parameterized.class) public class TestHFileBlock { private static final boolean[] BOOLEAN_VALUES = new boolean[] { false, true }; @@ -66,14 +77,29 @@ public class TestHFileBlock { static final Compression.Algorithm[] GZIP_ONLY = { GZ }; private static final int NUM_TEST_BLOCKS = 1000; - private static final int NUM_READER_THREADS = 26; + // Used to generate KeyValues + private static int NUM_KEYVALUES = 50; + private static int FIELD_LENGTH = 10; + private static float CHANCE_TO_REPEAT = 0.6f; + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private FileSystem fs; private int uncompressedSizeV1; + private final boolean includesMemstoreTS; + + public TestHFileBlock(boolean includesMemstoreTS) { + this.includesMemstoreTS = includesMemstoreTS; + } + + @Parameters + public static Collection parameters() { + return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + } + @Before public void setUp() throws IOException { fs = FileSystem.get(TEST_UTIL.getConfiguration()); @@ -86,6 +112,72 @@ public class 
TestHFileBlock { dos.writeInt(i / 100); } + private int writeTestKeyValues(OutputStream dos, int seed) + throws IOException { + List keyValues = new ArrayList(); + Random randomizer = new Random(42l + seed); // just any fixed number + + // generate keyValues + for (int i = 0; i < NUM_KEYVALUES; ++i) { + byte[] row; + long timestamp; + byte[] family; + byte[] qualifier; + byte[] value; + + // generate it or repeat, it should compress well + if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) { + row = keyValues.get(randomizer.nextInt(keyValues.size())).getRow(); + } else { + row = new byte[FIELD_LENGTH]; + randomizer.nextBytes(row); + } + if (0 == i) { + family = new byte[FIELD_LENGTH]; + randomizer.nextBytes(family); + } else { + family = keyValues.get(0).getFamily(); + } + if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) { + qualifier = keyValues.get( + randomizer.nextInt(keyValues.size())).getQualifier(); + } else { + qualifier = new byte[FIELD_LENGTH]; + randomizer.nextBytes(qualifier); + } + if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) { + value = keyValues.get(randomizer.nextInt(keyValues.size())).getValue(); + } else { + value = new byte[FIELD_LENGTH]; + randomizer.nextBytes(value); + } + if (0 < i && randomizer.nextFloat() < CHANCE_TO_REPEAT) { + timestamp = keyValues.get( + randomizer.nextInt(keyValues.size())).getTimestamp(); + } else { + timestamp = randomizer.nextLong(); + } + + keyValues.add(new KeyValue(row, family, qualifier, timestamp, value)); + } + + // sort it and write to stream + int totalSize = 0; + Collections.sort(keyValues, KeyValue.COMPARATOR); + DataOutputStream dataOutputStream = new DataOutputStream(dos); + for (KeyValue kv : keyValues) { + totalSize += kv.getLength(); + dataOutputStream.write(kv.getBuffer(), kv.getOffset(), kv.getLength()); + if (includesMemstoreTS) { + long memstoreTS = randomizer.nextLong(); + WritableUtils.writeVLong(dataOutputStream, memstoreTS); + totalSize += WritableUtils.getVIntSize(memstoreTS); + } + } + + return totalSize; + } + public byte[] createTestV1Block(Compression.Algorithm algo) throws IOException { Compressor compressor = algo.getCompressor(); @@ -103,8 +195,9 @@ public class TestHFileBlock { private byte[] createTestV2Block(Compression.Algorithm algo) throws IOException { final BlockType blockType = BlockType.DATA; - HFileBlock.Writer hbw = new HFileBlock.Writer(algo); - DataOutputStream dos = hbw.startWriting(blockType, false); + HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, + includesMemstoreTS); + DataOutputStream dos = hbw.startWriting(blockType); writeTestBlockContents(dos); byte[] headerAndData = hbw.getHeaderAndData(); assertEquals(1000 * 4, hbw.getUncompressedSizeWithoutHeader()); @@ -175,10 +268,11 @@ public class TestHFileBlock { Path path = new Path(HBaseTestingUtility.getTestDir(), "blocks_v2_" + algo); FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(algo); + HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, + includesMemstoreTS); long totalSize = 0; for (int blockId = 0; blockId < 2; ++blockId) { - DataOutputStream dos = hbw.startWriting(BlockType.DATA, false); + DataOutputStream dos = hbw.startWriting(BlockType.DATA); for (int i = 0; i < 1234; ++i) dos.writeInt(i); hbw.writeHeaderAndData(os); @@ -221,6 +315,136 @@ public class TestHFileBlock { } } + /** + * Test encoding/decoding data blocks. + * @throws IOException a bug or a problem with temporary files. 
+ */ + @Test + public void testDataBlockEncoding() throws IOException { + final int numBlocks = 5; + for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { + for (boolean pread : new boolean[] { false, true }) { + for (DataBlockEncoding encoding : DataBlockEncoding.values()) { + Path path = new Path(HBaseTestingUtility.getTestDir(), "blocks_v2_" + + algo + "_" + encoding.toString()); + FSDataOutputStream os = fs.create(path); + HFileDataBlockEncoder dataBlockEncoder = + new HFileDataBlockEncoderImpl(encoding); + HFileBlock.Writer hbw = new HFileBlock.Writer(algo, dataBlockEncoder, + includesMemstoreTS); + long totalSize = 0; + final List encodedSizes = new ArrayList(); + final List encodedBlocks = new ArrayList(); + for (int blockId = 0; blockId < numBlocks; ++blockId) { + writeEncodedBlock(encoding, hbw, encodedSizes, encodedBlocks, + blockId); + + hbw.writeHeaderAndData(os); + totalSize += hbw.getOnDiskSizeWithHeader(); + } + os.close(); + + FSDataInputStream is = fs.open(path); + HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(is, algo, + totalSize); + hbr.setDataBlockEncoder(dataBlockEncoder); + hbr.setIncludesMemstoreTS(includesMemstoreTS); + + HFileBlock b; + int pos = 0; + for (int blockId = 0; blockId < numBlocks; ++blockId) { + b = hbr.readBlockData(pos, -1, -1, pread); + b.sanityCheck(); + pos += b.getOnDiskSizeWithHeader(); + + assertEquals((int) encodedSizes.get(blockId), + b.getUncompressedSizeWithoutHeader()); + ByteBuffer actualBuffer = b.getBufferWithoutHeader(); + if (encoding != DataBlockEncoding.NONE) { + // We expect a two-byte big-endian encoding id. + assertEquals(0, actualBuffer.get(0)); + assertEquals(encoding.getId(), actualBuffer.get(1)); + actualBuffer.position(2); + actualBuffer = actualBuffer.slice(); + } + + ByteBuffer expectedBuffer = encodedBlocks.get(blockId); + expectedBuffer.rewind(); + + // test if content matches, produce nice message + assertBuffersEqual(expectedBuffer, actualBuffer, algo, encoding, + pread); + } + is.close(); + } + } + } + } + + private void writeEncodedBlock(DataBlockEncoding encoding, + HFileBlock.Writer hbw, final List encodedSizes, + final List encodedBlocks, int blockId) throws IOException { + DataOutputStream dos = hbw.startWriting(BlockType.DATA); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DoubleOutputStream doubleOutputStream = + new DoubleOutputStream(dos, baos); + + final int rawBlockSize = writeTestKeyValues(doubleOutputStream, + blockId); + + ByteBuffer rawBuf = ByteBuffer.wrap(baos.toByteArray()); + rawBuf.rewind(); + + final int encodedSize; + final ByteBuffer encodedBuf; + if (encoding == DataBlockEncoding.NONE) { + encodedSize = rawBlockSize; + encodedBuf = rawBuf; + } else { + ByteArrayOutputStream encodedOut = new ByteArrayOutputStream(); + encoding.getEncoder().compressKeyValues( + new DataOutputStream(encodedOut), + rawBuf.duplicate(), includesMemstoreTS); + // We need to account for the two-byte encoding algorithm ID that + // comes after the 24-byte block header but before encoded KVs. 
+ encodedSize = encodedOut.size() + DataBlockEncoding.ID_SIZE; + encodedBuf = ByteBuffer.wrap(encodedOut.toByteArray()); + } + encodedSizes.add(encodedSize); + encodedBlocks.add(encodedBuf); + } + + private void assertBuffersEqual(ByteBuffer expectedBuffer, + ByteBuffer actualBuffer, Compression.Algorithm compression, + DataBlockEncoding encoding, boolean pread) { + if (!actualBuffer.equals(expectedBuffer)) { + int prefix = 0; + int minLimit = Math.min(expectedBuffer.limit(), actualBuffer.limit()); + while (prefix < minLimit && + expectedBuffer.get(prefix) == actualBuffer.get(prefix)) { + prefix++; + } + + fail(String.format( + "Content mismath for compression %s, encoding %s, " + + "pread %s, commonPrefix %d, expected %s, got %s", + compression, encoding, pread, prefix, + nextBytesToStr(expectedBuffer, prefix), + nextBytesToStr(actualBuffer, prefix))); + } + } + + /** + * Convert a few next bytes in the given buffer at the given position to + * string. Used for error messages. + */ + private static String nextBytesToStr(ByteBuffer buf, int pos) { + int maxBytes = buf.limit() - pos; + int numBytes = Math.min(16, maxBytes); + return Bytes.toStringBinary(buf.array(), buf.arrayOffset() + pos, + numBytes) + (numBytes < maxBytes ? "..." : ""); + } + @Test public void testPreviousOffset() throws IOException { for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { @@ -421,13 +645,17 @@ public class TestHFileBlock { boolean detailedLogging) throws IOException { boolean cacheOnWrite = expectedContents != null; FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo); + HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo, null, + includesMemstoreTS); Map prevOffsetByType = new HashMap(); long totalSize = 0; for (int i = 0; i < NUM_TEST_BLOCKS; ++i) { int blockTypeOrdinal = rand.nextInt(BlockType.values().length); + if (blockTypeOrdinal == BlockType.ENCODED_DATA.ordinal()) { + blockTypeOrdinal = BlockType.DATA.ordinal(); + } BlockType bt = BlockType.values()[blockTypeOrdinal]; - DataOutputStream dos = hbw.startWriting(bt, cacheOnWrite); + DataOutputStream dos = hbw.startWriting(bt); for (int j = 0; j < rand.nextInt(500); ++j) { // This might compress well. 
dos.writeShort(i + 1); @@ -470,8 +698,7 @@ public class TestHFileBlock { byte[] byteArr = new byte[HFileBlock.HEADER_SIZE + size]; ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size); HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, - true, -1); - assertEquals(80, HFileBlock.BYTE_BUFFER_HEAP_SIZE); + HFileBlock.FILL_HEADER, -1, includesMemstoreTS); long byteBufferExpectedSize = ClassSize.align(ClassSize.estimateBase(buf.getClass(), true) + HFileBlock.HEADER_SIZE + size); Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java?rev=1245291&r1=1245290&r2=1245291&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java Fri Feb 17 01:56:33 2012 @@ -20,6 +20,10 @@ package org.apache.hadoop.hbase.io.hfile; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.io.IOException; @@ -41,20 +45,16 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader; import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexChunk; -import org.apache.hadoop.hbase.regionserver.StoreFile; +import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; - import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; -import static org.junit.Assert.*; - @RunWith(Parameterized.class) public class TestHFileBlockIndex { @@ -92,6 +92,8 @@ public class TestHFileBlockIndex { private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 }; + private static final boolean includesMemstoreTS = true; + static { assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length; assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length; @@ -138,7 +140,8 @@ public class TestHFileBlockIndex { @Override public HFileBlock readBlock(long offset, long onDiskSize, - boolean cacheBlock, boolean pread, boolean isCompaction) + boolean cacheBlock, boolean pread, boolean isCompaction, + BlockType expectedBlockType) throws IOException { if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) { @@ -210,13 +213,14 @@ public class TestHFileBlockIndex { private void writeWholeIndex() throws IOException { assertEquals(0, keys.size()); - HFileBlock.Writer hbw = new HFileBlock.Writer(compr); + HFileBlock.Writer hbw = new HFileBlock.Writer(compr, null, + includesMemstoreTS); FSDataOutputStream outputStream = fs.create(path); HFileBlockIndex.BlockIndexWriter biw = new HFileBlockIndex.BlockIndexWriter(hbw, null, null); for (int i = 0; i < NUM_DATA_BLOCKS; ++i) { - hbw.startWriting(BlockType.DATA, false).write( + hbw.startWriting(BlockType.DATA).write( String.valueOf(rand.nextInt(1000)).getBytes()); long blockOffset = 
outputStream.getPos(); hbw.writeHeaderAndData(outputStream); @@ -251,7 +255,7 @@ public class TestHFileBlockIndex { boolean isClosing) throws IOException { while (biw.shouldWriteBlock(isClosing)) { long offset = outputStream.getPos(); - biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType(), false)); + biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType())); hbw.writeHeaderAndData(outputStream); biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(), hbw.getUncompressedSizeWithoutHeader()); @@ -480,7 +484,7 @@ public class TestHFileBlockIndex { HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).createWriter(fs, hfilePath, SMALL_BLOCK_SIZE, HFile.DEFAULT_BYTES_PER_CHECKSUM, - compr, KeyValue.KEY_COMPARATOR); + compr, null, KeyValue.KEY_COMPARATOR, null); Random rand = new Random(19231737); for (int i = 0; i < NUM_KV; ++i) { Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java?rev=1245291&view=auto ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java (added) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java Fri Feb 17 01:56:33 2012 @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.encoding.RedundantKVGenerator; +import org.apache.hadoop.hbase.regionserver.metrics.SchemaConfigured; +import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +@RunWith(Parameterized.class) +public class TestHFileDataBlockEncoder { + private Configuration conf; + private final HBaseTestingUtility TEST_UTIL = + new HBaseTestingUtility(); + private HFileDataBlockEncoderImpl blockEncoder; + private RedundantKVGenerator generator = new RedundantKVGenerator(); + private SchemaConfigured UNKNOWN_TABLE_AND_CF = + SchemaConfigured.createUnknown(); + private boolean includesMemstoreTS; + + /** + * Create test for given data block encoding configuration. + * @param blockEncoder What kind of encoding policy will be used. + */ + public TestHFileDataBlockEncoder(HFileDataBlockEncoderImpl blockEncoder, + boolean includesMemstoreTS) { + this.blockEncoder = blockEncoder; + this.includesMemstoreTS = includesMemstoreTS; + System.err.println("On-disk encoding: " + blockEncoder.getEncodingOnDisk() + + ", in-cache encoding: " + blockEncoder.getEncodingInCache() + + ", includesMemstoreTS: " + includesMemstoreTS); + } + + /** + * Preparation before JUnit test. + */ + @Before + public void setUp() { + conf = TEST_UTIL.getConfiguration(); + SchemaMetrics.configureGlobally(conf); + } + + /** + * Cleanup after JUnit test. + */ + @After + public void tearDown() throws IOException { + TEST_UTIL.cleanupTestDir(); + } + + /** + * Test putting and taking out blocks into cache with different + * encoding options. + */ + @Test + public void testEncodingWithCache() { + HFileBlock block = getSampleHFileBlock(); + LruBlockCache blockCache = + new LruBlockCache(8 * 1024 * 1024, 32 * 1024); + HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(block, false); + BlockCacheKey cacheKey = new BlockCacheKey("test", 0); + blockCache.cacheBlock(cacheKey, cacheBlock); + + HeapSize heapSize = blockCache.getBlock(cacheKey, false); + assertTrue(heapSize instanceof HFileBlock); + + HFileBlock returnedBlock = (HFileBlock) heapSize;; + + if (blockEncoder.getEncodingInCache() == + DataBlockEncoding.NONE) { + assertEquals(block.getBufferWithHeader(), + returnedBlock.getBufferWithHeader()); + } else { + if (BlockType.ENCODED_DATA != returnedBlock.getBlockType()) { + System.out.println(blockEncoder); + } + assertEquals(BlockType.ENCODED_DATA, returnedBlock.getBlockType()); + } + } + + /** + * Test writing to disk. 
+ */ + @Test + public void testEncodingWritePath() { + // usually we have just block without headers, but don't complicate that + HFileBlock block = getSampleHFileBlock(); + Pair result = + blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(), + includesMemstoreTS); + + int size = result.getFirst().limit() - HFileBlock.HEADER_SIZE; + HFileBlock blockOnDisk = new HFileBlock(result.getSecond(), + size, size, -1, result.getFirst(), HFileBlock.FILL_HEADER, 0, + includesMemstoreTS); + + if (blockEncoder.getEncodingOnDisk() != + DataBlockEncoding.NONE) { + assertEquals(BlockType.ENCODED_DATA, blockOnDisk.getBlockType()); + assertEquals(blockEncoder.getEncodingOnDisk().getId(), + blockOnDisk.getDataBlockEncodingId()); + } else { + assertEquals(BlockType.DATA, blockOnDisk.getBlockType()); + } + } + + /** + * Test converting blocks from disk to cache format. + */ + @Test + public void testEncodingReadPath() { + HFileBlock origBlock = getSampleHFileBlock(); + blockEncoder.diskToCacheFormat(origBlock, false); + } + + private HFileBlock getSampleHFileBlock() { + ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer( + generator.generateTestKeyValues(60), includesMemstoreTS); + int size = keyValues.limit(); + ByteBuffer buf = ByteBuffer.allocate(size + HFileBlock.HEADER_SIZE); + buf.position(HFileBlock.HEADER_SIZE); + keyValues.rewind(); + buf.put(keyValues); + HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf, + HFileBlock.FILL_HEADER, 0, includesMemstoreTS); + UNKNOWN_TABLE_AND_CF.passSchemaMetricsTo(b); + return b; + } + + /** + * @return All possible data block encoding configurations + */ + @Parameters + public static Collection getAllConfigurations() { + List configurations = + new ArrayList(); + + for (DataBlockEncoding diskAlgo : DataBlockEncoding.values()) { + for (DataBlockEncoding cacheAlgo : DataBlockEncoding.values()) { + if (diskAlgo != cacheAlgo && diskAlgo != DataBlockEncoding.NONE) { + // We allow (1) the same encoding on disk and in cache, and + // (2) some encoding in cache but no encoding on disk (for testing). 
+ continue; + } + for (boolean includesMemstoreTS : new boolean[] {false, true}) { + configurations.add(new Object[] { + new HFileDataBlockEncoderImpl(diskAlgo, cacheAlgo), + new Boolean(includesMemstoreTS)}); + } + } + } + + return configurations; + } +} Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java?rev=1245291&r1=1245290&r2=1245291&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java Fri Feb 17 01:56:33 2012 @@ -298,7 +298,7 @@ public class TestHFilePerformance extend FSDataInputStream fin = fs.open(path); if ("HFile".equals(fileType)){ - HFile.Reader reader = HFile.createReader(path, fs.open(path), + HFile.Reader reader = HFile.createReaderFromStream(path, fs.open(path), fs.getFileStatus(path).getLen(), new CacheConfig(conf)); reader.loadFileInfo(); switch (method) { @@ -309,7 +309,7 @@ public class TestHFilePerformance extend { HFileScanner scanner = reader.getScanner(false, false); scanner.seekTo(); - for (long l=0 ; l comparator = trailer.createComparator(); - HFileBlockIndex.BlockIndexReader dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator, - trailer.getNumDataIndexLevels()); - HFileBlockIndex.BlockIndexReader metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader( - Bytes.BYTES_RAWCOMPARATOR, 1); + HFileBlockIndex.BlockIndexReader dataBlockIndexReader = + new HFileBlockIndex.BlockIndexReader(comparator, + trailer.getNumDataIndexLevels()); + HFileBlockIndex.BlockIndexReader metaBlockIndexReader = + new HFileBlockIndex.BlockIndexReader( + Bytes.BYTES_RAWCOMPARATOR, 1); HFileBlock.BlockIterator blockIter = blockReader.blockRange( trailer.getLoadOnOpenDataOffset(), @@ -143,8 +146,10 @@ public class TestHFileWriterV2 { // File info FileInfo fileInfo = new FileInfo(); fileInfo.readFields(blockIter.nextBlockAsStream(BlockType.FILE_INFO)); - byte [] keyValueFormatVersion = fileInfo.get(HFileWriterV2.KEY_VALUE_VERSION); - boolean includeMemstoreTS = (keyValueFormatVersion != null && Bytes.toInt(keyValueFormatVersion) > 0); + byte [] keyValueFormatVersion = fileInfo.get( + HFileWriterV2.KEY_VALUE_VERSION); + boolean includeMemstoreTS = keyValueFormatVersion != null && + Bytes.toInt(keyValueFormatVersion) > 0; // Counters for the number of key/value pairs and the number of blocks int entriesRead = 0; Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java?rev=1245291&r1=1245290&r2=1245291&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java Fri Feb 17 01:56:33 2012 @@ -56,10 +56,12 @@ import org.apache.hadoop.hbase.client.Ro import org.apache.hadoop.hbase.client.RowMutation; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import 
org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileScanner; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.regionserver.StoreFile; @@ -857,7 +859,7 @@ public class TestHFileOutputFormat { // configured bloom type. Path dataFilePath = fileSystem.listStatus(f.getPath())[0].getPath(); StoreFile.Reader reader = new StoreFile.Reader(fileSystem, - dataFilePath, new CacheConfig(conf)); + dataFilePath, new CacheConfig(conf), null); Map metadataMap = reader.loadFileInfo(); assertTrue("timeRange is not set", Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java?rev=1245291&r1=1245290&r2=1245291&view=diff ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java (original) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java Fri Feb 17 01:56:33 2012 @@ -182,8 +182,8 @@ public class CreateRandomStoreFile { } StoreFile.Writer sfw = StoreFile.createWriter(fs, outputDir, blockSize, - compr, KeyValue.COMPARATOR, conf, new CacheConfig(conf), bloomType, - numKV); + compr, KeyValue.COMPARATOR, conf, new CacheConfig(conf), + bloomType, numKV); rand = new Random(); LOG.info("Writing " + numKV + " key/value pairs"); Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java?rev=1245291&view=auto ============================================================================== --- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java (added) +++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java Fri Feb 17 01:56:33 2012 @@ -0,0 +1,601 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.apache.commons.cli.PosixParser; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.Compression; +import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; +import org.apache.hadoop.hbase.io.hfile.Compression.Algorithm; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.compress.Compressor; +import org.apache.hadoop.io.compress.Decompressor; + +/** + * Tests various algorithms for key compression on an existing HFile. Useful + * for testing, debugging and benchmarking. + */ +public class DataBlockEncodingTool { + private static final Log LOG = LogFactory.getLog( + DataBlockEncodingTool.class); + + private static final boolean includesMemstoreTS = true; + + /** + * How many times should benchmark run. + * More times means better data in terms of statistics. + * It has to be larger than BENCHMARK_N_OMIT. + */ + public static int BENCHMARK_N_TIMES = 12; + + /** + * How many first runs should omit benchmark. + * Usually it is one in order to exclude setup cost. + * Has to be 0 or larger. + */ + public static int BENCHMARK_N_OMIT = 2; + + /** Compression algorithm to use if not specified on the command line */ + private static final Algorithm DEFAULT_COMPRESSION = + Compression.Algorithm.GZ; + + private List codecs = new ArrayList(); + private int totalPrefixLength = 0; + private int totalKeyLength = 0; + private int totalValueLength = 0; + private int totalKeyRedundancyLength = 0; + + final private String compressionAlgorithmName; + final private Algorithm compressionAlgorithm; + final private Compressor compressor; + final private Decompressor decompressor; + + /** + * @param compressionAlgorithmName What kind of algorithm should be used + * as baseline for comparison (e.g. lzo, gz). + */ + public DataBlockEncodingTool(String compressionAlgorithmName) { + this.compressionAlgorithmName = compressionAlgorithmName; + this.compressionAlgorithm = Compression.getCompressionAlgorithmByName( + compressionAlgorithmName); + this.compressor = this.compressionAlgorithm.getCompressor(); + this.decompressor = this.compressionAlgorithm.getDecompressor(); + } + + /** + * Check statistics for given HFile for different data block encoders. + * @param scanner Of file which will be compressed. + * @param kvLimit Maximal count of KeyValue which will be processed. 
+ * @throws IOException thrown if scanner is invalid + */ + public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) + throws IOException { + scanner.seek(KeyValue.LOWESTKEY); + + KeyValue currentKv; + + byte[] previousKey = null; + byte[] currentKey; + + List dataBlockEncoders = + DataBlockEncoding.getAllEncoders(); + + for (DataBlockEncoder d : dataBlockEncoders) { + codecs.add(new EncodedDataBlock(d, includesMemstoreTS)); + } + + int j = 0; + while ((currentKv = scanner.next()) != null && j < kvLimit) { + // Iterates through key/value pairs + j++; + currentKey = currentKv.getKey(); + if (previousKey != null) { + for (int i = 0; i < previousKey.length && i < currentKey.length && + previousKey[i] == currentKey[i]; ++i) { + totalKeyRedundancyLength++; + } + } + + for (EncodedDataBlock codec : codecs) { + codec.addKv(currentKv); + } + + previousKey = currentKey; + + totalPrefixLength += currentKv.getLength() - currentKv.getKeyLength() - + currentKv.getValueLength(); + totalKeyLength += currentKv.getKeyLength(); + totalValueLength += currentKv.getValueLength(); + } + } + + /** + * Verify if all data block encoders are working properly. + * + * @param scanner Of file which was compressed. + * @param kvLimit Maximal count of KeyValue which will be processed. + * @return true if all data block encoders compressed/decompressed correctly. + * @throws IOException thrown if scanner is invalid + */ + public boolean verifyCodecs(final KeyValueScanner scanner, final int kvLimit) + throws IOException { + KeyValue currentKv; + + scanner.seek(KeyValue.LOWESTKEY); + List> codecIterators = + new ArrayList>(); + for(EncodedDataBlock codec : codecs) { + codecIterators.add(codec.getIterator()); + } + + int j = 0; + while ((currentKv = scanner.next()) != null && j < kvLimit) { + // Iterates through key/value pairs + ++j; + for (Iterator it : codecIterators) { + KeyValue codecKv = it.next(); + if (codecKv == null || 0 != Bytes.compareTo( + codecKv.getBuffer(), codecKv.getOffset(), codecKv.getLength(), + currentKv.getBuffer(), currentKv.getOffset(), + currentKv.getLength())) { + if (codecKv == null) { + LOG.error("There is a bug in codec " + it + + " it returned null KeyValue,"); + } else { + int prefix = 0; + int limitLength = 2 * Bytes.SIZEOF_INT + + Math.min(codecKv.getLength(), currentKv.getLength()); + while (prefix < limitLength && + codecKv.getBuffer()[prefix + codecKv.getOffset()] == + currentKv.getBuffer()[prefix + currentKv.getOffset()]) { + prefix++; + } + + LOG.error("There is bug in codec " + it.toString() + + "\n on element " + j + + "\n codecKv.getKeyLength() " + codecKv.getKeyLength() + + "\n codecKv.getValueLength() " + codecKv.getValueLength() + + "\n codecKv.getLength() " + codecKv.getLength() + + "\n currentKv.getKeyLength() " + currentKv.getKeyLength() + + "\n currentKv.getValueLength() " + currentKv.getValueLength() + + "\n codecKv.getLength() " + currentKv.getLength() + + "\n currentKV rowLength " + currentKv.getRowLength() + + " familyName " + currentKv.getFamilyLength() + + " qualifier " + currentKv.getQualifierLength() + + "\n prefix " + prefix + + "\n codecKv '" + Bytes.toStringBinary(codecKv.getBuffer(), + codecKv.getOffset(), prefix) + "' diff '" + + Bytes.toStringBinary(codecKv.getBuffer(), + codecKv.getOffset() + prefix, codecKv.getLength() - + prefix) + "'" + + "\n currentKv '" + Bytes.toStringBinary( + currentKv.getBuffer(), + currentKv.getOffset(), prefix) + "' diff '" + + Bytes.toStringBinary(currentKv.getBuffer(), + currentKv.getOffset() + prefix, 
currentKv.getLength() - + prefix) + "'" + ); + } + return false; + } + } + } + + LOG.info("Verification was successful!"); + + return true; + } + + /** + * Benchmark codec's speed. + */ + public void benchmarkCodecs() { + int prevTotalSize = -1; + for (EncodedDataBlock codec : codecs) { + prevTotalSize = benchmarkEncoder(prevTotalSize, codec); + } + + byte[] buffer = codecs.get(0).getRawKeyValues(); + + benchmarkDefaultCompression(prevTotalSize, buffer); + } + + /** + * Benchmark compression/decompression throughput. + * @param previousTotalSize Total size used for verification. Use -1 if + * unknown. + * @param codec Tested encoder. + * @return Size of uncompressed data. + */ + private int benchmarkEncoder(int previousTotalSize, EncodedDataBlock codec) { + int prevTotalSize = previousTotalSize; + int totalSize = 0; + + // decompression time + List durations = new ArrayList(); + for (int itTime = 0; itTime < BENCHMARK_N_TIMES; ++itTime) { + totalSize = 0; + + Iterator it; + + it = codec.getIterator(); + + // count only the algorithm time, without memory allocations + // (expect first time) + final long startTime = System.nanoTime(); + while (it.hasNext()) { + totalSize += it.next().getLength(); + } + final long finishTime = System.nanoTime(); + if (itTime >= BENCHMARK_N_OMIT) { + durations.add(finishTime - startTime); + } + + if (prevTotalSize != -1 && prevTotalSize != totalSize) { + throw new IllegalStateException(String.format( + "Algorithm '%s' decoded data to different size", codec.toString())); + } + prevTotalSize = totalSize; + } + + // compression time + List compressDurations = new ArrayList(); + for (int itTime = 0; itTime < BENCHMARK_N_TIMES; ++itTime) { + final long startTime = System.nanoTime(); + codec.doCompressData(); + final long finishTime = System.nanoTime(); + if (itTime >= BENCHMARK_N_OMIT) { + compressDurations.add(finishTime - startTime); + } + } + + System.out.println(codec.toString() + ":"); + printBenchmarkResult(totalSize, compressDurations, false); + printBenchmarkResult(totalSize, durations, true); + + return prevTotalSize; + } + + private void benchmarkDefaultCompression(int totalSize, byte[] rawBuffer) { + benchmarkAlgorithm(compressionAlgorithm, compressor, decompressor, + compressionAlgorithmName.toUpperCase(), rawBuffer, 0, totalSize); + } + + /** + * Check decompress performance of a given algorithm and print it. + * @param algorithm Compression algorithm. + * @param compressorCodec Compressor to be tested. + * @param decompressorCodec Decompressor of the same algorithm. + * @param name Name of algorithm. + * @param buffer Buffer to be compressed. + * @param offset Position of the beginning of the data. + * @param length Length of data in buffer. 
+ */ + public static void benchmarkAlgorithm( + Compression.Algorithm algorithm, + Compressor compressorCodec, + Decompressor decompressorCodec, + String name, + byte[] buffer, int offset, int length) { + System.out.println(name + ":"); + + // compress it + List compressDurations = new ArrayList(); + ByteArrayOutputStream compressedStream = new ByteArrayOutputStream(); + OutputStream compressingStream; + try { + for (int itTime = 0; itTime < BENCHMARK_N_TIMES; ++itTime) { + final long startTime = System.nanoTime(); + compressingStream = algorithm.createCompressionStream( + compressedStream, compressorCodec, 0); + compressingStream.write(buffer, offset, length); + compressingStream.flush(); + compressedStream.toByteArray(); + + final long finishTime = System.nanoTime(); + + // add time record + if (itTime >= BENCHMARK_N_OMIT) { + compressDurations.add(finishTime - startTime); + } + + if (itTime + 1 < BENCHMARK_N_TIMES) { // not the last one + compressedStream.reset(); + } + } + } catch (IOException e) { + throw new RuntimeException(String.format( + "Benchmark, or encoding algorithm '%s' cause some stream problems", + name), e); + } + printBenchmarkResult(length, compressDurations, false); + + + byte[] compBuffer = compressedStream.toByteArray(); + + // uncompress it several times and measure performance + List durations = new ArrayList(); + for (int itTime = 0; itTime < BENCHMARK_N_TIMES; ++itTime) { + final long startTime = System.nanoTime(); + byte[] newBuf = new byte[length + 1]; + + try { + + ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer, + 0, compBuffer.length); + InputStream decompressedStream = algorithm.createDecompressionStream( + downStream, decompressorCodec, 0); + + int destOffset = 0; + int nextChunk; + while ((nextChunk = decompressedStream.available()) > 0) { + destOffset += decompressedStream.read(newBuf, destOffset, nextChunk); + } + decompressedStream.close(); + + // iterate over KeyValue + KeyValue kv; + for (int pos = 0; pos < length; pos += kv.getLength()) { + kv = new KeyValue(newBuf, pos); + } + + } catch (IOException e) { + throw new RuntimeException(String.format( + "Decoding path in '%s' algorithm cause exception ", name), e); + } + + final long finishTime = System.nanoTime(); + + // check correctness + if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) { + int prefix = 0; + for(; prefix < buffer.length && prefix < newBuf.length; ++prefix) { + if (buffer[prefix] != newBuf[prefix]) { + break; + } + } + throw new RuntimeException(String.format( + "Algorithm '%s' is corrupting the data", name)); + } + + // add time record + if (itTime >= BENCHMARK_N_OMIT) { + durations.add(finishTime - startTime); + } + } + printBenchmarkResult(length, durations, true); + } + + private static void printBenchmarkResult(int totalSize, + List durationsInNanoSed, boolean isDecompression) { + long meanTime = 0; + for (long time : durationsInNanoSed) { + meanTime += time; + } + meanTime /= durationsInNanoSed.size(); + + long standardDev = 0; + for (long time : durationsInNanoSed) { + standardDev += (time - meanTime) * (time - meanTime); + } + standardDev = (long) Math.sqrt(standardDev / durationsInNanoSed.size()); + + final double million = 1000.0 * 1000.0 * 1000.0; + double mbPerSec = (totalSize * million) / (1024.0 * 1024.0 * meanTime); + double mbPerSecDev = (totalSize * million) / + (1024.0 * 1024.0 * (meanTime - standardDev)); + + System.out.println(String.format( + " %s performance:%s %6.2f MB/s (+/- %.2f MB/s)", + isDecompression ? 
"Decompression" : "Compression", + isDecompression ? "" : " ", + mbPerSec, mbPerSecDev - mbPerSec)); + } + + /** + * Display statistics of different compression algorithms. + */ + public void displayStatistics() { + int totalLength = totalPrefixLength + totalKeyLength + totalValueLength; + if (compressor != null) { // might be null e.g. for pure-Java GZIP + compressor.reset(); + } + + for(EncodedDataBlock codec : codecs) { + System.out.println(codec.toString()); + int saved = totalKeyLength + totalPrefixLength + totalValueLength + - codec.getSize(); + System.out.println( + String.format(" Saved bytes: %8d", saved)); + double keyRatio = (saved * 100.0) / (totalPrefixLength + totalKeyLength); + double allRatio = (saved * 100.0) / totalLength; + System.out.println( + String.format(" Key compression ratio: %.2f %%", keyRatio)); + System.out.println( + String.format(" All compression ratio: %.2f %%", allRatio)); + + String compressedSizeCaption = + String.format(" %s compressed size: ", + compressionAlgorithmName.toUpperCase()); + String compressOnlyRatioCaption = + String.format(" %s compression ratio: ", + compressionAlgorithmName.toUpperCase()); + + if (compressor != null) { + int compressedSize = codec.checkCompressedSize(compressor); + System.out.println(compressedSizeCaption + + String.format("%8d", compressedSize)); + double compressOnlyRatio = + 100.0 * (1.0 - compressedSize / (0.0 + totalLength)); + System.out.println(compressOnlyRatioCaption + + String.format("%.2f %%", compressOnlyRatio)); + } else { + System.out.println(compressedSizeCaption + "N/A"); + System.out.println(compressOnlyRatioCaption + "N/A"); + } + } + + System.out.println( + String.format("Total KV prefix length: %8d", totalPrefixLength)); + System.out.println( + String.format("Total key length: %8d", totalKeyLength)); + System.out.println( + String.format("Total key redundancy: %8d", + totalKeyRedundancyLength)); + System.out.println( + String.format("Total value length: %8d", totalValueLength)); + } + + /** + * Test a data block encoder on the given HFile. Output results to console. + * @param kvLimit The limit of KeyValue which will be analyzed. + * @param hfilePath an HFile path on the file system. + * @param compressionName Compression algorithm used for comparison. + * @param doBenchmark Run performance benchmarks. + * @param doVerify Verify correctness. + * @throws IOException When pathName is incorrect. 
+   */
+  public static void testCodecs(Configuration conf, int kvLimit,
+      String hfilePath, String compressionName, boolean doBenchmark,
+      boolean doVerify) throws IOException {
+    // create environment
+    Path path = new Path(hfilePath);
+    CacheConfig cacheConf = new CacheConfig(conf);
+    FileSystem fs = FileSystem.get(conf);
+    StoreFile hsf = new StoreFile(fs, path, conf, cacheConf,
+        StoreFile.BloomType.NONE, NoOpDataBlockEncoder.INSTANCE);
+
+    StoreFile.Reader reader = hsf.createReader();
+    reader.loadFileInfo();
+    KeyValueScanner scanner = reader.getStoreFileScanner(true, true);
+
+    // run the utilities
+    DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName);
+    comp.checkStatistics(scanner, kvLimit);
+    if (doVerify) {
+      comp.verifyCodecs(scanner, kvLimit);
+    }
+    if (doBenchmark) {
+      comp.benchmarkCodecs();
+    }
+    comp.displayStatistics();
+
+    // cleanup
+    scanner.close();
+    reader.close(cacheConf.shouldEvictOnClose());
+  }
+
+  private static void printUsage(Options options) {
+    System.err.println("Usage:");
+    System.err.println(String.format("./hbase %s <options>",
+        DataBlockEncodingTool.class.getName()));
+    System.err.println("Options:");
+    for (Object it : options.getOptions()) {
+      Option opt = (Option) it;
+      if (opt.hasArg()) {
+        System.err.println(String.format("-%s %s: %s", opt.getOpt(),
+            opt.getArgName(), opt.getDescription()));
+      } else {
+        System.err.println(String.format("-%s: %s", opt.getOpt(),
+            opt.getDescription()));
+      }
+    }
+  }
+
+  /**
+   * A command line interface to benchmarks.
+   * @param args Should have length at least 1 and hold the path to an HFile.
+   * @throws IOException If you specified the wrong file.
+   */
+  public static void main(final String[] args) throws IOException {
+    // set up user arguments
+    Options options = new Options();
+    options.addOption("f", true, "HFile to analyse (REQUIRED)");
+    options.getOption("f").setArgName("FILENAME");
+    options.addOption("n", true,
+        "Limit the number of KeyValues which will be analysed");
+    options.getOption("n").setArgName("NUMBER");
+    options.addOption("b", false, "Measure read throughput");
+    options.addOption("c", false, "Omit correctness tests.");
+    options.addOption("a", true,
+        "Which compression algorithm to use for comparison.");
+
+    // parse arguments
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd = null;
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      System.err.println("Could not parse arguments!");
+      System.exit(-1);
+      return; // avoid warning
+    }
+
+    int kvLimit = Integer.MAX_VALUE;
+    if (cmd.hasOption("n")) {
+      kvLimit = Integer.parseInt(cmd.getOptionValue("n"));
+    }
+
+    // basic argument sanity checks
+    if (!cmd.hasOption("f")) {
+      System.err.println("ERROR: Filename is required!");
+      printUsage(options);
+      System.exit(-1);
+    }
+
+    String pathName = cmd.getOptionValue("f");
+    String compressionName = DEFAULT_COMPRESSION.getName();
+    if (cmd.hasOption("a")) {
+      compressionName = cmd.getOptionValue("a").toLowerCase();
+    }
+    boolean doBenchmark = cmd.hasOption("b");
+    boolean doVerify = !cmd.hasOption("c");
+
+    final Configuration conf = HBaseConfiguration.create();
+    try {
+      testCodecs(conf, kvLimit, pathName, compressionName, doBenchmark,
+          doVerify);
+    } finally {
+      (new CacheConfig(conf)).getBlockCache().shutdown();
+    }
+  }
+
+}
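[Usage example -- not part of the patch. A minimal sketch of driving the tool
above through its public testCodecs() entry point instead of the command line.
The HFile path and the "gz" compression name are hypothetical placeholders, and
the example class is assumed to be compiled in the same package as
DataBlockEncodingTool so the tool itself needs no extra import.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class DataBlockEncodingToolExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Analyse every KeyValue in the file, benchmark read throughput,
        // and verify that each codec round-trips the data correctly.
        DataBlockEncodingTool.testCodecs(conf, Integer.MAX_VALUE,
            "/hbase/TestTable/region/family/hfile", // hypothetical HFile path
            "gz",   // compression algorithm used for the size comparison
            true,   // doBenchmark
            true);  // doVerify
      }
    }

[The tool's own main() additionally shuts the block cache down afterwards via
(new CacheConfig(conf)).getBlockCache().shutdown(), which a standalone driver
would want to do as well.]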
Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java?rev=1245291&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java Fri Feb 17 01:56:33 2012
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
+import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
+import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
+import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
+
+/**
+ * Test seek performance for encoded data blocks. Read an HFile and do several
+ * random seeks.
+ */
+public class EncodedSeekPerformanceTest {
+  private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0;
+  private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0;
+  /** Default number of seeks which will be used in benchmark. */
+  public static int DEFAULT_NUMBER_OF_SEEKS = 10000;
+
+  private final HBaseTestingUtility testingUtility = new HBaseTestingUtility();
+  private Configuration configuration = testingUtility.getConfiguration();
+  private CacheConfig cacheConf = new CacheConfig(configuration);
+  private Random randomizer;
+  private int numberOfSeeks;
+
+  /** Use this benchmark with default options */
+  public EncodedSeekPerformanceTest() {
+    configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f);
+    randomizer = new Random(42L);
+    numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS;
+  }
+
+  private List<KeyValue> prepareListOfTestSeeks(Path path) throws IOException {
+    List<KeyValue> allKeyValues = new ArrayList<KeyValue>();
+
+    // read all of the key values
+    StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(),
+        path, configuration, cacheConf, BloomType.NONE,
+        NoOpDataBlockEncoder.INSTANCE);
+
+    StoreFile.Reader reader = storeFile.createReader();
+    StoreFileScanner scanner = reader.getStoreFileScanner(true, false);
+    KeyValue current;
+
+    scanner.seek(KeyValue.LOWESTKEY);
+    while (null != (current = scanner.next())) {
+      allKeyValues.add(current);
+    }
+
+    storeFile.closeReader(cacheConf.shouldEvictOnClose());
+
+    // pick the seek targets at random
+    List<KeyValue> seeks = new ArrayList<KeyValue>();
+    for (int i = 0; i < numberOfSeeks; ++i) {
+      KeyValue keyValue = allKeyValues.get(
+          randomizer.nextInt(allKeyValues.size()));
+      seeks.add(keyValue);
+    }
+
+    clearBlockCache();
+
+    return seeks;
+  }
+
+  private void runTest(Path path, HFileDataBlockEncoder blockEncoder,
+      List<KeyValue> seeks) throws IOException {
+    // read all of the key values
+    StoreFile storeFile = new StoreFile(testingUtility.getTestFileSystem(),
+        path, configuration, cacheConf, BloomType.NONE, blockEncoder);
+
+    long totalSize = 0;
+
+    StoreFile.Reader reader = storeFile.createReader();
+    StoreFileScanner scanner = reader.getStoreFileScanner(true, false);
+
+    long startReadingTime = System.nanoTime();
+    KeyValue current;
+    scanner.seek(KeyValue.LOWESTKEY);
+    while (null != (current = scanner.next())) { // just iterate it!
+      if (current.getLength() < 0) {
+        throw new IOException("Negative KV size: " + current);
+      }
+      totalSize += current.getLength();
+    }
+    long finishReadingTime = System.nanoTime();
+
+    // do seeks
+    long startSeeksTime = System.nanoTime();
+    for (KeyValue keyValue : seeks) {
+      scanner.seek(keyValue);
+      KeyValue toVerify = scanner.next();
+      if (!keyValue.equals(toVerify)) {
+        System.out.println(String.format("KeyValue doesn't match:\n" +
+            "Orig key: %s\n" +
+            "Ret key: %s", keyValue.getKeyString(), toVerify.getKeyString()));
+        break;
+      }
+    }
+    long finishSeeksTime = System.nanoTime();
+    if (finishSeeksTime < startSeeksTime) {
+      throw new AssertionError("Finish time " + finishSeeksTime +
+          " is earlier than start time " + startSeeksTime);
+    }
+
+    // write some stats
+    double readInMbPerSec = (totalSize * NANOSEC_IN_SEC) /
+        (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime));
+    double seeksPerSec = (seeks.size() * NANOSEC_IN_SEC) /
+        (finishSeeksTime - startSeeksTime);
+
+    storeFile.closeReader(cacheConf.shouldEvictOnClose());
+    clearBlockCache();
+
+    System.out.println(blockEncoder);
+    System.out.printf(" Read speed: %8.2f (MB/s)\n", readInMbPerSec);
+    System.out.printf(" Seeks per second: %8.2f (#/s)\n", seeksPerSec);
+    System.out.printf(" Total KV size: %d\n", totalSize);
+  }
+
+  /**
+   * @param path Path to the HFile which will be used.
+   * @param encoders List of encoders which will be used for tests.
+   * @throws IOException if there is a bug while reading from disk
+   */
+  public void runTests(Path path, List<HFileDataBlockEncoder> encoders)
+      throws IOException {
+    List<KeyValue> seeks = prepareListOfTestSeeks(path);
+
+    for (HFileDataBlockEncoder blockEncoder : encoders) {
+      runTest(path, blockEncoder, seeks);
+    }
+  }
+
+  /**
+   * Command line interface:
+   * @param args Takes one argument - the path of the HFile to analyse.
+   * @throws IOException if there is a bug while reading from disk
+   */
+  public static void main(final String[] args) throws IOException {
+    if (args.length < 1) {
+      printUsage();
+      System.exit(-1);
+    }
+
+    Path path = new Path(args[0]);
+    List<HFileDataBlockEncoder> encoders =
+        new ArrayList<HFileDataBlockEncoder>();
+
+    encoders.add(new HFileDataBlockEncoderImpl(DataBlockEncoding.NONE));
+    for (DataBlockEncoding encodingAlgo : DataBlockEncoding.values()) {
+      encoders.add(new HFileDataBlockEncoderImpl(DataBlockEncoding.NONE,
+          encodingAlgo));
+    }
+
+    EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest();
+    utility.runTests(path, encoders);
+
+    System.exit(0);
+  }
+
+  private static void printUsage() {
+    System.out.println("Usage: one argument, name of the HFile");
+  }
+
+  private void clearBlockCache() {
+    ((LruBlockCache) cacheConf.getBlockCache()).clearCache();
+  }
+}
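[Usage example -- not part of the patch. A minimal sketch of running the new
seek benchmark programmatically through its public runTests() entry point. The
HFile path is a hypothetical placeholder, and the example class is assumed to
be compiled in the same package (org.apache.hadoop.hbase.regionserver), so the
test class itself needs no import.]

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
    import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
    import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;

    public class EncodedSeekExample {
      public static void main(String[] args) throws Exception {
        List<HFileDataBlockEncoder> encoders =
            new ArrayList<HFileDataBlockEncoder>();
        // Baseline: no encoding on disk or in the block cache.
        encoders.add(new HFileDataBlockEncoderImpl(DataBlockEncoding.NONE));
        // PREFIX encoding in the block cache only; on-disk blocks stay as-is.
        encoders.add(new HFileDataBlockEncoderImpl(DataBlockEncoding.NONE,
            DataBlockEncoding.PREFIX));

        EncodedSeekPerformanceTest benchmark = new EncodedSeekPerformanceTest();
        benchmark.runTests(new Path("/tmp/example-hfile"), encoders); // hypothetical path
      }
    }

[This mirrors what the class's own main() does, except that main() builds one
in-cache encoder per DataBlockEncoding value rather than only PREFIX.]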