From: apurtell@apache.org
To: commits@hbase.apache.org
Date: Mon, 26 Oct 2015 23:45:01 -0000
Subject: [4/6] hbase git commit: HBASE-14283 Reverse scan doesn’t work with HFile inline index/bloom blocks

HBASE-14283 Reverse scan doesn’t work with HFile inline index/bloom blocks

Signed-off-by: Andrew Purtell

Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/0db04a17
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/0db04a17
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/0db04a17

Branch: refs/heads/branch-1.1
Commit: 0db04a1705e5e8cc04cc9c010ddfc5612f60cfec
Parents: b756058
Author: Ben Lau
Authored: Wed Oct 7 17:52:34 2015 -0700
Committer: Andrew Purtell
Committed: Mon Oct 26 15:13:48 2015 -0700

----------------------------------------------------------------------
 .../java/org/apache/hadoop/hbase/CellUtil.java  |  11 ++
 .../hadoop/hbase/io/hfile/HFileReaderV2.java    |   7 +-
 .../hfile/TestSeekBeforeWithInlineBlocks.java   | 187 +++++++++++++++++++
 3 files changed, 203 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/0db04a17/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
----------------------------------------------------------------------
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
index b0eece8..a89d573 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java
@@ -905,4 +905,15 @@ public final class CellUtil {
     return builder.toString();
   }
+
+  /**************** equals ****************************/
+
+  public static boolean equals(Cell a, Cell b) {
+    return matchingRow(a, b) && matchingFamily(a, b) && matchingQualifier(a, b)
+        && matchingTimestamp(a, b) && matchingType(a, b);
+  }
+
+  public static boolean matchingType(Cell a, Cell b) {
+    return a.getTypeByte() == b.getTypeByte();
+  }
 }
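
[Editor's note] For context on the CellUtil additions above: equals(Cell, Cell) treats two cells as equal when their row, family, qualifier, timestamp, and type byte all match; the value bytes are deliberately not compared. The new test's checkCell() uses it to compare each written cell with the cell read back by the scanner. A minimal usage sketch follows; it is not part of the patch, and the row/family/qualifier literals are invented for illustration:

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;

    public class CellEqualsExample {
      public static void main(String[] args) {
        byte[] row = Bytes.toBytes("row-1");
        byte[] fam = Bytes.toBytes("f");
        byte[] qual = Bytes.toBytes("q");
        long ts = 42L;

        // Two cells with identical key coordinates but different values.
        Cell a = new KeyValue(row, fam, qual, ts, Bytes.toBytes("value-A"));
        Cell b = new KeyValue(row, fam, qual, ts, Bytes.toBytes("value-B"));

        // equals() compares row, family, qualifier, timestamp and type byte,
        // but not the value bytes, so this prints true.
        System.out.println(CellUtil.equals(a, b));

        // matchingType() compares only the type byte; both cells here are Puts.
        System.out.println(CellUtil.matchingType(a, b));
      }
    }

Both cells above default to the Put type byte, so matchingType() returns true; a Put and a Delete at the same coordinates would compare as not equal.
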
http://git-wip-us.apache.org/repos/asf/hbase/blob/0db04a17/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
index 26648c9..c3f864b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
@@ -669,9 +669,12 @@ public class HFileReaderV2 extends AbstractHFileReader {
       // It is important that we compute and pass onDiskSize to the block
       // reader so that it does not have to read the header separately to
-      // figure out the size.
+      // figure out the size. Currently, we do not have a way to do this
+      // correctly in the general case however.
+      // TODO: See https://issues.apache.org/jira/browse/HBASE-14576
+      int prevBlockSize = -1;
       seekToBlock = reader.readBlock(previousBlockOffset,
-          seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
+          prevBlockSize, cacheBlocks,
           pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
       // TODO shortcut: seek forward in this block to the last key of the
       // block.


http://git-wip-us.apache.org/repos/asf/hbase/blob/0db04a17/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBeforeWithInlineBlocks.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBeforeWithInlineBlocks.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBeforeWithInlineBlocks.java
new file mode 100644
index 0000000..ac92f4f
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekBeforeWithInlineBlocks.java
@@ -0,0 +1,187 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.BloomFilterFactory;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({MediumTests.class})
+public class TestSeekBeforeWithInlineBlocks {
+
+  private static final Log LOG = LogFactory.getLog(TestSeekBeforeWithInlineBlocks.class);
+
+  private static final HBaseTestingUtility TEST_UTIL =
+      new HBaseTestingUtility();
+
+  private static final int NUM_KV = 10000;
+
+  private static final int DATA_BLOCK_SIZE = 4096;
+  private static final int BLOOM_BLOCK_SIZE = 1024;
+  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
+  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };
+
+  private static final Random RAND = new Random(192537);
+  private static final byte[] FAM = Bytes.toBytes("family");
+
+  private FileSystem fs;
+  private Configuration conf;
+
+  /**
+   * Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
+   * to know the size of that data block, which it calculates using current data block offset and
+   * the previous data block offset. This fails to work when there are leaf-level index blocks in
+   * the scannable section of the HFile, i.e. starting in HFileV2. This test will try seekBefore()
+   * on a flat (single-level) and multi-level (2,3) HFile and confirm this bug is now fixed. This
+   * bug also happens for inline Bloom blocks for the same reasons.
+   */
+  @Test
+  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
+
+    // Try out different HFile versions to ensure reverse scan works on each version
+    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS;
+        hfileVersion <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {
+
+      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
+      fs = HFileSystem.get(conf);
+
+      // Try out different bloom types because inline Bloom blocks break seekBefore()
+      for (BloomType bloomType : BloomType.values()) {
+
+        // Test out HFile block indices of various sizes/levels
+        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
+          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
+          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
+
+          LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
+            hfileVersion, bloomType, expectedNumLevels));
+
+          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
+          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
+
+          Cell[] cells = new Cell[NUM_KV];
+
+          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(),
+            String.format("testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s",
+              hfileVersion, bloomType, testI));
+
+          // Disable caching to prevent it from hiding any bugs in block seeks/reads
+          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
+          CacheConfig cacheConf = new CacheConfig(conf);
+
+          // Write the HFile
+          {
+            HFileContext meta = new HFileContextBuilder()
+                .withBlockSize(DATA_BLOCK_SIZE)
+                .build();
+
+            StoreFile.Writer storeFileWriter =
+                new StoreFile.WriterBuilder(conf, cacheConf, fs)
+                    .withFilePath(hfilePath)
+                    .withFileContext(meta)
+                    .withBloomType(bloomType)
+                    .build();
+
+            for (int i = 0; i < NUM_KV; i++) {
+              byte[] row = TestHFileWriterV2.randomOrderedKey(RAND, i);
+              byte[] qual = TestHFileWriterV2.randomRowOrQualifier(RAND);
+              byte[] value = TestHFileWriterV2.randomValue(RAND);
+              KeyValue kv = new KeyValue(row, FAM, qual, value);
+
+              storeFileWriter.append(kv);
+              cells[i] = kv;
+            }
+
+            storeFileWriter.close();
+          }
+
+          // Read the HFile
+          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, conf);
+
+          // Sanity check the HFile index level
+          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());
+
+          // Check that we can seekBefore in either direction and with both pread
+          // enabled and disabled
+          for (boolean pread : new boolean[] { false, true }) {
+            HFileScanner scanner = reader.getScanner(true, pread);
+            checkNoSeekBefore(cells, scanner, 0);
+            for (int i = 1; i < NUM_KV; i++) {
+              checkSeekBefore(cells, scanner, i);
+              checkCell(cells[i-1], scanner.getKeyValue());
+            }
+            assertTrue(scanner.seekTo());
+            for (int i = NUM_KV - 1; i >= 1; i--) {
+              checkSeekBefore(cells, scanner, i);
+              checkCell(cells[i-1], scanner.getKeyValue());
+            }
+            checkNoSeekBefore(cells, scanner, 0);
+          }
+
+          reader.close();
+        }
+      }
+    }
+  }
+
+  private void checkSeekBefore(Cell[] cells, HFileScanner scanner, int i)
+      throws IOException {
+    assertEquals("Failed to seek to the key before #" + i + " ("
+        + CellUtil.getCellKeyAsString(cells[i]) + ")", true,
+        scanner.seekBefore(cells[i]));
+  }
+
+  private void checkNoSeekBefore(Cell[] cells, HFileScanner scanner, int i)
+      throws IOException {
+    assertEquals("Incorrectly succeeded in seeking to before first key ("
+        + CellUtil.getCellKeyAsString(cells[i]) + ")", false,
+        scanner.seekBefore(cells[i]));
+  }
+
+  /** Check a key/value pair after it was read by the reader */
+  private void checkCell(Cell expected, Cell actual) {
+    assertTrue(String.format("Expected key %s, but was %s",
+        CellUtil.getCellKeyAsString(expected), CellUtil.getCellKeyAsString(actual)),
+      CellUtil.equals(expected, actual));
+  }
+}
+
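
[Editor's note] To make the reasoning behind the HFileReaderV2 change concrete: the old seekBefore() path assumed the previous data block's on-disk size is (current block offset - previous data block offset). Once an inline leaf-index or Bloom block sits between two data blocks, that difference overstates the size, which is the situation the new test exercises. The following standalone sketch illustrates only the arithmetic; it is not HBase code, and the offsets and sizes are invented for illustration:

    public class SeekBeforeSizeSketch {
      public static void main(String[] args) {
        // Hypothetical HFile layout (offsets in bytes):
        //   data block A:       offset    0, on-disk size 4096
        //   inline leaf index:  offset 4096, on-disk size 1024
        //   data block B:       offset 5120
        long prevDataBlockOffset = 0;
        long prevDataBlockOnDiskSize = 4096;
        long currentBlockOffset = 5120;

        // Old assumption: data blocks are contiguous, so the previous block's
        // size is simply the distance between the two offsets.
        long assumedSize = currentBlockOffset - prevDataBlockOffset;

        // With an inline block in between, the assumption overstates the size.
        System.out.println("assumed size = " + assumedSize);              // 5120
        System.out.println("actual size  = " + prevDataBlockOnDiskSize);  // 4096
      }
    }

Passing -1 as the on-disk size to readBlock() signals that the size is unknown, so the reader determines the real size from the block header instead, at the cost of the extra header read the original comment wanted to avoid; the TODO in the patch points to HBASE-14576 for restoring that optimization.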