hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From apurt...@apache.org
Subject svn commit: r1344105 - in /hbase/trunk/hbase-server/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/io/hfile/ test/java/org/apache/hadoop/hbase/regionserver/
Date Wed, 30 May 2012 05:26:15 GMT
Author: apurtell
Date: Wed May 30 05:26:15 2012
New Revision: 1344105

URL: http://svn.apache.org/viewvc?rev=1344105&view=rev
Log:
HBASE-6114. CacheControl flags should be tunable per table schema per CF

Added:
    hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java
Modified:
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
    hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java?rev=1344105&r1=1344104&r2=1344105&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java Wed May 30 05:26:15 2012
@@ -77,7 +77,11 @@ public class HColumnDescriptor implement
   public static final String DATA_BLOCK_ENCODING =
       "DATA_BLOCK_ENCODING";
   public static final String BLOCKCACHE = "BLOCKCACHE";
-  
+  public static final String CACHE_DATA_ON_WRITE = "CACHE_DATA_ON_WRITE";
+  public static final String CACHE_INDEX_ON_WRITE = "CACHE_INDEX_ON_WRITE";
+  public static final String CACHE_BLOOMS_ON_WRITE = "CACHE_BLOOMS_ON_WRITE";
+  public static final String EVICT_BLOCKS_ON_CLOSE = "EVICT_BLOCKS_ON_CLOSE";
+
   /**
    * Size of storefile/hfile 'blocks'.  Default is {@link #DEFAULT_BLOCKSIZE}.
    * Use smaller block sizes for faster random-access at expense of larger
@@ -142,6 +146,18 @@ public class HColumnDescriptor implement
   public static final boolean DEFAULT_BLOCKCACHE = true;
 
   /**
+   * Default setting for whether to cache data blocks on write if block caching
+   * is enabled.
+   */
+  public static final boolean DEFAULT_CACHE_DATA_ON_WRITE = false;
+  
+  /**
+   * Default setting for whether to cache index blocks on write if block
+   * caching is enabled.
+   */
+  public static final boolean DEFAULT_CACHE_INDEX_ON_WRITE = false;
+
+  /**
    * Default size of blocks in files stored to the filesytem (hfiles).
    */
   public static final int DEFAULT_BLOCKSIZE = HFile.DEFAULT_BLOCKSIZE;
@@ -152,6 +168,12 @@ public class HColumnDescriptor implement
   public static final String DEFAULT_BLOOMFILTER = StoreFile.BloomType.NONE.toString();
 
   /**
+   * Default setting for whether to cache bloom filter blocks on write if block
+   * caching is enabled.
+   */
+  public static final boolean DEFAULT_CACHE_BLOOMS_ON_WRITE = false;
+
+  /**
    * Default time to live of cell contents.
    */
   public static final int DEFAULT_TTL = HConstants.FOREVER;
@@ -161,6 +183,12 @@ public class HColumnDescriptor implement
    */
   public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL;
 
+  /**
+   * Default setting for whether to evict cached blocks from the blockcache on
+   * close.
+   */
+  public static final boolean DEFAULT_EVICT_BLOCKS_ON_CLOSE = false;
+
   private final static Map<String, String> DEFAULT_VALUES
     = new HashMap<String, String>();
   private final static Set<ImmutableBytesWritable> RESERVED_KEYWORDS
@@ -178,6 +206,10 @@ public class HColumnDescriptor implement
       DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
       DEFAULT_VALUES.put(ENCODE_ON_DISK, String.valueOf(DEFAULT_ENCODE_ON_DISK));
       DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
+      DEFAULT_VALUES.put(CACHE_DATA_ON_WRITE, String.valueOf(DEFAULT_CACHE_DATA_ON_WRITE));
+      DEFAULT_VALUES.put(CACHE_INDEX_ON_WRITE, String.valueOf(DEFAULT_CACHE_INDEX_ON_WRITE));
+      DEFAULT_VALUES.put(CACHE_BLOOMS_ON_WRITE, String.valueOf(DEFAULT_CACHE_BLOOMS_ON_WRITE));
+      DEFAULT_VALUES.put(EVICT_BLOCKS_ON_CLOSE, String.valueOf(DEFAULT_EVICT_BLOCKS_ON_CLOSE));
       for (String s : DEFAULT_VALUES.keySet()) {
         RESERVED_KEYWORDS.add(new ImmutableBytesWritable(Bytes.toBytes(s)));
       }
@@ -779,6 +811,84 @@ public class HColumnDescriptor implement
   }
 
   /**
+   * @return true if we should cache data blocks on write
+   */
+  public boolean shouldCacheDataOnWrite() {
+    String value = getValue(CACHE_DATA_ON_WRITE);
+    if (value != null) {
+      return Boolean.valueOf(value).booleanValue();
+    }
+    return DEFAULT_CACHE_DATA_ON_WRITE;
+  }
+
+  /**
+   * @param value true if we should cache data blocks on write
+   * @return this (for chained invocation)
+   */
+  public HColumnDescriptor setCacheDataOnWrite(boolean value) {
+    return setValue(CACHE_DATA_ON_WRITE, Boolean.toString(value));
+  }
+
+  /**
+   * @return true if we should cache index blocks on write
+   */
+  public boolean shouldCacheIndexesOnWrite() {
+    String value = getValue(CACHE_INDEX_ON_WRITE);
+    if (value != null) {
+      return Boolean.valueOf(value).booleanValue();
+    }
+    return DEFAULT_CACHE_INDEX_ON_WRITE;
+  }
+
+  /**
+   * @param value true if we should cache index blocks on write
+   * @return this (for chained invocation)
+   */
+  public HColumnDescriptor setCacheIndexesOnWrite(boolean value) {
+    return setValue(CACHE_INDEX_ON_WRITE, Boolean.toString(value));
+  }
+
+  /**
+   * @return true if we should cache bloomfilter blocks on write
+   */
+  public boolean shouldCacheBloomsOnWrite() {
+    String value = getValue(CACHE_BLOOMS_ON_WRITE);
+    if (value != null) {
+      return Boolean.valueOf(value).booleanValue();
+    }
+    return DEFAULT_CACHE_BLOOMS_ON_WRITE;
+  }
+
+  /**
+   * @param value true if we should cache bloomfilter blocks on write
+   * @return this (for chained invocation)
+   */
+  public HColumnDescriptor setCacheBloomsOnWrite(boolean value) {
+    return setValue(CACHE_BLOOMS_ON_WRITE, Boolean.toString(value));
+  }
+
+  /**
+   * @return true if we should evict cached blocks from the blockcache on
+   * close
+   */
+  public boolean shouldEvictBlocksOnClose() {
+    String value = getValue(EVICT_BLOCKS_ON_CLOSE);
+    if (value != null) {
+      return Boolean.valueOf(value).booleanValue();
+    }
+    return DEFAULT_EVICT_BLOCKS_ON_CLOSE;
+  }
+
+  /**
+   * @param value true if we should evict cached blocks from the blockcache on
+   * close
+   * @return this (for chained invocation)
+   */
+  public HColumnDescriptor setEvictBlocksOnClose(boolean value) {
+    return setValue(EVICT_BLOCKS_ON_CLOSE, Boolean.toString(value));
+  }
+
+  /**
    * @see java.lang.Object#toString()
    */
   @Override

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java?rev=1344105&r1=1344104&r2=1344105&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java Wed May 30 05:26:15 2012
@@ -117,13 +117,18 @@ public class CacheConfig {
    */
   public CacheConfig(Configuration conf, HColumnDescriptor family) {
     this(CacheConfig.instantiateBlockCache(conf),
-        family.isBlockCacheEnabled(), family.isInMemory(),
-        conf.getBoolean(CACHE_BLOCKS_ON_WRITE_KEY, DEFAULT_CACHE_DATA_ON_WRITE),
+        family.isBlockCacheEnabled(),
+        family.isInMemory(),
+        // For the following flags we enable them regardless of per-schema settings
+        // if they are enabled in the global configuration.
+        conf.getBoolean(CACHE_BLOCKS_ON_WRITE_KEY,
+            DEFAULT_CACHE_DATA_ON_WRITE) || family.shouldCacheDataOnWrite(),
         conf.getBoolean(CACHE_INDEX_BLOCKS_ON_WRITE_KEY,
-            DEFAULT_CACHE_INDEXES_ON_WRITE),
+            DEFAULT_CACHE_INDEXES_ON_WRITE) || family.shouldCacheIndexesOnWrite(),
         conf.getBoolean(CACHE_BLOOM_BLOCKS_ON_WRITE_KEY,
-            DEFAULT_CACHE_BLOOMS_ON_WRITE),
-        conf.getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, DEFAULT_EVICT_ON_CLOSE),
+            DEFAULT_CACHE_BLOOMS_ON_WRITE) || family.shouldCacheBloomsOnWrite(),
+        conf.getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY,
+            DEFAULT_EVICT_ON_CLOSE) || family.shouldEvictBlocksOnClose(),
         conf.getBoolean(CACHE_DATA_BLOCKS_COMPRESSED_KEY, DEFAULT_COMPRESSED_CACHE)
      );
   }

Modified: hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java?rev=1344105&r1=1344104&r2=1344105&view=diff
==============================================================================
--- hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java (original)
+++ hbase/trunk/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java Wed May 30 05:26:15 2012
@@ -288,7 +288,7 @@ public class HFileBlock extends SchemaCo
    * @return the on-disk size of the block with header size included. This
    * includes the header, the data and the checksum data.
    */
-  int getOnDiskSizeWithHeader() {
+  public int getOnDiskSizeWithHeader() {
     return onDiskSizeWithoutHeader + headerSize();
   }
 

Added: hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java
URL: http://svn.apache.org/viewvc/hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java?rev=1344105&view=auto
==============================================================================
--- hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java (added)
+++ hbase/trunk/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java Wed May 30 05:26:15 2012
@@ -0,0 +1,270 @@
+/*
+ * Copyright 2011 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.MediumTests;
+import org.apache.hadoop.hbase.fs.HFileSystem;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
+import org.apache.hadoop.hbase.io.hfile.BlockType;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.io.hfile.HFileReaderV2;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2;
+import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Tests {@link HFile} cache-on-write functionality for data blocks, non-root
+ * index blocks, and Bloom filter blocks, as specified by the column family. 
+ */
+@RunWith(Parameterized.class)
+@Category(MediumTests.class)
+public class TestCacheOnWriteInSchema {
+
+  private static final Log LOG = LogFactory.getLog(TestCacheOnWriteInSchema.class);
+
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final String DIR = TEST_UTIL.getDataTestDir("TestCacheOnWriteInSchema").toString();
+  private static final byte [] table = Bytes.toBytes("table");
+  private static byte [] family = Bytes.toBytes("family");
+  private static final int NUM_KV = 25000;
+  private static final Random rand = new Random(12983177L);
+  /** The number of valid key types possible in a store file */
+  private static final int NUM_VALID_KEY_TYPES =
+      KeyValue.Type.values().length - 2;
+
+  private static enum CacheOnWriteType {
+    DATA_BLOCKS(BlockType.DATA, BlockType.ENCODED_DATA),
+    BLOOM_BLOCKS(BlockType.BLOOM_CHUNK),
+    INDEX_BLOCKS(BlockType.LEAF_INDEX, BlockType.INTERMEDIATE_INDEX);
+
+    private final BlockType blockType1;
+    private final BlockType blockType2;
+
+    private CacheOnWriteType(BlockType blockType) {
+      this(blockType, blockType);
+    }
+
+    private CacheOnWriteType(BlockType blockType1, BlockType blockType2) {
+      this.blockType1 = blockType1;
+      this.blockType2 = blockType2;
+    }
+
+    public boolean shouldBeCached(BlockType blockType) {
+      return blockType == blockType1 || blockType == blockType2;
+    }
+
+    public void modifyFamilySchema(HColumnDescriptor family) {
+      switch (this) {
+      case DATA_BLOCKS:
+        family.setCacheDataOnWrite(true);
+        break;
+      case BLOOM_BLOCKS:
+        family.setCacheBloomsOnWrite(true);
+        break;
+      case INDEX_BLOCKS:
+        family.setCacheIndexesOnWrite(true);
+        break;
+      }
+    }
+  }
+
+  private final CacheOnWriteType cowType;
+  private Configuration conf;
+  private final String testDescription;
+  private Store store;
+  private FileSystem fs;
+
+  public TestCacheOnWriteInSchema(CacheOnWriteType cowType) {
+    this.cowType = cowType;
+    testDescription = "[cacheOnWrite=" + cowType + "]";
+    System.out.println(testDescription);
+  }
+
+  @Parameters
+  public static Collection<Object[]> getParameters() {
+    List<Object[]> cowTypes = new ArrayList<Object[]>();
+    for (CacheOnWriteType cowType : CacheOnWriteType.values()) {
+      cowTypes.add(new Object[] { cowType });
+    }
+    return cowTypes;
+  }
+
+  @Before
+  public void setUp() throws IOException {
+    conf = TEST_UTIL.getConfiguration();
+    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
+    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
+    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);
+    conf.setBoolean(CacheConfig.CACHE_BLOOM_BLOCKS_ON_WRITE_KEY, false);
+
+    fs = HFileSystem.get(conf);
+
+    // Create the schema
+    HColumnDescriptor hcd = new HColumnDescriptor(family);
+    hcd.setBloomFilterType(BloomType.ROWCOL);
+    cowType.modifyFamilySchema(hcd);
+    HTableDescriptor htd = new HTableDescriptor(table);
+    htd.addFamily(hcd);
+
+    // Create a store based on the schema
+    Path basedir = new Path(DIR);
+    Path logdir = new Path(DIR+"/logs");
+    Path oldLogDir = new Path(basedir, HConstants.HREGION_OLDLOGDIR_NAME);
+    fs.delete(logdir, true);
+    HRegionInfo info = new HRegionInfo(htd.getName(), null, null, false);
+    HLog hlog = new HLog(fs, logdir, oldLogDir, conf);
+    HRegion region = new HRegion(basedir, hlog, fs, conf, info, htd, null);
+    store = new Store(basedir, region, hcd, fs, conf);
+  }
+
+  @After
+  public void tearDown() {
+    try {
+      fs.delete(new Path(DIR), true);
+    } catch (IOException e) {
+      LOG.error("Could not delete " + DIR, e);
+    }
+  }
+
+  @Test
+  public void testCacheOnWriteInSchema() throws IOException {
+    // Write some random data into the store
+    StoreFile.Writer writer = store.createWriterInTmp(Integer.MAX_VALUE,
+        HFile.DEFAULT_COMPRESSION_ALGORITHM, false);
+    writeStoreFile(writer);
+    writer.close();
+    // Verify the block types of interest were cached on write
+    readStoreFile(writer.getPath());
+  }
+
+  private void readStoreFile(Path path) throws IOException {
+    CacheConfig cacheConf = store.getCacheConfig(); 
+    BlockCache cache = cacheConf.getBlockCache();
+    StoreFile sf = new StoreFile(fs, path, conf, cacheConf,
+        BloomType.ROWCOL, null);
+    store.passSchemaMetricsTo(sf);
+    HFileReaderV2 reader = (HFileReaderV2) sf.createReader().getHFileReader();
+    try {
+      // Open a scanner with (on read) caching disabled
+      HFileScanner scanner = reader.getScanner(false, false);
+      assertTrue(testDescription, scanner.seekTo());
+      // Cribbed from io.hfile.TestCacheOnWrite
+      long offset = 0;
+      HFileBlock prevBlock = null;
+      while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
+        long onDiskSize = -1;
+        if (prevBlock != null) {
+          onDiskSize = prevBlock.getNextBlockOnDiskSizeWithHeader();
+        }
+        // Flags: don't cache the block, use pread, this is not a compaction.
+        // Also, pass null for expected block type to avoid checking it.
+        HFileBlock block = reader.readBlock(offset, onDiskSize, false, true,
+          false, null);
+        BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
+          offset);
+        boolean isCached = cache.getBlock(blockCacheKey, true) != null;
+        boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
+        if (shouldBeCached != isCached) {
+          throw new AssertionError(
+            "shouldBeCached: " + shouldBeCached+ "\n" +
+            "isCached: " + isCached + "\n" +
+            "Test description: " + testDescription + "\n" +
+            "block: " + block + "\n" +
+            "blockCacheKey: " + blockCacheKey);
+        }
+        prevBlock = block;
+        offset += block.getOnDiskSizeWithHeader();
+      }
+    } finally {
+      reader.close();
+    }
+  }
+
+  private static KeyValue.Type generateKeyType(Random rand) {
+    if (rand.nextBoolean()) {
+      // Let's make half of KVs puts.
+      return KeyValue.Type.Put;
+    } else {
+      KeyValue.Type keyType =
+          KeyValue.Type.values()[1 + rand.nextInt(NUM_VALID_KEY_TYPES)];
+      if (keyType == KeyValue.Type.Minimum || keyType == KeyValue.Type.Maximum)
+      {
+        throw new RuntimeException("Generated an invalid key type: " + keyType
+            + ". " + "Probably the layout of KeyValue.Type has changed.");
+      }
+      return keyType;
+    }
+  }
+
+  private void writeStoreFile(StoreFile.Writer writer) throws IOException {
+    final int rowLen = 32;
+    for (int i = 0; i < NUM_KV; ++i) {
+      byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
+      byte[] v = TestHFileWriterV2.randomValue(rand);
+      int cfLen = rand.nextInt(k.length - rowLen + 1);
+      KeyValue kv = new KeyValue(
+          k, 0, rowLen,
+          k, rowLen, cfLen,
+          k, rowLen + cfLen, k.length - rowLen - cfLen,
+          rand.nextLong(),
+          generateKeyType(rand),
+          v, 0, v.length);
+      writer.append(kv);
+    }
+  }
+
+  @org.junit.Rule
+  public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
+    new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
+}
+



Mime
View raw message