hbase-commits mailing list archives

From mbau...@apache.org
Subject svn commit: r1245291 [1/7] - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/ main/java/org/apache/hadoop/hbase/client/ main/java/org/apache/hadoop/hbase/io/ main/java/org/apache/hadoop/hbase/io/encoding/ main/java/org/apache/hadoop/h...
Date Fri, 17 Feb 2012 01:56:35 GMT
Author: mbautin
Date: Fri Feb 17 01:56:33 2012
New Revision: 1245291

URL: http://svn.apache.org/viewvc?rev=1245291&view=rev
Log:
[jira] [HBASE-4218] [89-fb] Porting HFile data block encoding to 89-fb

Summary:
This is the 89-fb version of the data block encoding patch at
https://reviews.facebook.net/D447 (based on Jacek Migdal's work during
his 2011 summer internship at Facebook).  The trunk patch has already
gone through an extensive review cycle.  Please see the JIRA and the
original patch for design/implementation details.

Test Plan: Unit tests; deploy to a dev cluster and run a load test.

Reviewers: kannan, kranganathan, nspiegelberg, gqchen

Reviewed By: kannan

Differential Revision: https://reviews.facebook.net/D1659
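
For readers of this commit, a minimal usage sketch (not part of the patch): enabling the new encoding on an existing column family through the HColumnDescriptor setters and the HBaseAdmin.modifyColumn(String, HColumnDescriptor) overload added below. The FAST_DIFF constant is assumed to be the DataBlockEncoding value backed by FastDiffDeltaEncoder; table and family names are placeholders.

    // Sketch only -- table/family names and the FAST_DIFF constant are assumptions.
    HBaseAdmin admin = new HBaseAdmin(conf);
    HTableDescriptor tableDesc = admin.getTableDescriptor(Bytes.toBytes("t1"));
    HColumnDescriptor family = tableDesc.getFamily(Bytes.toBytes("cf"));
    family.setDataBlockEncoding(DataBlockEncoding.FAST_DIFF);
    family.setEncodeOnDisk(true);      // encode on disk as well as in the block cache
    admin.disableTable("t1");
    admin.modifyColumn("t1", family);  // new overload added in this patch
    admin.enableTable("t1");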

Added:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/EncoderBufferTooSmallException.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/encoding/
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/encoding/TestBufferedDataBlockEncoder.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/encoding/TestChangingEncoding.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/encoding/TestUpgradeFromHFileV1ToEncoding.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/EncodedSeekPerformanceTest.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/LoadTestKVGenerator.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java
Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/KeyValue.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV1.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/mapreduce/LoadIncrementalHFiles.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/CompactSplitThread.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/metrics/SchemaConfigured.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
    hbase/branches/0.89-fb/src/main/ruby/hbase/admin.rb
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/client/TestAdmin.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/client/TestFromClientSide.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/TestHeapSize.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/HFileReadWriteTest.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactSelection.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompaction.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestMultiColumnScanner.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithBloomError.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestSeekOptimizations.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java?rev=1245291&r1=1245290&r2=1245291&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java Fri Feb 17 01:56:33 2012
@@ -27,6 +27,7 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.Compression;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
@@ -53,7 +54,17 @@ public class HColumnDescriptor implement
   // Version 6 adds metadata as a map where keys and values are byte[].
   // Version 7 -- add new compression and hfile blocksize to HColumnDescriptor (HBASE-1217)
   // Version 8 -- reintroduction of bloom filters, changed from boolean to enum
-  private static final byte COLUMN_DESCRIPTOR_VERSION = (byte)8;
+  // Version 9 -- add data block encoding
+  private static final byte COLUMN_DESCRIPTOR_VERSION = (byte) 9;
+
+  // These constants are used as FileInfo keys
+  public static final String COMPRESSION = "COMPRESSION";
+  public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT";
+  public static final String ENCODE_ON_DISK =
+      "ENCODE_ON_DISK";
+  public static final String DATA_BLOCK_ENCODING =
+      "DATA_BLOCK_ENCODING";
+  public static final String BLOCKCACHE = "BLOCKCACHE";
 
   /**
    * The type of compression.
@@ -71,8 +82,6 @@ public class HColumnDescriptor implement
     BLOCK
   }
 
-  public static final String COMPRESSION = "COMPRESSION";
-  public static final String BLOCKCACHE = "BLOCKCACHE";
   public static final String BLOCKSIZE = "BLOCKSIZE";
   public static final String LENGTH = "LENGTH";
   public static final String TTL = "TTL";
@@ -88,6 +97,17 @@ public class HColumnDescriptor implement
     Compression.Algorithm.NONE.getName();
 
   /**
+   * Default value of the flag that enables data block encoding on disk, as
+   * opposed to encoding in cache only. We encode blocks everywhere by default,
+   * as long as {@link #DATA_BLOCK_ENCODING} is not NONE.
+   */
+  public static final boolean DEFAULT_ENCODE_ON_DISK = true;
+
+  /** Default data block encoding algorithm. */
+  public static final String DEFAULT_DATA_BLOCK_ENCODING =
+      DataBlockEncoding.NONE.toString();
+
+  /**
    * Default number of versions of a record to keep.
    */
   public static final int DEFAULT_VERSIONS = 3;
@@ -135,6 +155,23 @@ public class HColumnDescriptor implement
    */
   public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL;
 
+  private final static Map<String, String> DEFAULT_VALUES = new HashMap<String, String>();
+  static {
+      DEFAULT_VALUES.put(BLOOMFILTER, DEFAULT_BLOOMFILTER);
+      DEFAULT_VALUES.put(REPLICATION_SCOPE, String.valueOf(DEFAULT_REPLICATION_SCOPE));
+      DEFAULT_VALUES.put(HConstants.VERSIONS, String.valueOf(DEFAULT_VERSIONS));
+      DEFAULT_VALUES.put(COMPRESSION, DEFAULT_COMPRESSION);
+      DEFAULT_VALUES.put(TTL, String.valueOf(DEFAULT_TTL));
+      DEFAULT_VALUES.put(BLOCKSIZE, String.valueOf(DEFAULT_BLOCKSIZE));
+      DEFAULT_VALUES.put(HConstants.IN_MEMORY, String.valueOf(DEFAULT_IN_MEMORY));
+      DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
+      DEFAULT_VALUES.put(ENCODE_ON_DISK,
+          String.valueOf(DEFAULT_ENCODE_ON_DISK));
+      DEFAULT_VALUES.put(DATA_BLOCK_ENCODING,
+          String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
+  }
+
+
   // Column family name
   private byte [] name;
 
@@ -221,20 +258,47 @@ public class HColumnDescriptor implement
       DEFAULT_BLOCKSIZE, timeToLive, bloomFilter, DEFAULT_REPLICATION_SCOPE);
   }
 
+  /**
+   * Constructor
+   * @param familyName Column family name. Must be 'printable' -- digit or
+   * letter -- and may not contain a <code>:<code>
+   * @param maxVersions Maximum number of versions to keep
+   * @param compression Compression type
+   * @param inMemory If true, column data should be kept in an HRegionServer's
+   * cache
+   * @param blockCacheEnabled If true, MapFile blocks should be cached
+   * @param blocksize Block size to use when writing out storefiles.  Use
+   * smaller block sizes for faster random-access at expense of larger indices
+   * (more memory consumption).  Default is usually 64k.
+   * @param timeToLive Time-to-live of cell contents, in seconds
+   * (use HConstants.FOREVER for unlimited TTL)
+   * @param bloomFilter Bloom filter type for this column
+   * @param scope The scope tag for this column
+   *
+   * @throws IllegalArgumentException if passed a family name that is made of
+   * other than 'word' characters: i.e. <code>[a-zA-Z_0-9]</code> or contains
+   * a <code>:</code>
+   * @throws IllegalArgumentException if the number of versions is &lt;= 0
+   */
   public HColumnDescriptor(final byte [] familyName, final int maxVersions,
-	      final String compression, final boolean inMemory,
-	      final boolean blockCacheEnabled, final int blocksize,
-	      final int timeToLive, final String bloomFilter, final int scope) {
+      final String compression, final boolean inMemory,
+      final boolean blockCacheEnabled, final int blocksize,
+      final int timeToLive, final String bloomFilter, final int scope) {
+    this(familyName, maxVersions,
+        compression, DEFAULT_ENCODE_ON_DISK, DEFAULT_DATA_BLOCK_ENCODING,
+        inMemory, blockCacheEnabled, blocksize, timeToLive, bloomFilter,
+        scope, DEFAULT_BLOOMFILTER_ERROR_RATE);
+  }
 
-	  this(familyName, maxVersions, compression, inMemory, blockCacheEnabled,
-			  blocksize, timeToLive, bloomFilter, scope, DEFAULT_BLOOMFILTER_ERROR_RATE);
-   }
   /**
    * Constructor
    * @param familyName Column family name. Must be 'printable' -- digit or
    * letter -- and may not contain a <code>:<code>
    * @param maxVersions Maximum number of versions to keep
    * @param compression Compression type
+   * @param encodeOnDisk whether to use the specified data block encoding
+   *        on disk. If false, the encoding will be used in cache only.
+   * @param dataBlockEncoding data block encoding
    * @param inMemory If true, column data should be kept in an HRegionServer's
    * cache
    * @param blockCacheEnabled If true, MapFile blocks should be cached
@@ -249,11 +313,12 @@ public class HColumnDescriptor implement
    * a <code>:</code>
    * @throws IllegalArgumentException if the number of versions is &lt;= 0
    */
-
-  public HColumnDescriptor(final byte [] familyName, final int maxVersions,
-      final String compression, final boolean inMemory,
+  public HColumnDescriptor(final byte[] familyName, final int maxVersions,
+      final String compression, final boolean encodeOnDisk,
+      final String dataBlockEncoding, final boolean inMemory,
       final boolean blockCacheEnabled, final int blocksize,
-      final int timeToLive, final String bloomFilter, final int scope, float bloomErrorRate) {
+      final int timeToLive, final String bloomFilter, final int scope,
+      float bloomErrorRate) {
     isLegalFamilyName(familyName);
     this.name = familyName;
 
@@ -268,6 +333,9 @@ public class HColumnDescriptor implement
     setTimeToLive(timeToLive);
     setCompressionType(Compression.Algorithm.
       valueOf(compression.toUpperCase()));
+    setEncodeOnDisk(encodeOnDisk);
+    setDataBlockEncoding(DataBlockEncoding.
+        valueOf(dataBlockEncoding.toUpperCase()));
     setBloomFilterType(StoreFile.BloomType.
       valueOf(bloomFilter.toUpperCase()));
     setBloomFilterErrorRate(bloomErrorRate);
@@ -435,6 +503,57 @@ public class HColumnDescriptor implement
     setValue(COMPRESSION, compressionType);
   }
 
+  /** @return data block encoding algorithm used on disk */
+  public DataBlockEncoding getDataBlockEncodingOnDisk() {
+    String encodeOnDiskStr = getValue(ENCODE_ON_DISK);
+    boolean encodeOnDisk;
+    if (encodeOnDiskStr == null) {
+      encodeOnDisk = DEFAULT_ENCODE_ON_DISK;
+    } else {
+      encodeOnDisk = Boolean.valueOf(encodeOnDiskStr);
+    }
+
+    if (!encodeOnDisk) {
+      // No encoding on disk.
+      return DataBlockEncoding.NONE;
+    }
+    return getDataBlockEncoding();
+  }
+
+  /**
+   * Set the flag indicating that we only want to encode data block in cache
+   * but not on disk.
+   */
+  public void setEncodeOnDisk(boolean encodeOnDisk) {
+    setValue(ENCODE_ON_DISK, String.valueOf(encodeOnDisk));
+  }
+
+  /**
+   * @return the data block encoding algorithm used in block cache and
+   *         optionally on disk
+   */
+  public DataBlockEncoding getDataBlockEncoding() {
+    String type = getValue(DATA_BLOCK_ENCODING);
+    if (type == null) {
+      type = DEFAULT_DATA_BLOCK_ENCODING;
+    }
+    return DataBlockEncoding.valueOf(type);
+  }
+
+  /**
+   * Set data block encoding algorithm used in block cache.
+   * @param type What kind of data block encoding will be used.
+   */
+  public void setDataBlockEncoding(DataBlockEncoding type) {
+    String name;
+    if (type != null) {
+      name = type.toString();
+    } else {
+      name = DataBlockEncoding.NONE.toString();
+    }
+    setValue(DATA_BLOCK_ENCODING, name);
+  }
+
   /**
    * @return True if we are to keep all in use HRegionServer cache.
    */
@@ -669,12 +788,12 @@ public class HColumnDescriptor implement
         ImmutableBytesWritable value = new ImmutableBytesWritable();
         key.readFields(in);
         value.readFields(in);
-        
+
         // in version 8, the BloomFilter setting changed from bool to enum
         if (version < 8 && Bytes.toString(key.get()).equals(BLOOMFILTER)) {
           value.set(Bytes.toBytes(
               Boolean.getBoolean(Bytes.toString(value.get()))
-                ? BloomType.ROW.toString() 
+                ? BloomType.ROW.toString()
                 : BloomType.NONE.toString()));
         }
 
@@ -712,4 +831,4 @@ public class HColumnDescriptor implement
     }
     return result;
   }
-}
+}
\ No newline at end of file
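
A short sketch (not part of the patch) of how the two accessors added above interact when ENCODE_ON_DISK is turned off; the PREFIX constant is assumed to be the DataBlockEncoding value backed by PrefixKeyDeltaEncoder.

    // Sketch only: cache-only encoding.
    HColumnDescriptor cf = new HColumnDescriptor(Bytes.toBytes("cf"));
    cf.setDataBlockEncoding(DataBlockEncoding.PREFIX);  // assumed enum constant
    cf.setEncodeOnDisk(false);                          // encode in the block cache only
    cf.getDataBlockEncoding();        // returns PREFIX: used for cached blocks
    cf.getDataBlockEncodingOnDisk();  // returns NONE: blocks are written unencoded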

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java?rev=1245291&r1=1245290&r2=1245291&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/HConstants.java Fri Feb 17 01:56:33 2012
@@ -188,6 +188,10 @@ public final class HConstants {
   /** Used to construct the name of the compaction directory during compaction */
   public static final String HREGION_COMPACTIONDIR_NAME = "compaction.dir";
 
+  /** Conf key for the max file size after which we split the region */
+  public static final String HREGION_MAX_FILESIZE =
+      "hbase.hregion.max.filesize";
+
   /** File Extension used while splitting an HLog into regions (HBASE-2312) */
   public static final String HLOG_SPLITTING_EXT = "-splitting";
 
@@ -207,6 +211,10 @@ public final class HConstants {
   /** Default maximum file size */
   public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
 
+  /** Conf key for the memstore size at which we flush the memstore */
+  public static final String HREGION_MEMSTORE_FLUSH_SIZE =
+      "hbase.hregion.memstore.flush.size";
+
   /** Default size of a reservation block   */
   public static final int DEFAULT_SIZE_RESERVATION_BLOCK = 1024 * 1024 * 5;
 

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/KeyValue.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/KeyValue.java?rev=1245291&r1=1245290&r2=1245291&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/KeyValue.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/KeyValue.java Fri Feb 17 01:56:33 2012
@@ -130,16 +130,27 @@ public class KeyValue implements Writabl
     return COMPARATOR.getRawComparator();
   }
 
+  /** Size of the key length field in bytes*/
+  public static final int KEY_LENGTH_SIZE = Bytes.SIZEOF_INT;
+
+  /** Size of the key type field in bytes */
+  public static final int TYPE_SIZE = Bytes.SIZEOF_BYTE;
+
+  /** Size of the row length field in bytes */
+  public static final int ROW_LENGTH_SIZE = Bytes.SIZEOF_SHORT;
+
+  /** Size of the family length field in bytes */
+  public static final int FAMILY_LENGTH_SIZE = Bytes.SIZEOF_BYTE;
+
+  /** Size of the timestamp field in bytes */
+  public static final int TIMESTAMP_SIZE = Bytes.SIZEOF_LONG;
+
   // Size of the timestamp and type byte on end of a key -- a long + a byte.
-  public static final int TIMESTAMP_TYPE_SIZE =
-    Bytes.SIZEOF_LONG /* timestamp */ +
-    Bytes.SIZEOF_BYTE /*keytype*/;
+  public static final int TIMESTAMP_TYPE_SIZE = TIMESTAMP_SIZE + TYPE_SIZE;
 
   // Size of the length shorts and bytes in key.
-  public static final int KEY_INFRASTRUCTURE_SIZE =
-    Bytes.SIZEOF_SHORT /*rowlength*/ +
-    Bytes.SIZEOF_BYTE /*columnfamilylength*/ +
-    TIMESTAMP_TYPE_SIZE;
+  public static final int KEY_INFRASTRUCTURE_SIZE = ROW_LENGTH_SIZE
+      + FAMILY_LENGTH_SIZE + TIMESTAMP_TYPE_SIZE;
 
   // How far into the key the row starts at. First thing to read is the short
   // that says how long the row is.
@@ -233,7 +244,7 @@ public class KeyValue implements Writabl
 
   /** Dragon time over, return to normal business */
 
-  
+
   /** Writable Constructor -- DO NOT USE */
   public KeyValue() {}
 
@@ -611,12 +622,23 @@ public class KeyValue implements Writabl
   }
 
   /**
-   * Use for logging.
-   * @param b Key portion of a KeyValue.
-   * @param o Offset to start of key
-   * @param l Length of key.
-   * @return Key as a String.
+   * Produces a string map for this key/value pair. Useful for programmatic use
+   * and manipulation of the data stored in an HLogKey, for example, printing
+   * as JSON. Values are left out due to their tendency to be large. If needed,
+   * they can be added manually.
+   *
+   * @return the Map<String,?> containing data from this key
    */
+  public Map<String, Object> toStringMap() {
+    Map<String, Object> stringMap = new HashMap<String, Object>();
+    stringMap.put("row", Bytes.toStringBinary(getRow()));
+    stringMap.put("family", Bytes.toStringBinary(getFamily()));
+    stringMap.put("qualifier", Bytes.toStringBinary(getQualifier()));
+    stringMap.put("timestamp", getTimestamp());
+    stringMap.put("vlen", getValueLength());
+    return stringMap;
+  }
+
   public static String keyToString(final byte [] b, final int o, final int l) {
     if (b == null) return "";
     int rowlength = Bytes.toShort(b, o);
@@ -630,31 +652,21 @@ public class KeyValue implements Writabl
       Bytes.toStringBinary(b, columnoffset + familylength,
       columnlength - familylength);
     long timestamp = Bytes.toLong(b, o + (l - TIMESTAMP_TYPE_SIZE));
+    String timestampStr = humanReadableTimestamp(timestamp);
     byte type = b[o + l - 1];
-//    return row + "/" + family +
-//      (family != null && family.length() > 0? COLUMN_FAMILY_DELIMITER: "") +
-//      qualifier + "/" + timestamp + "/" + Type.codeToType(type);
     return row + "/" + family +
       (family != null && family.length() > 0? ":" :"") +
-      qualifier + "/" + timestamp + "/" + Type.codeToType(type);
+      qualifier + "/" + timestampStr + "/" + Type.codeToType(type);
   }
 
-  /**
-   * Produces a string map for this key/value pair. Useful for programmatic use
-   * and manipulation of the data stored in an HLogKey, for example, printing
-   * as JSON. Values are left out due to their tendency to be large. If needed,
-   * they can be added manually.
-   *
-   * @return the Map<String,?> containing data from this key
-   */
-  public Map<String, Object> toStringMap() {
-    Map<String, Object> stringMap = new HashMap<String, Object>();
-    stringMap.put("row", Bytes.toStringBinary(getRow()));
-    stringMap.put("family", Bytes.toStringBinary(getFamily()));
-    stringMap.put("qualifier", Bytes.toStringBinary(getQualifier()));
-    stringMap.put("timestamp", getTimestamp());
-    stringMap.put("vlen", getValueLength());
-    return stringMap;
+  public static String humanReadableTimestamp(final long timestamp) {
+    if (timestamp == HConstants.LATEST_TIMESTAMP) {
+      return "LATEST_TIMESTAMP";
+    }
+    if (timestamp == HConstants.OLDEST_TIMESTAMP) {
+      return "OLDEST_TIMESTAMP";
+    }
+    return String.valueOf(timestamp);
   }
 
   //---------------------------------------------------------------------------
@@ -698,7 +710,7 @@ public class KeyValue implements Writabl
    * @return length of entire KeyValue, in bytes
    */
   private static int getLength(byte [] bytes, int offset) {
-    return (2 * Bytes.SIZEOF_INT) +
+    return ROW_OFFSET +
         Bytes.toInt(bytes, offset) +
         Bytes.toInt(bytes, offset + Bytes.SIZEOF_INT);
   }
@@ -1009,7 +1021,7 @@ public class KeyValue implements Writabl
     System.arraycopy(this.bytes, o, result, 0, l);
     return result;
   }
-  
+
   //---------------------------------------------------------------------------
   //
   //  KeyValue splitter
@@ -1210,7 +1222,7 @@ public class KeyValue implements Writabl
     // KV format:  <keylen:4><valuelen:4><key:keylen><value:valuelen>
     // Rebuild as: <keylen:4><0:4><key:keylen>
     int dataLen = lenAsVal? Bytes.SIZEOF_INT : 0;
-    byte [] newBuffer = new byte[getKeyLength() + (2 * Bytes.SIZEOF_INT) + dataLen];
+    byte [] newBuffer = new byte[getKeyLength() + ROW_OFFSET + dataLen];
     System.arraycopy(this.bytes, this.offset, newBuffer, 0,
         Math.min(newBuffer.length,this.length));
     Bytes.putInt(newBuffer, Bytes.SIZEOF_INT, dataLen);
@@ -1283,6 +1295,10 @@ public class KeyValue implements Writabl
     return index;
   }
 
+  /**
+   * This function is only used in Meta key comparisons so its error message
+   * is specific for meta key errors.
+   */
   static int getRequiredDelimiterInReverse(final byte [] b,
       final int offset, final int length, final int delimiter) {
     int index = getDelimiterInReverse(b, offset, length, delimiter);
@@ -1483,7 +1499,7 @@ public class KeyValue implements Writabl
       short rrowlength = right.getRowLength();
       // TsOffset = end of column data. just comparing Row+CF length of each
       return ((left.getTimestampOffset() - left.getOffset()) ==
-			 (right.getTimestampOffset() - right.getOffset())) &&
+              (right.getTimestampOffset() - right.getOffset())) &&
         matchingRows(left, lrowlength, right, rrowlength) &&
         compareColumns(left, lrowlength, right, rrowlength) == 0;
     }
@@ -1599,7 +1615,7 @@ public class KeyValue implements Writabl
    * Create a KeyValue that is smaller than all other possible KeyValues
    * for the given row. That is any (valid) KeyValue on 'row' would sort
    * _after_ the result.
-   * 
+   *
    * @param row - row key (arbitrary byte array)
    * @return First possible KeyValue on passed <code>row</code>
    */
@@ -1807,9 +1823,9 @@ public class KeyValue implements Writabl
    */
   public static KeyValue createKeyValueFromKey(final byte [] b, final int o,
       final int l) {
-    byte [] newb = new byte[b.length + ROW_OFFSET];
+    byte [] newb = new byte[l + ROW_OFFSET];
     System.arraycopy(b, o, newb, ROW_OFFSET, l);
-    Bytes.putInt(newb, 0, b.length);
+    Bytes.putInt(newb, 0, l);
     Bytes.putInt(newb, Bytes.SIZEOF_INT, 0);
     return new KeyValue(newb);
   }
@@ -1932,9 +1948,23 @@ public class KeyValue implements Writabl
   }
 
   /**
+   * Avoids redundant comparisons for better performance.
+   */
+  public static interface SamePrefixComparator<T> {
+    /**
+     * Compare two keys assuming that the first n bytes are the same.
+     * @param commonPrefix How many bytes are the same.
+     */
+    public int compareIgnoringPrefix(int commonPrefix,
+        T left, int loffset, int llength,
+        T right, int roffset, int rlength);
+  }
+
+  /**
    * Compare key portion of a {@link KeyValue}.
    */
-  public static class KeyComparator implements RawComparator<byte []> {
+  public static class KeyComparator
+      implements RawComparator<byte []>, SamePrefixComparator<byte[]> {
     volatile boolean ignoreTimestamp = false;
     volatile boolean ignoreType = false;
 
@@ -1944,36 +1974,94 @@ public class KeyValue implements Writabl
       short lrowlength = Bytes.toShort(left, loffset);
       short rrowlength = Bytes.toShort(right, roffset);
       int compare = compareRows(left, loffset + Bytes.SIZEOF_SHORT,
-          lrowlength,
-          right, roffset + Bytes.SIZEOF_SHORT, rrowlength);
+          lrowlength, right, roffset + Bytes.SIZEOF_SHORT, rrowlength);
       if (compare != 0) {
         return compare;
       }
 
-      // Compare column family.  Start compare past row and family length.
-      int lcolumnoffset = Bytes.SIZEOF_SHORT + lrowlength + 1 + loffset;
-      int rcolumnoffset = Bytes.SIZEOF_SHORT + rrowlength + 1 + roffset;
+      // Compare the rest of the two KVs without making any assumptions about
+      // the common prefix. This function will not compare rows anyway, so we
+      // don't need to tell it that the common prefix includes the row.
+      return compareWithoutRow(0, left, loffset, llength, right, roffset,
+          rlength, rrowlength);
+    }
+
+    /**
+     * Compare the two key-values, ignoring the prefix of the given length
+     * that is known to be the same between the two.
+     * @param commonPrefix the prefix length to ignore
+     */
+    @Override
+    public int compareIgnoringPrefix(int commonPrefix, byte[] left,
+        int loffset, int llength, byte[] right, int roffset, int rlength) {
+      // Compare row
+      short lrowlength = Bytes.toShort(left, loffset);
+      short rrowlength;
+
+      int comparisonResult = 0;
+      if (commonPrefix < ROW_LENGTH_SIZE) {
+        // almost nothing in common
+        rrowlength = Bytes.toShort(right, roffset);
+        comparisonResult = compareRows(left, loffset + ROW_LENGTH_SIZE,
+            lrowlength, right, roffset + ROW_LENGTH_SIZE, rrowlength);
+      } else { // the row length is the same
+        rrowlength = lrowlength;
+        if (commonPrefix < ROW_LENGTH_SIZE + rrowlength) {
+          // The rows are not the same. Exclude the common prefix and compare
+          // the rest of the two rows.
+          int common = commonPrefix - ROW_LENGTH_SIZE;
+          comparisonResult = compareRows(
+              left, loffset + common + ROW_LENGTH_SIZE, lrowlength - common,
+              right, roffset + common + ROW_LENGTH_SIZE, rrowlength - common);
+        }
+      }
+      if (comparisonResult != 0) {
+        return comparisonResult;
+      }
+
+      assert lrowlength == rrowlength;
+
+      return compareWithoutRow(commonPrefix, left, loffset, llength, right,
+          roffset, rlength, lrowlength);
+    }
+
+    /**
+     * Compare column, timestamp, and key type (everything except the row).
+     * This method is used both in the normal comparator and the "same-prefix"
+     * comparator. Note that we are assuming that row portions of both KVs have
+     * already been parsed and found identical, and we don't validate that
+     * assumption here.
+     * @param commonPrefix the length of the common prefix of the two
+     *          key-values being compared, including row length and row
+     */
+    private int compareWithoutRow(int commonPrefix, byte[] left, int loffset,
+        int llength, byte[] right, int roffset, int rlength, short rowlength) {
+      // Compare column family. Start comparing past row and family length.
+      int lcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE +
+          rowlength + loffset;
+      int rcolumnoffset = ROW_LENGTH_SIZE + FAMILY_LENGTH_SIZE +
+          rowlength + roffset;
       int lcolumnlength = llength - TIMESTAMP_TYPE_SIZE -
-        (lcolumnoffset - loffset);
+          (lcolumnoffset - loffset);
       int rcolumnlength = rlength - TIMESTAMP_TYPE_SIZE -
-        (rcolumnoffset - roffset);
+          (rcolumnoffset - roffset);
 
-      // if row matches, and no column in the 'left' AND put type is 'minimum',
+      // If row matches, and no column in the 'left' AND put type is 'minimum',
       // then return that left is larger than right.
-      
-      // This supports 'last key on a row' - the magic is if there is no column in the
-      // left operand, and the left operand has a type of '0' - magical value,
-      // then we say the left is bigger.  This will let us seek to the last key in
-      // a row.
+
+      // This supports 'last key on a row' - the magic is if there is no column
+      // in the left operand, and the left operand has a type of '0' - magical
+      // value, then we say the left is bigger.  This will let us seek to the
+      // last key in a row.
 
       byte ltype = left[loffset + (llength - 1)];
       byte rtype = right[roffset + (rlength - 1)];
 
-      // If the column is not specified, the "minimum" key type appears
-      // the latest in the sorted order, regardless of the timestamp. This is
-      // used for specifying the last key/value in a given row, because there
-      // is no "lexicographically last column" (it would be infinitely long).
-      // The "maximum" key type does not need this behavior.
+      // If the column is not specified, the "minimum" key type appears the
+      // latest in the sorted order, regardless of the timestamp. This is used
+      // for specifying the last key/value in a given row, because there is no
+      // "lexicographically last column" (it would be infinitely long).  The
+      // "maximum" key type does not need this behavior.
       if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) {
         // left is "bigger", i.e. it appears later in the sorted order
         return 1;
@@ -1982,13 +2070,27 @@ public class KeyValue implements Writabl
         return -1;
       }
 
-      // TODO the family and qualifier should be compared separately
-      compare = Bytes.compareTo(left, lcolumnoffset, lcolumnlength, right,
-          rcolumnoffset, rcolumnlength);
-      if (compare != 0) {
-        return compare;
+      int common = 0;
+      if (commonPrefix > 0) {
+        common = Math.max(0, commonPrefix -
+            rowlength - ROW_LENGTH_SIZE - FAMILY_LENGTH_SIZE);
+        common = Math.min(common, Math.min(lcolumnlength, rcolumnlength));
       }
 
+      final int comparisonResult = Bytes.compareTo(
+          left, lcolumnoffset + common, lcolumnlength - common,
+          right, rcolumnoffset + common, rcolumnlength - common);
+      if (comparisonResult != 0) {
+        return comparisonResult;
+      }
+
+      return compareTimestampAndType(left, loffset, llength, right, roffset,
+          rlength, ltype, rtype);
+    }
+
+    private int compareTimestampAndType(byte[] left, int loffset, int llength,
+        byte[] right, int roffset, int rlength, byte ltype, byte rtype) {
+      int compare;
       if (!this.ignoreTimestamp) {
         // Get timestamps.
         long ltimestamp = Bytes.toLong(left,
@@ -2043,8 +2145,8 @@ public class KeyValue implements Writabl
 
   // HeapSize
   public long heapSize() {
-    return ClassSize.align(ClassSize.OBJECT + ClassSize.REFERENCE + 
-        ClassSize.align(ClassSize.ARRAY + length) + 
+    return ClassSize.align(ClassSize.OBJECT + ClassSize.REFERENCE +
+        ClassSize.align(ClassSize.ARRAY + length) +
         (2 * Bytes.SIZEOF_INT) +
         Bytes.SIZEOF_LONG);
   }
@@ -2069,4 +2171,4 @@ public class KeyValue implements Writabl
     out.writeInt(this.length);
     out.write(this.bytes, this.offset, this.length);
   }
-}
+}
\ No newline at end of file
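
The SamePrefixComparator added above is what lets encoded seekers avoid re-comparing bytes they already know are equal. A hedged sketch of the calling convention (the key arrays and the prefix length are placeholders assumed to be in scope):

    // Sketch: both serialized keys agree on the first commonPrefix bytes
    // (counting the 2-byte row length), so comparison starts past them.
    KeyValue.KeyComparator cmp = new KeyValue.KeyComparator();
    int result = cmp.compareIgnoringPrefix(commonPrefix,
        leftKey, 0, leftKey.length,
        rightKey, 0, rightKey.length);
    // result has the same sign as a full cmp.compare() over the two keys.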

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java?rev=1245291&r1=1245290&r2=1245291&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java Fri Feb 17 01:56:33 2012
@@ -724,7 +724,6 @@ public class HBaseAdmin {
    * Asynchronous operation.
    *
    * @param tableName name of table
-   * @param columnName name of column to be modified
    * @param descriptor new column descriptor to use
    * @throws IOException if a remote or network exception occurs
    */
@@ -736,6 +735,12 @@ public class HBaseAdmin {
             descriptor)), null);
   }
 
+  public void modifyColumn(final String tableName,
+      HColumnDescriptor descriptor)
+  throws IOException {
+    modifyColumn(tableName, descriptor.getNameAsString(), descriptor);
+  }
+
   /**
    * Modify an existing column family on a table.
    * Asynchronous operation.

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java?rev=1245291&r1=1245290&r2=1245291&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java Fri Feb 17 01:56:33 2012
@@ -265,6 +265,50 @@ public class Result implements Writable,
     return returnMap;
   }
 
+  protected int binarySearch(final KeyValue[] kvs, final byte[] family,
+      final byte[] qualifier) {
+    KeyValue searchTerm =
+        KeyValue.createFirstOnRow(kvs[0].getRow(),
+            family, qualifier);
+
+    // pos === ( -(insertion point) - 1)
+    int pos = Arrays.binarySearch(kvs, searchTerm, KeyValue.COMPARATOR);
+    // never will exact match
+    if (pos < 0) {
+      pos = (pos + 1) * -1;
+      // pos is now insertion point
+    }
+    if (pos == kvs.length) {
+      return -1; // doesn't exist
+    }
+    return pos;
+  }
+
+  /**
+   * The KeyValue for the most recent for a given column. If the column does
+   * not exist in the result set - if it wasn't selected in the query (Get/Scan)
+   * or just does not exist in the row the return value is null.
+   *
+   * @param family
+   * @param qualifier
+   * @return KeyValue for the column or null
+   */
+  public KeyValue getColumnLatest(byte [] family, byte [] qualifier) {
+    KeyValue [] kvs = raw(); // side effect possibly.
+    if (kvs == null || kvs.length == 0) {
+      return null;
+    }
+    int pos = binarySearch(kvs, family, qualifier);
+    if (pos == -1) {
+      return null;
+    }
+    KeyValue kv = kvs[pos];
+    if (kv.matchingColumn(family, qualifier)) {
+      return kv;
+    }
+    return null;
+  }
+
   /**
    * Get the latest version of the specified column.
    * @param family family name

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java?rev=1245291&r1=1245290&r2=1245291&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java Fri Feb 17 01:56:33 2012
@@ -27,6 +27,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
@@ -60,9 +61,9 @@ public class HalfStoreFileReader extends
    * @throws IOException
    */
   public HalfStoreFileReader(final FileSystem fs, final Path p,
-      final CacheConfig cacheConf, final Reference r)
-  throws IOException {
-    super(fs, p, cacheConf);
+      final CacheConfig cacheConf, final Reference r,
+      DataBlockEncoding preferredEncodingInCache) throws IOException {
+    super(fs, p, cacheConf, preferredEncodingInCache);
     // This is not actual midkey for this half-file; its just border
     // around which we split top and bottom.  Have to look in files to find
     // actual last and first keys for bottom and top halves.  Half-files don't

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java?rev=1245291&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java Fri Feb 17 01:56:33 2012
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.encoding;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.KeyValue.SamePrefixComparator;
+import org.apache.hadoop.hbase.util.ByteBufferUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.RawComparator;
+import org.apache.hadoop.io.WritableUtils;
+
+/**
+ * Base class for all data block encoders that use a buffer.
+ */
+abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
+
+  private static int INITIAL_KEY_BUFFER_SIZE = 512;
+
+  @Override
+  public ByteBuffer uncompressKeyValues(DataInputStream source,
+      boolean includesMemstoreTS) throws IOException {
+    return uncompressKeyValues(source, 0, 0, includesMemstoreTS);
+  }
+
+  protected static class SeekerState {
+    protected int valueOffset = -1;
+    protected int keyLength;
+    protected int valueLength;
+    protected int lastCommonPrefix;
+
+    /** We need to store a copy of the key. */
+    protected byte[] keyBuffer = new byte[INITIAL_KEY_BUFFER_SIZE];
+
+    protected long memstoreTS;
+    protected int nextKvOffset;
+
+    protected boolean isValid() {
+      return valueOffset != -1;
+    }
+
+    protected void invalidate() {
+      valueOffset = -1;
+    }
+
+    protected void ensureSpaceForKey() {
+      if (keyLength > keyBuffer.length) {
+        // rare case, but we need to handle arbitrary length of key
+        int newKeyBufferLength = Math.max(keyBuffer.length, 1) * 2;
+        while (keyLength > newKeyBufferLength) {
+          newKeyBufferLength *= 2;
+        }
+        byte[] newKeyBuffer = new byte[newKeyBufferLength];
+        System.arraycopy(keyBuffer, 0, newKeyBuffer, 0, keyBuffer.length);
+        keyBuffer = newKeyBuffer;
+      }
+    }
+
+    /**
+     * Copy the state from the next one into this instance (the previous state
+     * placeholder). Used to save the previous state when we are advancing the
+     * seeker to the next key/value.
+     */
+    protected void copyFromNext(SeekerState nextState) {
+      if (keyBuffer.length != nextState.keyBuffer.length) {
+        keyBuffer = nextState.keyBuffer.clone();
+      } else if (!isValid()) {
+        // Note: we can only call isValid before we override our state, so this
+        // comes before all the assignments at the end of this method.
+        System.arraycopy(nextState.keyBuffer, 0, keyBuffer, 0,
+             nextState.keyLength);
+      } else {
+        // don't copy the common prefix between this key and the previous one
+        System.arraycopy(nextState.keyBuffer, nextState.lastCommonPrefix,
+            keyBuffer, nextState.lastCommonPrefix, nextState.keyLength
+                - nextState.lastCommonPrefix);
+      }
+
+      valueOffset = nextState.valueOffset;
+      keyLength = nextState.keyLength;
+      valueLength = nextState.valueLength;
+      lastCommonPrefix = nextState.lastCommonPrefix;
+      nextKvOffset = nextState.nextKvOffset;
+      memstoreTS = nextState.memstoreTS;
+    }
+
+  }
+
+  protected abstract static class
+      BufferedEncodedSeeker<STATE extends SeekerState>
+      implements EncodedSeeker {
+
+    protected final RawComparator<byte[]> comparator;
+    protected final SamePrefixComparator<byte[]> samePrefixComparator;
+    protected ByteBuffer currentBuffer;
+    protected STATE current = createSeekerState(); // always valid
+    protected STATE previous = createSeekerState(); // may not be valid
+
+    @SuppressWarnings("unchecked")
+    public BufferedEncodedSeeker(RawComparator<byte[]> comparator) {
+      this.comparator = comparator;
+      if (comparator instanceof SamePrefixComparator) {
+        this.samePrefixComparator = (SamePrefixComparator<byte[]>) comparator;
+      } else {
+        this.samePrefixComparator = null;
+      }
+    }
+
+    @Override
+    public void setCurrentBuffer(ByteBuffer buffer) {
+      currentBuffer = buffer;
+      decodeFirst();
+      previous.invalidate();
+    }
+
+    @Override
+    public ByteBuffer getKeyDeepCopy() {
+      ByteBuffer keyBuffer = ByteBuffer.allocate(current.keyLength);
+      keyBuffer.put(current.keyBuffer, 0, current.keyLength);
+      return keyBuffer;
+    }
+
+    @Override
+    public ByteBuffer getValueShallowCopy() {
+      return ByteBuffer.wrap(currentBuffer.array(),
+          currentBuffer.arrayOffset() + current.valueOffset,
+          current.valueLength);
+    }
+
+    @Override
+    public ByteBuffer getKeyValueBuffer() {
+      ByteBuffer kvBuffer = ByteBuffer.allocate(
+          2 * Bytes.SIZEOF_INT + current.keyLength + current.valueLength);
+      kvBuffer.putInt(current.keyLength);
+      kvBuffer.putInt(current.valueLength);
+      kvBuffer.put(current.keyBuffer, 0, current.keyLength);
+      kvBuffer.put(currentBuffer.array(),
+          currentBuffer.arrayOffset() + current.valueOffset,
+          current.valueLength);
+      return kvBuffer;
+    }
+
+    @Override
+    public KeyValue getKeyValue() {
+      ByteBuffer kvBuf = getKeyValueBuffer();
+      KeyValue kv = new KeyValue(kvBuf.array(), kvBuf.arrayOffset());
+      kv.setMemstoreTS(current.memstoreTS);
+      return kv;
+    }
+
+    @Override
+    public void rewind() {
+      currentBuffer.rewind();
+      decodeFirst();
+      previous.invalidate();
+    }
+
+    @Override
+    public boolean next() {
+      if (!currentBuffer.hasRemaining()) {
+        return false;
+      }
+      decodeNext();
+      previous.invalidate();
+      return true;
+    }
+
+    @Override
+    public int seekToKeyInBlock(byte[] key, int offset, int length,
+        boolean seekBefore) {
+      int commonPrefix = 0;
+      previous.invalidate();
+      do {
+        int comp;
+        if (samePrefixComparator != null) {
+          commonPrefix = Math.min(commonPrefix, current.lastCommonPrefix);
+
+          // extend commonPrefix
+          commonPrefix += ByteBufferUtils.findCommonPrefix(
+              key, offset + commonPrefix, length - commonPrefix,
+              current.keyBuffer, commonPrefix,
+              current.keyLength - commonPrefix);
+
+          comp = samePrefixComparator.compareIgnoringPrefix(commonPrefix, key,
+              offset, length, current.keyBuffer, 0, current.keyLength);
+        } else {
+          comp = comparator.compare(key, offset, length,
+              current.keyBuffer, 0, current.keyLength);
+        }
+
+        if (comp == 0) { // exact match
+          if (seekBefore) {
+            if (!previous.isValid()) {
+              // The caller (seekBefore) has to ensure that we are not at the
+              // first key in the block.
+              throw new IllegalStateException("Cannot seekBefore if " +
+                  "positioned at the first key in the block: key=" +
+                  Bytes.toStringBinary(key, offset, length));
+            }
+            moveToPrevious();
+            return 1;
+          }
+          return 0;
+        }
+
+        if (comp < 0) { // already too large, check previous
+          if (previous.isValid()) {
+            moveToPrevious();
+          }
+          return 1;
+        }
+
+        // move to next, if more data is available
+        if (currentBuffer.hasRemaining()) {
+          previous.copyFromNext(current);
+          decodeNext();
+        } else {
+          break;
+        }
+      } while (true);
+
+      // we hit the end of the block, not an exact match
+      return 1;
+    }
+
+    private void moveToPrevious() {
+      if (!previous.isValid()) {
+        throw new IllegalStateException(
+            "Can move back only once and not in first key in the block.");
+      }
+
+      STATE tmp = previous;
+      previous = current;
+      current = tmp;
+
+      // move after last key value
+      currentBuffer.position(current.nextKvOffset);
+
+      previous.invalidate();
+    }
+
+    @SuppressWarnings("unchecked")
+    protected STATE createSeekerState() {
+      // This will fail for non-default seeker state if the subclass does not
+      // override this method.
+      return (STATE) new SeekerState();
+    }
+
+    abstract protected void decodeFirst();
+    abstract protected void decodeNext();
+  }
+
+  protected final void afterEncodingKeyValue(ByteBuffer in,
+      DataOutputStream out, boolean includesMemstoreTS) {
+    if (includesMemstoreTS) {
+      // Copy memstore timestamp from the byte buffer to the output stream.
+      long memstoreTS = -1;
+      try {
+        memstoreTS = ByteBufferUtils.readVLong(in);
+        WritableUtils.writeVLong(out, memstoreTS);
+      } catch (IOException ex) {
+        throw new RuntimeException("Unable to copy memstore timestamp " +
+            memstoreTS + " after encoding a key/value");
+      }
+    }
+  }
+
+  protected final void afterDecodingKeyValue(DataInputStream source,
+      ByteBuffer dest, boolean includesMemstoreTS) {
+    if (includesMemstoreTS) {
+      long memstoreTS = -1;
+      try {
+        // Copy memstore timestamp from the data input stream to the byte
+        // buffer.
+        memstoreTS = WritableUtils.readVLong(source);
+        ByteBufferUtils.writeVLong(dest, memstoreTS);
+      } catch (IOException ex) {
+        throw new RuntimeException("Unable to copy memstore timestamp " +
+            memstoreTS + " after decoding a key/value");
+      }
+    }
+  }
+
+}
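
A hedged sketch (not part of the patch) of the seeker contract implemented above. How the seeker instance is obtained from a concrete encoder is not shown in this part of the diff and is assumed here; encodedBlock and searchKey are placeholders.

    // Sketch: position a seeker inside one encoded block and read key/values.
    seeker.setCurrentBuffer(encodedBlock);       // ByteBuffer of encoded KVs
    int notExact = seeker.seekToKeyInBlock(
        searchKey, 0, searchKey.length, false);  // 0 = exact match, 1 = no exact match
    KeyValue kv = seeker.getKeyValue();          // KV at the current position
    while (seeker.next()) {
      // iterate over the remaining key/values in the block
    }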

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java?rev=1245291&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CompressionState.java Fri Feb 17 01:56:33 2012
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.encoding;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.util.ByteBufferUtils;
+
+/**
+ * Stores the state of data block encoder at the beginning of new key.
+ */
+class CompressionState {
+  int keyLength;
+  int valueLength;
+
+  short rowLength;
+  int prevOffset = FIRST_KEY;
+  byte familyLength;
+  int qualifierLength;
+  byte type;
+
+  private final static int FIRST_KEY = -1;
+
+  boolean isFirst() {
+    return prevOffset == FIRST_KEY;
+  }
+
+  /**
+   * Analyze the key and fill the state.
+   * Uses mark() and reset() in ByteBuffer.
+   * @param in Buffer at the position where key starts
+   * @param keyLength Length of key in bytes
+   * @param valueLength Length of values in bytes
+   */
+  void readKey(ByteBuffer in, int keyLength, int valueLength) {
+    readKey(in, keyLength, valueLength, 0, null);
+  }
+
+  /**
+   * Analyze the key and fill the state assuming we know previous state.
+   * Uses mark() and reset() in ByteBuffer to avoid moving the position.
+   * <p>
+   * This method overrides all the fields of this instance, except
+   * {@link #prevOffset}, which is usually manipulated directly by encoders
+   * and decoders.
+   * @param in Buffer at the position where key starts
+   * @param keyLength Length of key in bytes
+   * @param valueLength Length of values in bytes
+   * @param commonPrefix how many first bytes are common with previous KeyValue
+   * @param previousState State from previous KeyValue
+   */
+  void readKey(ByteBuffer in, int keyLength, int valueLength,
+      int commonPrefix, CompressionState previousState) {
+    this.keyLength = keyLength;
+    this.valueLength = valueLength;
+
+    // fill the state
+    in.mark(); // mark beginning of key
+
+    if (commonPrefix < KeyValue.ROW_LENGTH_SIZE) {
+      rowLength = in.getShort();
+      ByteBufferUtils.skip(in, rowLength);
+
+      familyLength = in.get();
+
+      qualifierLength = keyLength - rowLength - familyLength -
+          KeyValue.KEY_INFRASTRUCTURE_SIZE;
+      ByteBufferUtils.skip(in, familyLength + qualifierLength);
+    } else {
+      rowLength = previousState.rowLength;
+      familyLength = previousState.familyLength;
+      qualifierLength = previousState.qualifierLength +
+          keyLength - previousState.keyLength;
+      ByteBufferUtils.skip(in, (KeyValue.ROW_LENGTH_SIZE +
+          KeyValue.FAMILY_LENGTH_SIZE) +
+          rowLength + familyLength + qualifierLength);
+    }
+
+    readTimestamp(in);
+
+    type = in.get();
+
+    in.reset();
+  }
+
+  protected void readTimestamp(ByteBuffer in) {
+    // used in subclasses to add timestamp to state
+    ByteBufferUtils.skip(in, KeyValue.TIMESTAMP_SIZE);
+  }
+
+  void copyFrom(CompressionState state) {
+    keyLength = state.keyLength;
+    valueLength = state.valueLength;
+
+    rowLength = state.rowLength;
+    prevOffset = state.prevOffset;
+    familyLength = state.familyLength;
+    qualifierLength = state.qualifierLength;
+    type = state.type;
+  }
+}
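
For reference, a minimal usage sketch (not part of this commit) of how an encoder can drive CompressionState. The sketch class name is made up; it assumes the code lives in the same package, since CompressionState, its fields, and readKey() are package-private, and that the buffer holds a single KeyValue in the standard wire format (4-byte key length, 4-byte value length, then the key).

package org.apache.hadoop.hbase.io.encoding;

import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class CompressionStateSketch {
  public static void main(String[] args) {
    // Build a single KeyValue and wrap its backing bytes.
    KeyValue kv = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("cf"),
        Bytes.toBytes("qual"), 1L, Bytes.toBytes("value"));
    ByteBuffer in = ByteBuffer.wrap(kv.getBuffer(), kv.getOffset(),
        kv.getLength()).slice();

    // The KeyValue wire format starts with the key and value lengths.
    int keyLength = in.getInt();
    int valueLength = in.getInt();

    // readKey() expects the buffer positioned at the start of the key;
    // it uses mark()/reset(), so the position is unchanged afterwards.
    CompressionState state = new CompressionState();
    state.readKey(in, keyLength, valueLength);

    System.out.println("row length = " + state.rowLength
        + ", family length = " + state.familyLength
        + ", qualifier length = " + state.qualifierLength);
  }
}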

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java?rev=1245291&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java Fri Feb 17 01:56:33 2012
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.encoding;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.util.ByteBufferUtils;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.RawComparator;
+
+/**
+ * Just copy data, do not do any kind of compression. Use for comparison and
+ * benchmarking.
+ */
+public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder {
+  @Override
+  public void compressKeyValues(DataOutputStream out,
+      ByteBuffer in, boolean includesMemstoreTS) throws IOException {
+    in.rewind();
+    ByteBufferUtils.putInt(out, in.limit());
+    ByteBufferUtils.moveBufferToStream(out, in, in.limit());
+  }
+
+  @Override
+  public ByteBuffer uncompressKeyValues(DataInputStream source,
+      int preserveHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
+      throws IOException {
+    int decompressedSize = source.readInt();
+    ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
+        preserveHeaderLength);
+    buffer.position(preserveHeaderLength);
+    ByteBufferUtils.copyFromStreamToBuffer(buffer, source, decompressedSize);
+
+    return buffer;
+  }
+
+  @Override
+  public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
+    int keyLength = block.getInt(Bytes.SIZEOF_INT);
+    return ByteBuffer.wrap(block.array(),
+        block.arrayOffset() + 3 * Bytes.SIZEOF_INT, keyLength).slice();
+  }
+
+
+  @Override
+  public String toString() {
+    return CopyKeyDataBlockEncoder.class.getSimpleName();
+  }
+
+  @Override
+  public EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
+      final boolean includesMemstoreTS) {
+    return new BufferedEncodedSeeker<SeekerState>(comparator) {
+      @Override
+      protected void decodeNext() {
+        current.keyLength = currentBuffer.getInt();
+        current.valueLength = currentBuffer.getInt();
+        current.ensureSpaceForKey();
+        currentBuffer.get(current.keyBuffer, 0, current.keyLength);
+        current.valueOffset = currentBuffer.position();
+        ByteBufferUtils.skip(currentBuffer, current.valueLength);
+        if (includesMemstoreTS) {
+          current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
+        } else {
+          current.memstoreTS = 0;
+        }
+        current.nextKvOffset = currentBuffer.position();
+      }
+
+      @Override
+      protected void decodeFirst() {
+        ByteBufferUtils.skip(currentBuffer, Bytes.SIZEOF_INT);
+        current.lastCommonPrefix = 0;
+        decodeNext();
+      }
+    };
+  }
+}
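
For reference, a sketch (not part of this commit) of a compress/uncompress round trip with CopyKeyDataBlockEncoder; the sketch class name is made up and the "block" holds a single illustrative KeyValue. No header bytes are reserved, nothing is skipped at the end of the block, and memstore timestamps are not included.

package org.apache.hadoop.hbase.io.encoding;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class CopyKeyRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // A "block" holding a single unencoded KeyValue.
    KeyValue kv = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("cf"),
        Bytes.toBytes("q"), 1L, Bytes.toBytes("v"));
    ByteBuffer unencoded = ByteBuffer.wrap(kv.getBuffer(), kv.getOffset(),
        kv.getLength()).slice();

    CopyKeyDataBlockEncoder encoder = new CopyKeyDataBlockEncoder();

    // Encode: this encoder copies the payload verbatim, prefixed with its length.
    ByteArrayOutputStream encoded = new ByteArrayOutputStream();
    encoder.compressKeyValues(new DataOutputStream(encoded), unencoded, false);

    // Decode with no reserved header bytes and nothing skipped at the end.
    DataInputStream source = new DataInputStream(
        new ByteArrayInputStream(encoded.toByteArray()));
    ByteBuffer decoded = encoder.uncompressKeyValues(source, 0, 0, false);

    // Compare the full contents of both buffers.
    decoded.rewind();
    unencoded.rewind();
    System.out.println("round trip ok: " + decoded.equals(unencoded));
  }
}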

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java?rev=1245291&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java Fri Feb 17 01:56:33 2012
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.encoding;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.io.RawComparator;
+
+/**
+ * Encoding of KeyValues. It aims to be fast and efficient by relying on the
+ * following assumptions:
+ * <ul>
+ * <li>the KeyValues are stored sorted by key</li>
+ * <li>the structure of the KeyValue format is known</li>
+ * <li>the values are always iterated forward from the beginning of the block</li>
+ * </ul>
+ * It is designed to be fast enough to be feasible for in-memory compression.
+ */
+public interface DataBlockEncoder {
+  /**
+   * Compress KeyValues and write them to the output stream.
+   * @param out Where to write compressed data.
+   * @param in Source of KeyValues for compression.
+   * @param includesMemstoreTS true if including memstore timestamp after every
+   *          key-value pair
+   * @throws IOException If there is an error writing to output stream.
+   */
+  public void compressKeyValues(DataOutputStream out,
+      ByteBuffer in, boolean includesMemstoreTS) throws IOException;
+
+  /**
+   * Uncompress.
+   * @param source Compressed stream of KeyValues.
+   * @param includesMemstoreTS true if including memstore timestamp after every
+   *          key-value pair
+   * @return Uncompressed block of KeyValues.
+   * @throws IOException If there is an error in source.
+   */
+  public ByteBuffer uncompressKeyValues(DataInputStream source,
+      boolean includesMemstoreTS) throws IOException;
+
+  /**
+   * Uncompress.
+   * @param source Compressed stream of KeyValues.
+   * @param allocateHeaderLength allocate this many bytes for the header.
+   * @param skipLastBytes Do not copy n last bytes.
+   * @param includesMemstoreTS true if including memstore timestamp after every
+   *          key-value pair
+   * @return Uncompressed block of KeyValues.
+   * @throws IOException If there is an error in source.
+   */
+  public ByteBuffer uncompressKeyValues(DataInputStream source,
+      int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
+      throws IOException;
+
+  /**
+   * Return first key in block. Useful for indexing. Typically does not make
+   * a deep copy but returns a buffer wrapping a segment of the actual block's
+   * byte array. This is because the first key in block is usually stored
+   * unencoded.
+   * @param block encoded block we want to index; the buffer's position will
+   *          not change
+   * @return First key in block.
+   */
+  public ByteBuffer getFirstKeyInBlock(ByteBuffer block);
+
+  /**
+   * Create an HFileBlock seeker which finds KeyValues within a block.
+   * @param comparator what kind of comparison should be used
+   * @param includesMemstoreTS true if including memstore timestamp after every
+   *          key-value pair
+   * @return A newly created seeker.
+   */
+  public EncodedSeeker createSeeker(RawComparator<byte[]> comparator,
+      boolean includesMemstoreTS);
+
+  /**
+   * An interface that enables seeking while the underlying data is encoded.
+   *
+   * It works on one HFileBlock, but it is reusable. See
+   * {@link #setCurrentBuffer(ByteBuffer)}.
+   */
+  public static interface EncodedSeeker {
+    /**
+     * Set the buffer on which seeking will be done.
+     * @param buffer Used for seeking.
+     */
+    public void setCurrentBuffer(ByteBuffer buffer);
+
+    /**
+     * Does a deep copy of the key at the current position. A deep copy is
+     * necessary because buffers are reused in the decoder.
+     * @return key at current position
+     */
+    public ByteBuffer getKeyDeepCopy();
+
+    /**
+     * Does a shallow copy of the value at the current position. A shallow
+     * copy is possible because the returned buffer refers to the backing array
+     * of the original encoded buffer.
+     * @return value at current position
+     */
+    public ByteBuffer getValueShallowCopy();
+
+    /** @return key value at current position. */
+    public ByteBuffer getKeyValueBuffer();
+
+    /**
+     * @return the KeyValue object at the current position. Includes memstore
+     *         timestamp.
+     */
+    public KeyValue getKeyValue();
+
+    /** Set position to beginning of given block */
+    public void rewind();
+
+    /**
+     * Move to the next position.
+     * @return true on success, false if there are no more positions.
+     */
+    public boolean next();
+
+    /**
+     * Moves the seeker position within the current block to:
+     * <ul>
+     * <li>the last key that is less than or equal to the given key if
+     * <code>seekBefore</code> is false</li>
+     * <li>the last key that is strictly less than the given key if <code>
+     * seekBefore</code> is true. The caller is responsible for loading the
+     * previous block if the requested key turns out to be the first key of the
+     * current block.</li>
+     * </ul>
+     * @param key byte array containing the key
+     * @param offset key position in the array
+     * @param length key length in bytes
+     * @param seekBefore find the key strictly less than the given key in case
+     *          of an exact match. Does not matter in case of an inexact match.
+     * @return 0 on exact match, 1 on inexact match.
+     */
+    public int seekToKeyInBlock(byte[] key, int offset, int length,
+        boolean seekBefore);
+  }
+}
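
For reference, a sketch (not part of this commit) of iterating an encoded block with an EncodedSeeker, using the plain-copy encoder and Bytes.BYTES_RAWCOMPARATOR for simplicity (an HFile reader would pass the KeyValue key comparator). The sketch class name is made up and the block contents are illustrative.

package org.apache.hadoop.hbase.io.encoding;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class EncodedSeekerSketch {
  public static void main(String[] args) throws Exception {
    // Encode a tiny block with the plain-copy encoder.
    KeyValue kv = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("cf"),
        Bytes.toBytes("q"), 1L, Bytes.toBytes("v"));
    ByteBuffer block = ByteBuffer.wrap(kv.getBuffer(), kv.getOffset(),
        kv.getLength()).slice();

    DataBlockEncoder encoder = new CopyKeyDataBlockEncoder();
    ByteArrayOutputStream encoded = new ByteArrayOutputStream();
    encoder.compressKeyValues(new DataOutputStream(encoded), block, false);

    // The seeker works directly on the encoded bytes, without uncompressing
    // the whole block first.
    DataBlockEncoder.EncodedSeeker seeker =
        encoder.createSeeker(Bytes.BYTES_RAWCOMPARATOR, false);
    seeker.setCurrentBuffer(ByteBuffer.wrap(encoded.toByteArray()));
    seeker.rewind();
    do {
      System.out.println(seeker.getKeyValue());
    } while (seeker.next());
  }
}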

Added: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java?rev=1245291&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java (added)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoding.java Fri Feb 17 01:56:33 2012
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.encoding;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Provide access to all data block encoding algorithms. All of the algorithms
+ * are required to have a unique id which should <b>NEVER</b> be changed. If you
+ * want to add a new algorithm/version, assign it a new id. Announce the new id
+ * on the HBase mailing list to prevent collisions.
+ */
+public enum DataBlockEncoding {
+
+  /** Disable data block encoding. */
+  NONE(0, null),
+  // id 1 is reserved for the BITSET algorithm to be added later
+  PREFIX(2, new PrefixKeyDeltaEncoder()),
+  DIFF(3, new DiffKeyDeltaEncoder()),
+  FAST_DIFF(4, new FastDiffDeltaEncoder());
+
+  private final short id;
+  private final byte[] idInBytes;
+  private final DataBlockEncoder encoder;
+
+  public static final int ID_SIZE = Bytes.SIZEOF_SHORT;
+
+  /** Maps data block encoding ids to enum instances. */
+  private static Map<Short, DataBlockEncoding> idToEncoding =
+      new HashMap<Short, DataBlockEncoding>();
+
+  static {
+    for (DataBlockEncoding algo : values()) {
+      if (idToEncoding.containsKey(algo.id)) {
+        throw new RuntimeException(String.format(
+            "Two data block encoder algorithms '%s' and '%s' have " +
+            "the same id %d",
+            idToEncoding.get(algo.id).toString(), algo.toString(),
+            (int) algo.id));
+      }
+      idToEncoding.put(algo.id, algo);
+    }
+  }
+
+  private DataBlockEncoding(int id, DataBlockEncoder encoder) {
+    if (id < Short.MIN_VALUE || id > Short.MAX_VALUE) {
+      throw new AssertionError(
+          "Data block encoding algorithm id is out of range: " + id);
+    }
+    this.id = (short) id;
+    this.idInBytes = Bytes.toBytes(this.id);
+    if (idInBytes.length != ID_SIZE) {
+      // While this may seem redundant, if we accidentally serialize
+      // the id as e.g. an int instead of a short, all encoders will break.
+      throw new RuntimeException("Unexpected length of encoder ID byte " +
+          "representation: " + Bytes.toStringBinary(idInBytes));
+    }
+    this.encoder = encoder;
+  }
+
+  /**
+   * @return name converted to bytes.
+   */
+  public byte[] getNameInBytes() {
+    return Bytes.toBytes(toString());
+  }
+
+  /**
+   * @return The id of a data block encoder.
+   */
+  public short getId() {
+    return id;
+  }
+
+  /**
+   * Writes id in bytes.
+   * @param stream where the id should be written.
+   */
+  public void writeIdInBytes(OutputStream stream) throws IOException {
+    stream.write(idInBytes);
+  }
+
+  /**
+   * Return the data block encoder for this algorithm type.
+   * @return data block encoder if an algorithm is specified, null if none is
+   *         selected.
+   */
+  public DataBlockEncoder getEncoder() {
+    return encoder;
+  }
+
+  /**
+   * Provide access to all data block encoders, even those which are not
+   * exposed in the enum. Useful for testing and benchmarking.
+   * @return list of all data block encoders.
+   */
+  public static List<DataBlockEncoder> getAllEncoders() {
+    ArrayList<DataBlockEncoder> encoders = new ArrayList<DataBlockEncoder>();
+    for (DataBlockEncoding algo : values()) {
+      DataBlockEncoder encoder = algo.getEncoder();
+      if (encoder != null) {
+        encoders.add(encoder);
+      }
+    }
+
+    // Add encoders that are only used in testing.
+    encoders.add(new CopyKeyDataBlockEncoder());
+    return encoders;
+  }
+
+  /**
+   * Find the data block encoder for the given id.
+   * @param encoderId id of data block encoder.
+   * @return Data block encoder for the given id.
+   */
+  public static DataBlockEncoder getDataBlockEncoderById(short encoderId) {
+    if (!idToEncoding.containsKey(encoderId)) {
+      throw new IllegalArgumentException(String.format(
+          "There is no data block encoder for given id '%d'",
+          (int) encoderId));
+    }
+
+    return idToEncoding.get(encoderId).getEncoder();
+  }
+
+  /**
+   * Find and return the name of data block encoder for the given id.
+   * @param encoderId id of data block encoder
+   * @return name, same as used in options in column family
+   */
+  public static String getNameFromId(short encoderId) {
+    return idToEncoding.get(encoderId).toString();
+  }
+
+  /**
+   * Check if given encoder has this id.
+   * @param encoder encoder which id will be checked
+   * @param encoderId id which we expect
+   * @return true if id is right for given encoder, false otherwise
+   * @exception IllegalArgumentException
+   *            thrown when there is no matching data block encoder
+   */
+  public static boolean isCorrectEncoder(DataBlockEncoder encoder,
+      short encoderId) {
+    if (!idToEncoding.containsKey(encoderId)) {
+      throw new IllegalArgumentException(String.format(
+          "There is no data block encoder for given id '%d'",
+          (int) encoderId));
+    }
+
+    // Compare against the encoder class for this id; the enum class itself
+    // would never match an encoder instance.
+    DataBlockEncoder expectedEncoder = idToEncoding.get(encoderId).getEncoder();
+    return expectedEncoder != null &&
+        expectedEncoder.getClass().equals(encoder.getClass());
+  }
+
+  public static DataBlockEncoding getEncodingById(short dataBlockEncodingId) {
+    return idToEncoding.get(dataBlockEncodingId);
+  }
+
+}
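
For reference, a sketch (not part of this commit) of the id round trip this enum is designed for: write the two-byte id that identifies the encoding on disk, then map it back to the encoding name and encoder. The sketch class name is made up.

package org.apache.hadoop.hbase.io.encoding;

import java.io.ByteArrayOutputStream;

import org.apache.hadoop.hbase.util.Bytes;

public class DataBlockEncodingIdSketch {
  public static void main(String[] args) throws Exception {
    DataBlockEncoding encoding = DataBlockEncoding.FAST_DIFF;

    // Write the two-byte id, as a block writer would.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    encoding.writeIdInBytes(out);
    short id = Bytes.toShort(out.toByteArray());

    // A reader maps the id back to the encoding and its encoder.
    System.out.println(DataBlockEncoding.getNameFromId(id));   // FAST_DIFF
    DataBlockEncoder encoder = DataBlockEncoding.getDataBlockEncoderById(id);
    System.out.println(encoder.getClass().getSimpleName());    // FastDiffDeltaEncoder
  }
}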


