hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r795230 [1/2] - in /hadoop/hbase/trunk/src: java/org/apache/hadoop/hbase/ java/org/apache/hadoop/hbase/io/ java/org/apache/hadoop/hbase/migration/ java/org/apache/hadoop/hbase/migration/nineteen/ java/org/apache/hadoop/hbase/migration/ninet...
Date Fri, 17 Jul 2009 21:23:19 GMT
Author: stack
Date: Fri Jul 17 21:23:11 2009
New Revision: 795230

URL: http://svn.apache.org/viewvc?rev=795230&view=rev
Log:
HBASE-1215 Migration.  Pull in classes I need to read old file types.  Removed old classes no longer needed from io package (e.g. SequenceFile and MapFile)

Added:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreKey.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/BloomFilterMapFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HBaseMapFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HalfMapFileReader.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/Reference.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/BloomFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/package-info.java
Removed:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BlockFSInputStream.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/DataOutputBuffer.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/MapFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/SequenceFile.java
Modified:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HConstants.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Store.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Migrate.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/MigrationTest.java

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HConstants.java?rev=795230&r1=795229&r2=795230&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HConstants.java Fri Jul 17 21:23:11 2009
@@ -49,8 +49,8 @@
    * Version 6 enables blockcaching on catalog tables.
    * Version 7 introduces hfile -- hbase 0.19 to 0.20..
    */
-  public static final String FILE_SYSTEM_VERSION = "6";
-  // public static final String FILE_SYSTEM_VERSION = "7";
+  // public static final String FILE_SYSTEM_VERSION = "6";
+  public static final String FILE_SYSTEM_VERSION = "7";
   
   // Configuration parameters
   

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreKey.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreKey.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreKey.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/HStoreKey.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,738 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen;
+
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.ColumnNameParseException;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+/**
+ * A Key for a stored row.
+ */
+public class HStoreKey implements WritableComparable<HStoreKey>, HeapSize {
+  /**
+   * Colon character in UTF-8
+   */
+  public static final char COLUMN_FAMILY_DELIMITER = ':';
+  
+  private byte [] row = HConstants.EMPTY_BYTE_ARRAY;
+  private byte [] column = HConstants.EMPTY_BYTE_ARRAY;
+  private long timestamp = Long.MAX_VALUE;
+
+  /*
+   * regionInfo is only used as a hack to compare HSKs.
+   * It is not serialized.  See https://issues.apache.org/jira/browse/HBASE-832
+   */
+  private HRegionInfo regionInfo = null;
+  
+  /**
+   * Estimated size tax paid for each instance of HSK.  Estimate based on
+   * study of jhat and jprofiler numbers.
+   */
+  // In jprofiler, says shallow size is 48 bytes.  Add to it cost of two
+  // byte arrays and then something for the HRI hosting.
+  public static final int ESTIMATED_HEAP_TAX = 48;
+
+  /** Default constructor used in conjunction with Writable interface */
+  public HStoreKey() {
+    super();
+  }
+  
+  /**
+   * Create an HStoreKey specifying only the row
+   * The column defaults to the empty string, the time stamp defaults to
+   * Long.MAX_VALUE and the table defaults to empty string
+   * 
+   * @param row - row key
+   */
+  public HStoreKey(final byte [] row) {
+    this(row, Long.MAX_VALUE);
+  }
+
+  /**
+   * Create an HStoreKey specifying only the row
+   * The column defaults to the empty string, the time stamp defaults to
+   * Long.MAX_VALUE and the table defaults to empty string
+   * 
+   * @param row - row key
+   */
+  public HStoreKey(final String row) {
+    this(row, Long.MAX_VALUE);
+  }
+
+  /**
+   * Create an HStoreKey specifying the row and timestamp
+   * The column and table names default to the empty string
+   * 
+   * @param row row key
+   * @param hri
+   */
+  public HStoreKey(final byte [] row, final HRegionInfo hri) {
+    this(row, HConstants.EMPTY_BYTE_ARRAY, hri);
+  }
+ 
+  /**
+   * Create an HStoreKey specifying the row and timestamp
+   * The column and table names default to the empty string
+   * 
+   * @param row row key
+   * @param timestamp timestamp value
+   * @param hri HRegionInfo
+   */
+  public HStoreKey(final byte [] row, long timestamp, final HRegionInfo hri) {
+    this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp, hri);
+  }
+
+  /**
+   * Create an HStoreKey specifying the row and timestamp
+   * The column and table names default to the empty string
+   * 
+   * @param row row key
+   * @param timestamp timestamp value
+   */
+  public HStoreKey(final byte [] row, long timestamp) {
+    this(row, HConstants.EMPTY_BYTE_ARRAY, timestamp);
+  }
+
+  /**
+   * Create an HStoreKey specifying the row and timestamp
+   * The column and table names default to the empty string
+   * 
+   * @param row row key
+   * @param timestamp timestamp value
+   */
+  public HStoreKey(final String row, long timestamp) {
+    this (row, "", timestamp, new HRegionInfo());
+  }
+
+  /**
+   * Create an HStoreKey specifying the row and column names
+   * The timestamp defaults to LATEST_TIMESTAMP
+   * and table name defaults to the empty string
+   * 
+   * @param row row key
+   * @param column column key
+   */
+  public HStoreKey(final String row, final String column) {
+    this(row, column, HConstants.LATEST_TIMESTAMP, new HRegionInfo());
+  }
+
+  /**
+   * Create an HStoreKey specifying the row and column names
+   * The timestamp defaults to LATEST_TIMESTAMP
+   * and table name defaults to the empty string
+   * 
+   * @param row row key
+   * @param column column key
+   */
+  public HStoreKey(final byte [] row, final byte [] column) {
+    this(row, column, HConstants.LATEST_TIMESTAMP);
+  }
+  
+  /**
+   * Create an HStoreKey specifying the row, column names and table name
+   * The timestamp defaults to LATEST_TIMESTAMP
+   * 
+   * @param row row key
+   * @param column column key
+   * @param regionInfo region info
+   */
+  public HStoreKey(final byte [] row, 
+      final byte [] column, final HRegionInfo regionInfo) {
+    this(row, column, HConstants.LATEST_TIMESTAMP, regionInfo);
+  }
+
+  /**
+   * Create an HStoreKey specifying all the fields
+   * Does not make copies of the passed byte arrays. Presumes the passed 
+   * arrays immutable.
+   * @param row row key
+   * @param column column key
+   * @param timestamp timestamp value
+   * @param regionInfo region info
+   */
+  public HStoreKey(final String row, 
+      final String column, long timestamp, final HRegionInfo regionInfo) {
+    this (Bytes.toBytes(row), Bytes.toBytes(column), 
+        timestamp, regionInfo);
+  }
+
+  /**
+   * Create an HStoreKey specifying all the fields with unspecified table
+   * Does not make copies of the passed byte arrays. Presumes the passed 
+   * arrays immutable.
+   * @param row row key
+   * @param column column key
+   * @param timestamp timestamp value
+   */
+  public HStoreKey(final byte [] row, final byte [] column, long timestamp) {
+    this(row, column, timestamp, null);
+  }
+  
+  /**
+   * Create an HStoreKey specifying all the fields with specified table
+   * Does not make copies of the passed byte arrays. Presumes the passed 
+   * arrays immutable.
+   * @param row row key
+   * @param column column key
+   * @param timestamp timestamp value
+   * @param regionInfo region info
+   */
+  public HStoreKey(final byte [] row, 
+      final byte [] column, long timestamp, final HRegionInfo regionInfo) {
+    // Make copies
+    this.row = row;
+    this.column = column;
+    this.timestamp = timestamp;
+    this.regionInfo = regionInfo;
+  }
+
+  /**
+   * Constructs a new HStoreKey from another
+   * 
+   * @param other the source key
+   */
+  public HStoreKey(HStoreKey other) {
+    this(other.getRow(), other.getColumn(), other.getTimestamp(),
+      other.getHRegionInfo());
+  }
+  
+  /**
+   * Change the value of the row key
+   * 
+   * @param newrow new row key value
+   */
+  public void setRow(byte [] newrow) {
+    this.row = newrow;
+  }
+  
+  /**
+   * Change the value of the column in this key
+   * 
+   * @param c new column family value
+   */
+  public void setColumn(byte [] c) {
+    this.column = c;
+  }
+
+  /**
+   * Change the value of the timestamp field
+   * 
+   * @param timestamp new timestamp value
+   */
+  public void setVersion(long timestamp) {
+    this.timestamp = timestamp;
+  }
+  
+  /**
+   * Set the value of this HStoreKey from the supplied key
+   * 
+   * @param k key value to copy
+   */
+  public void set(HStoreKey k) {
+    this.row = k.getRow();
+    this.column = k.getColumn();
+    this.timestamp = k.getTimestamp();
+  }
+  
+  /** @return value of row key */
+  public byte [] getRow() {
+    return row;
+  }
+  
+  /** @return value of column */
+  public byte [] getColumn() {
+    return this.column;
+  }
+
+  /** @return value of timestamp */
+  public long getTimestamp() {
+    return this.timestamp;
+  }
+  
+  /** @return value of regioninfo */
+  public HRegionInfo getHRegionInfo() {
+    return this.regionInfo;
+  }
+  
+  /**
+   * @param hri
+   */
+  public void setHRegionInfo(final HRegionInfo hri) {
+    this.regionInfo = hri;
+  }
+  
+  /**
+   * Compares the row and column of two keys
+   * @param other Key to compare against. Compares row and column.
+   * @return True if same row and column.
+   * @see #matchesWithoutColumn(HStoreKey)
+   * @see #matchesRowFamily(HStoreKey)
+   */ 
+  public boolean matchesRowCol(HStoreKey other) {
+    return HStoreKey.equalsTwoRowKeys(getHRegionInfo(), getRow(), other.getRow()) &&
+      Bytes.equals(getColumn(), other.getColumn());
+  }
+  
+  /**
+   * Compares the row and timestamp of two keys
+   * 
+   * @param other Key to copmare against. Compares row and timestamp.
+   * 
+   * @return True if same row and timestamp is greater than <code>other</code>
+   * @see #matchesRowCol(HStoreKey)
+   * @see #matchesRowFamily(HStoreKey)
+   */
+  public boolean matchesWithoutColumn(HStoreKey other) {
+    return equalsTwoRowKeys(getHRegionInfo(), getRow(), other.getRow()) &&
+      getTimestamp() >= other.getTimestamp();
+  }
+  
+  /**
+   * Compares the row and column family of two keys
+   * 
+   * @param that Key to compare against. Compares row and column family
+   * 
+   * @return true if same row and column family
+   * @see #matchesRowCol(HStoreKey)
+   * @see #matchesWithoutColumn(HStoreKey)
+   */
+  public boolean matchesRowFamily(HStoreKey that) {
+    int delimiterIndex = getFamilyDelimiterIndex(getColumn());
+    return equalsTwoRowKeys(getHRegionInfo(), getRow(), that.getRow()) &&
+      Bytes.compareTo(getColumn(), 0, delimiterIndex, that.getColumn(), 0,
+        delimiterIndex) == 0;
+  }
+  
+  @Override
+  public String toString() {
+    return Bytes.toString(this.row) + "/" + Bytes.toString(this.column) + "/" +
+      timestamp;
+  }
+  
+  @Override
+  public boolean equals(Object obj) {
+    HStoreKey other = (HStoreKey)obj;
+    // Do a quick check.
+    if (this.row.length != other.row.length ||
+        this.column.length != other.column.length ||
+        this.timestamp != other.timestamp) {
+      return false;
+    }
+    return compareTo(other) == 0;
+  }
+  
+  @Override
+  public int hashCode() {
+    int result = Bytes.hashCode(getRow());
+    result ^= Bytes.hashCode(getColumn());
+    result ^= getTimestamp();
+    return result;
+  }
+
+  // Comparable
+
+  public int compareTo(final HStoreKey o) {
+    return compareTo(this.regionInfo, this, o);
+  }
+  
+  static int compareTo(final HRegionInfo hri, final HStoreKey left,
+      final HStoreKey right) {
+    // We can be passed null
+    if (left == null && right == null) return 0;
+    if (left == null) return -1;
+    if (right == null) return 1;
+    
+    int result = compareTwoRowKeys(hri, left.getRow(), right.getRow());
+    if (result != 0) {
+      return result;
+    }
+    result = left.getColumn() == null && right.getColumn() == null? 0:
+      left.getColumn() == null && right.getColumn() != null? -1:
+        left.getColumn() != null && right.getColumn() == null? 1:
+      Bytes.compareTo(left.getColumn(), right.getColumn());
+    if (result != 0) {
+      return result;
+    }
+    // The below older timestamps sorting ahead of newer timestamps looks
+    // wrong but it is intentional. This way, newer timestamps are first
+    // found when we iterate over a memcache and newer versions are the
+    // first we trip over when reading from a store file.
+    if (left.getTimestamp() < right.getTimestamp()) {
+      result = 1;
+    } else if (left.getTimestamp() > right.getTimestamp()) {
+      result = -1;
+    }
+    // Because of HBASE-877, our BeforeThisStoreKey trick no longer works in
+    // mapfiles and so instead we need to do this weird check here below.
+    return result == 0 && left instanceof BeforeThisStoreKey? -1:
+      result == 0 && right instanceof BeforeThisStoreKey? 1:
+      result;
+  }
+
+  /**
+   * @param column
+   * @return New byte array that holds <code>column</code> family prefix only
+   * (Does not include the colon DELIMITER).
+   * @throws ColumnNameParseException 
+   * @see #parseColumn(byte[])
+   */
+  public static byte [] getFamily(final byte [] column)
+  throws ColumnNameParseException {
+    int index = getFamilyDelimiterIndex(column);
+    if (index <= 0) {
+      throw new ColumnNameParseException("Missing ':' delimiter between " +
+        "column family and qualifier in the passed column name <" +
+        Bytes.toString(column) + ">");
+    }
+    byte [] result = new byte[index];
+    System.arraycopy(column, 0, result, 0, index);
+    return result;
+  }
+  
+  /**
+   * @param column
+   * @return Return hash of family portion of passed column.
+   */
+  public static Integer getFamilyMapKey(final byte [] column) {
+    int index = getFamilyDelimiterIndex(column);
+    // If index < -1, presume passed column is a family name absent colon
+    // delimiter
+    return Bytes.mapKey(column, index > 0? index: column.length);
+  }
+  
+  /**
+   * @param family
+   * @param column
+   * @return True if <code>column</code> has a family of <code>family</code>.
+   */
+  public static boolean matchingFamily(final byte [] family,
+      final byte [] column) {
+    // Make sure index of the ':' is at same offset.
+    int index = getFamilyDelimiterIndex(column);
+    if (index != family.length) {
+      return false;
+    }
+    return Bytes.compareTo(family, 0, index, column, 0, index) == 0;
+  }
+  
+  /**
+   * @param family
+   * @return Return <code>family</code> plus the family delimiter.
+   */
+  public static byte [] addDelimiter(final byte [] family) {
+    // Manufacture key by adding delimiter to the passed in colFamily.
+    byte [] familyPlusDelimiter = new byte [family.length + 1];
+    System.arraycopy(family, 0, familyPlusDelimiter, 0, family.length);
+    familyPlusDelimiter[family.length] = HStoreKey.COLUMN_FAMILY_DELIMITER;
+    return familyPlusDelimiter;
+  }
+
+  /**
+   * @param column
+   * @return New byte array that holds <code>column</code> qualifier suffix.
+   * @see #parseColumn(byte[])
+   */
+  public static byte [] getQualifier(final byte [] column) {
+    int index = getFamilyDelimiterIndex(column);
+    int len = column.length - (index + 1);
+    byte [] result = new byte[len];
+    System.arraycopy(column, index + 1, result, 0, len);
+    return result;
+  }
+
+  /**
+   * @param c Column name
+   * @return Return array of size two whose first element has the family
+   * prefix of passed column <code>c</code> and whose second element is the
+   * column qualifier.
+   * @throws ColumnNameParseException 
+   */
+  public static byte [][] parseColumn(final byte [] c)
+  throws ColumnNameParseException {
+    byte [][] result = new byte [2][];
+    int index = getFamilyDelimiterIndex(c);
+    if (index == -1) {
+      throw new ColumnNameParseException("Impossible column name: " + c);
+    }
+    result[0] = new byte [index];
+    System.arraycopy(c, 0, result[0], 0, index);
+    int len = c.length - (index + 1);
+    result[1] = new byte[len];
+    System.arraycopy(c, index + 1 /*Skip delimiter*/, result[1], 0,
+      len);
+    return result;
+  }
+  
+  /**
+   * @param b
+   * @return Index of the family-qualifier colon delimiter character in passed
+   * buffer.
+   */
+  public static int getFamilyDelimiterIndex(final byte [] b) {
+    if (b == null) {
+      throw new NullPointerException();
+    }
+    int result = -1;
+    for (int i = 0; i < b.length; i++) {
+      if (b[i] == COLUMN_FAMILY_DELIMITER) {
+        result = i;
+        break;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Returns row and column bytes out of an HStoreKey.
+   * @param hsk Store key.
+   * @return byte array encoding of HStoreKey
+   */
+  public static byte[] getBytes(final HStoreKey hsk) {
+    return Bytes.add(hsk.getRow(), hsk.getColumn());
+  }
+  
+  /**
+   * Utility method to compare two row keys.
+   * This is required because of the meta delimiters.
+   * This is a hack.
+   * @param regionInfo
+   * @param rowA
+   * @param rowB
+   * @return value of the comparison
+   */
+  public static int compareTwoRowKeys(HRegionInfo regionInfo, 
+      byte[] rowA, byte[] rowB) {
+    if (regionInfo != null && regionInfo.isMetaRegion()) {
+      byte[][] keysA = stripStartKeyMeta(rowA);
+      byte[][] KeysB = stripStartKeyMeta(rowB);
+      int rowCompare = Bytes.compareTo(keysA[0], KeysB[0]);
+      if(rowCompare == 0)
+        rowCompare = Bytes.compareTo(keysA[1], KeysB[1]);
+      return rowCompare;
+    }
+    return Bytes.compareTo(rowA, rowB);
+  }
+  
+  /**
+   * Utility method to check if two row keys are equal.
+   * This is required because of the meta delimiters
+   * This is a hack
+   * @param regionInfo
+   * @param rowA
+   * @param rowB
+   * @return if it's equal
+   */
+  public static boolean equalsTwoRowKeys(HRegionInfo regionInfo, 
+      byte[] rowA, byte[] rowB) {
+    return ((rowA == null) && (rowB == null)) ? true:
+      (rowA == null) || (rowB == null) || (rowA.length != rowB.length) ? false:
+        compareTwoRowKeys(regionInfo,rowA,rowB) == 0;
+  }
+  
+  private static byte[][] stripStartKeyMeta(byte[] rowKey) {
+    int offset = -1;
+    for (int i = rowKey.length - 1; i > 0; i--) {
+      if (rowKey[i] == HConstants.META_ROW_DELIMITER) {
+        offset = i;
+        break;
+      }
+    }
+    byte [] row = rowKey;
+    byte [] timestamp = HConstants.EMPTY_BYTE_ARRAY;
+    if (offset != -1) {
+      row = new byte[offset];
+      System.arraycopy(rowKey, 0, row, 0, offset);
+      timestamp = new byte[rowKey.length - offset - 1];
+      System.arraycopy(rowKey, offset+1, timestamp, 0,rowKey.length - offset - 1);
+    }
+    byte[][] elements = new byte[2][];
+    elements[0] = row;
+    elements[1] = timestamp;
+    return elements;
+  }
+  
+  // Writable
+
+  public void write(DataOutput out) throws IOException {
+    Bytes.writeByteArray(out, this.row);
+    Bytes.writeByteArray(out, this.column);
+    out.writeLong(timestamp);
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    this.row = Bytes.readByteArray(in);
+    this.column = Bytes.readByteArray(in);
+    this.timestamp = in.readLong();
+  }
+
+  public long heapSize() {
+    return getRow().length + Bytes.ESTIMATED_HEAP_TAX +
+      getColumn().length + Bytes.ESTIMATED_HEAP_TAX +
+      ESTIMATED_HEAP_TAX;
+  }
+
+  /**
+   * Passed as comparator for memcache and for store files.  See HBASE-868.
+   */
+  public static class HStoreKeyWritableComparator extends WritableComparator {
+    private final HRegionInfo hri;
+    
+    /** @param hri */
+    public HStoreKeyWritableComparator(final HRegionInfo hri) {
+      super(HStoreKey.class);
+      this.hri = hri;
+    }
+    
+    @SuppressWarnings("unchecked")
+    @Override
+    public int compare(final WritableComparable left,
+        final WritableComparable right) {
+      return compareTo(this.hri, (HStoreKey)left, (HStoreKey)right);
+    }
+  }
+  
+  /**
+   * Pass this class into {@link org.apache.hadoop.io.MapFile}.getClosest when
+   * searching for the key that comes BEFORE this one but NOT this one.  This
+   * class will return > 0 when asked to compare against itself rather than 0.
+   * This is a hack for case where getClosest returns a deleted key and we want
+   * to get the previous.  Can't unless use use this class; it'll just keep
+   * returning us the deleted key (getClosest gets exact or nearest before when
+   * you pass true argument).  TODO: Throw this class away when MapFile has
+   * a real 'previous' method.  See HBASE-751.
+   */
+  public static class BeforeThisStoreKey extends HStoreKey {
+    private final HStoreKey beforeThisKey;
+
+    /**
+     * @param beforeThisKey 
+     */
+    public BeforeThisStoreKey(final HStoreKey beforeThisKey) {
+      super();
+      this.beforeThisKey = beforeThisKey;
+    }
+    
+    @Override
+    public int compareTo(final HStoreKey o) {
+      int result = this.beforeThisKey.compareTo(o);
+      return result == 0? -1: result;
+    }
+    
+    @Override
+    public boolean equals(Object obj) {
+      return false;
+    }
+
+    @Override
+    public byte[] getColumn() {
+      return this.beforeThisKey.getColumn();
+    }
+
+    @Override
+    public byte[] getRow() {
+      return this.beforeThisKey.getRow();
+    }
+
+    @Override
+    public long heapSize() {
+      return this.beforeThisKey.heapSize();
+    }
+
+    @Override
+    public long getTimestamp() {
+      return this.beforeThisKey.getTimestamp();
+    }
+
+    @Override
+    public int hashCode() {
+      return this.beforeThisKey.hashCode();
+    }
+
+    @Override
+    public boolean matchesRowCol(HStoreKey other) {
+      return this.beforeThisKey.matchesRowCol(other);
+    }
+
+    @Override
+    public boolean matchesRowFamily(HStoreKey that) {
+      return this.beforeThisKey.matchesRowFamily(that);
+    }
+
+    @Override
+    public boolean matchesWithoutColumn(HStoreKey other) {
+      return this.beforeThisKey.matchesWithoutColumn(other);
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+      this.beforeThisKey.readFields(in);
+    }
+
+    @Override
+    public void set(HStoreKey k) {
+      this.beforeThisKey.set(k);
+    }
+
+    @Override
+    public void setColumn(byte[] c) {
+      this.beforeThisKey.setColumn(c);
+    }
+
+    @Override
+    public void setRow(byte[] newrow) {
+      this.beforeThisKey.setRow(newrow);
+    }
+
+    @Override
+    public void setVersion(long timestamp) {
+      this.beforeThisKey.setVersion(timestamp);
+    }
+
+    @Override
+    public String toString() {
+      return this.beforeThisKey.toString();
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+      this.beforeThisKey.write(out);
+    }
+    
+    @Override
+    public HRegionInfo getHRegionInfo() {
+      return this.beforeThisKey.getHRegionInfo();
+    }
+    
+    @Override
+    public void setHRegionInfo(final HRegionInfo hri) {
+      this.beforeThisKey.setHRegionInfo(hri);
+    }
+  }
+}
\ No newline at end of file

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/BloomFilterMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/BloomFilterMapFile.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/BloomFilterMapFile.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/BloomFilterMapFile.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,249 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.io;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.util.Hash;
+import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
+import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.BloomFilter;
+import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * On write, all keys are added to a bloom filter.  On read, all keys are
+ * tested first against bloom filter. Keys are HStoreKey.  If passed bloom
+ * filter is null, just passes invocation to parent.
+ */
+// TODO should be fixed generic warnings from MapFile methods
+@SuppressWarnings("unchecked")
+public class BloomFilterMapFile extends HBaseMapFile {
+  @SuppressWarnings("hiding")
+  static final Log LOG = LogFactory.getLog(BloomFilterMapFile.class);
+  protected static final String BLOOMFILTER_FILE_NAME = "filter";
+
+  public static class Reader extends HBaseReader {
+    private final BloomFilter bloomFilter;
+
+    /**
+     * @param fs
+     * @param dirName
+     * @param conf
+     * @param filter
+     * @param blockCacheEnabled
+     * @param hri
+     * @throws IOException
+     */
+    public Reader(FileSystem fs, String dirName, Configuration conf,
+        final boolean filter, final boolean blockCacheEnabled, 
+        HRegionInfo hri)
+    throws IOException {
+      super(fs, dirName, conf, blockCacheEnabled, hri);
+      if (filter) {
+        this.bloomFilter = loadBloomFilter(fs, dirName);
+      } else {
+        this.bloomFilter = null;
+      }
+    }
+
+    private BloomFilter loadBloomFilter(FileSystem fs, String dirName)
+    throws IOException {
+      Path filterFile = new Path(dirName, BLOOMFILTER_FILE_NAME);
+      if(!fs.exists(filterFile)) {
+        LOG.warn("FileNotFound: " + filterFile + "; proceeding without");
+        return null;
+      }
+      BloomFilter filter = new BloomFilter();
+      FSDataInputStream in = fs.open(filterFile);
+      try {
+        filter.readFields(in);
+      } finally {
+        in.close();
+      }
+      return filter;
+    }
+    
+    @Override
+    public Writable get(WritableComparable key, Writable val)
+    throws IOException {
+      if (bloomFilter == null) {
+        return super.get(key, val);
+      }
+      if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("bloom filter reported that key exists");
+        }
+        return super.get(key, val);
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("bloom filter reported that key does not exist");
+      }
+      return null;
+    }
+
+    @Override
+    public WritableComparable getClosest(WritableComparable key,
+        Writable val) throws IOException {
+      if (bloomFilter == null) {
+        return super.getClosest(key, val);
+      }
+      // Note - the key being passed to us is always a HStoreKey
+      if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("bloom filter reported that key exists");
+        }
+        return super.getClosest(key, val);
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("bloom filter reported that key does not exist");
+      }
+      return null;
+    }
+    
+    /**
+     * @return size of the bloom filter
+     */
+   public int getBloomFilterSize() {
+      return bloomFilter == null ? 0 : bloomFilter.getVectorSize();
+    }
+  }
+  
+  public static class Writer extends HBaseWriter {
+    private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0;
+    private final BloomFilter bloomFilter;
+    private final String dirName;
+    private final FileSystem fs;
+    
+    /**
+     * @param conf
+     * @param fs
+     * @param dirName
+     * @param compression
+     * @param filter
+     * @param nrows
+     * @param hri
+     * @throws IOException
+     */
+    public Writer(Configuration conf, FileSystem fs, String dirName,
+      SequenceFile.CompressionType compression, final boolean filter,
+      int nrows, final HRegionInfo hri)
+    throws IOException {
+      super(conf, fs, dirName, compression, hri);
+      this.dirName = dirName;
+      this.fs = fs;
+      if (filter) {
+        /* 
+         * There is no way to automatically determine the vector size and the
+         * number of hash functions to use. In particular, bloom filters are
+         * very sensitive to the number of elements inserted into them. For
+         * HBase, the number of entries depends on the size of the data stored
+         * in the column. Currently the default region size is 256MB, so the
+         * number of entries is approximately 
+         * 256MB / (average value size for column).
+         * 
+         * If m denotes the number of bits in the Bloom filter (vectorSize),
+         * n denotes the number of elements inserted into the Bloom filter and
+         * k represents the number of hash functions used (nbHash), then
+         * according to Broder and Mitzenmacher,
+         * 
+         * ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf )
+         * 
+         * the probability of false positives is minimized when k is
+         * approximately m/n ln(2).
+         * 
+         * If we fix the number of hash functions and know the number of
+         * entries, then the optimal vector size m = (k * n) / ln(2)
+         */
+        BloomFilter f = null;
+        try {
+          f  = new BloomFilter(
+            (int) Math.ceil(
+                (DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
+                Math.log(2.0)),
+            (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
+            Hash.getHashType(conf)
+          );
+        } catch (IllegalArgumentException e) {
+          LOG.warn("Failed creating bloomfilter; proceeding without", e);
+        }
+        this.bloomFilter = f;
+      } else {
+        this.bloomFilter = null;
+      }
+    }
+
+    @Override
+    public void append(WritableComparable key, Writable val)
+    throws IOException {
+      if (bloomFilter != null) {
+        bloomFilter.add(getBloomFilterKey(key));
+      }
+      super.append(key, val);
+    }
+
+    @Override
+    public synchronized void close() throws IOException {
+      super.close();
+      if (this.bloomFilter != null) {
+        flushBloomFilter();
+      }
+    }
+    
+    /**
+     * Flushes bloom filter to disk
+     * 
+     * @throws IOException
+     */
+    private void flushBloomFilter() throws IOException {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("flushing bloom filter for " + this.dirName);
+      }
+      FSDataOutputStream out =
+        fs.create(new Path(dirName, BLOOMFILTER_FILE_NAME));
+      try {
+        bloomFilter.write(out);
+      } finally {
+        out.close();
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("flushed bloom filter for " + this.dirName);
+      }
+    }
+  }
+
+  /**
+   * Custom bloom filter key maker.
+   * @param key
+   * @return Key made of bytes of row only.
+   */
+  protected static Key getBloomFilterKey(WritableComparable key) {
+    return new Key(((HStoreKey) key).getRow());
+  }
+}
\ No newline at end of file

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HBaseMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HBaseMapFile.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HBaseMapFile.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HBaseMapFile.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,112 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * HBase customizations of MapFile.
+ */
+public class HBaseMapFile extends MapFile {
+  // TODO not used. remove?!
+  //  private static final Log LOG = LogFactory.getLog(HBaseMapFile.class);
+  
+  /**
+   * Values are instances of this class.
+   */
+  public static final Class<? extends Writable>  VALUE_CLASS =
+    ImmutableBytesWritable.class;
+
+  /**
+   * A reader capable of reading and caching blocks of the data file.
+   */
+  public static class HBaseReader extends MapFile.Reader {
+    private final boolean blockCacheEnabled;
+
+    /**
+     * @param fs
+     * @param dirName
+     * @param conf
+     * @param hri
+     * @throws IOException
+     */
+    public HBaseReader(FileSystem fs, String dirName, Configuration conf,
+      HRegionInfo hri)
+    throws IOException {
+      this(fs, dirName, conf, false, hri);
+    }
+    
+    /**
+     * @param fs
+     * @param dirName
+     * @param conf
+     * @param blockCacheEnabled
+     * @param hri
+     * @throws IOException
+     */
+    public HBaseReader(FileSystem fs, String dirName, Configuration conf,
+        boolean blockCacheEnabled, HRegionInfo hri)
+    throws IOException {
+      super(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), 
+          conf, false); // defer opening streams
+      this.blockCacheEnabled = blockCacheEnabled;
+      open(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), conf);
+      
+      // Force reading of the mapfile index by calling midKey. Reading the
+      // index will bring the index into memory over here on the client and
+      // then close the index file freeing up socket connection and resources
+      // in the datanode. Usually, the first access on a MapFile.Reader will
+      // load the index force the issue in HStoreFile MapFiles because an
+      // access may not happen for some time; meantime we're using up datanode
+      // resources (See HADOOP-2341). midKey() goes to index. Does not seek.
+      midKey();
+    }
+  }
+
+  public static class HBaseWriter extends MapFile.Writer {
+    /**
+     * @param conf
+     * @param fs
+     * @param dirName
+     * @param compression
+     * @param hri
+     * @throws IOException
+     */
+    public HBaseWriter(Configuration conf, FileSystem fs, String dirName,
+        SequenceFile.CompressionType compression, final HRegionInfo hri)
+    throws IOException {
+      super(conf, fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri),
+         VALUE_CLASS, compression);
+      // Default for mapfiles is 128.  Makes random reads faster if we
+      // have more keys indexed and we're not 'next'-ing around in the
+      // mapfile.
+      setIndexInterval(conf.getInt("hbase.io.index.interval", 128));
+    }
+  }
+}

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HalfMapFileReader.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HalfMapFileReader.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HalfMapFileReader.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/HalfMapFileReader.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,228 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.io;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
+import org.apache.hadoop.hbase.migration.nineteen.io.Reference.Range;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * A facade for a {@link org.apache.hadoop.io.MapFile.Reader} that serves up
+ * either the top or bottom half of a MapFile where 'bottom' is the first half
+ * of the file containing the keys that sort lowest and 'top' is the second half
+ * of the file with keys that sort greater than those of the bottom half.
+ * The top includes the split files midkey, of the key that follows if it does
+ * not exist in the file.
+ * 
+ * <p>This type works in tandem with the {@link Reference} type.  This class
+ * is used reading while Reference is used writing.
+ * 
+ * <p>This file is not splitable.  Calls to {@link #midKey()} return null.
+ */
+//TODO should be fixed generic warnings from MapFile methods
+public class HalfMapFileReader extends BloomFilterMapFile.Reader {
+  private static final Log LOG = LogFactory.getLog(HalfMapFileReader.class);
+
+  private final boolean top;
+  private final HStoreKey midkey;
+  private boolean firstNextCall = true;
+  
+  /**
+   * @param fs
+   * @param dirName
+   * @param conf
+   * @param r
+   * @param mk
+   * @param hri
+   * @throws IOException
+   */
+  public HalfMapFileReader(final FileSystem fs, final String dirName, 
+      final Configuration conf, final Range r,
+      final WritableComparable<HStoreKey> mk,
+      final HRegionInfo hri)
+  throws IOException {
+    this(fs, dirName, conf, r, mk, false, false, hri);
+  }
+  
+  /**
+   * @param fs
+   * @param dirName
+   * @param conf
+   * @param r
+   * @param mk
+   * @param filter
+   * @param blockCacheEnabled
+   * @param hri
+   * @throws IOException
+   */
+  public HalfMapFileReader(final FileSystem fs, final String dirName, 
+      final Configuration conf, final Range r,
+      final WritableComparable<HStoreKey> mk, final boolean filter,
+      final boolean blockCacheEnabled,
+      final HRegionInfo hri)
+  throws IOException {
+    super(fs, dirName, conf, filter, blockCacheEnabled, hri);
+    // This is not actual midkey for this half-file; its just border
+    // around which we split top and bottom.  Have to look in files to find
+    // actual last and first keys for bottom and top halves.  Half-files don't
+    // have an actual midkey themselves. No midkey is how we indicate file is
+    // not splittable.
+    this.midkey = new HStoreKey((HStoreKey)mk);
+    this.midkey.setHRegionInfo(hri);
+    // Is it top or bottom half?
+    this.top = Reference.isTopFileRegion(r);
+  }
+  
+  /*
+   * Check key is not bleeding into wrong half of the file.
+   * @param key
+   * @throws IOException
+   */
+  private void checkKey(final WritableComparable<HStoreKey> key)
+  throws IOException {
+    if (top) {
+      if (key.compareTo(midkey) < 0) {
+        throw new IOException("Illegal Access: Key is less than midKey of " +
+        "backing mapfile");
+      }
+    } else if (key.compareTo(midkey) >= 0) {
+      throw new IOException("Illegal Access: Key is greater than or equal " +
+      "to midKey of backing mapfile");
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized void finalKey(WritableComparable key)
+  throws IOException {
+    if (top) {
+      super.finalKey(key); 
+    } else {
+      Writable value = new ImmutableBytesWritable();
+      WritableComparable found = super.getClosest(midkey, value, true);
+      Writables.copyWritable(found, key);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized Writable get(WritableComparable key, Writable val)
+  throws IOException {
+    checkKey(key);
+    return super.get(key, val);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized WritableComparable getClosest(WritableComparable key,
+    Writable val)
+  throws IOException {
+    WritableComparable closest = null;
+    if (top) {
+      // If top, the lowest possible key is first key.  Do not have to check
+      // what comes back from super getClosest.  Will return exact match or
+      // greater.
+      closest = (key.compareTo(this.midkey) < 0)?
+        this.midkey: super.getClosest(key, val);
+      // we know that we just went past the midkey
+      firstNextCall = false;
+    } else {
+      // We're serving bottom of the file.
+      if (key.compareTo(this.midkey) < 0) {
+        // Check key is within range for bottom.
+        closest = super.getClosest(key, val);
+        // midkey was made against largest store file at time of split. Smaller
+        // store files could have anything in them.  Check return value is
+        // not beyond the midkey (getClosest returns exact match or next after)
+        if (closest != null && closest.compareTo(this.midkey) >= 0) {
+          // Don't let this value out.
+          closest = null;
+        }
+      }
+      // Else, key is > midkey so let out closest = null.
+    }
+    return closest;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized WritableComparable midKey() throws IOException {
+    // Returns null to indicate file is not splitable.
+    return null;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized boolean next(WritableComparable key, Writable val)
+  throws IOException {
+    if (firstNextCall) {
+      firstNextCall = false;
+      if (this.top) {
+        // Seek to midkey.  Midkey may not exist in this file.  That should be
+        // fine.  Then we'll either be positioned at end or start of file.
+        WritableComparable nearest = getClosest(this.midkey, val);
+        // Now copy the midkey into the passed key.
+        if (nearest != null) {
+          Writables.copyWritable(nearest, key);
+          return true;
+        }
+        return false; 
+      }
+    }
+    boolean result = super.next(key, val);
+    int cmpresult = key.compareTo(midkey);
+
+    if (top && cmpresult < 0) {
+      LOG.error("BUG BUG BUG. HalfMapFileReader wanted to return key out of range. DANGER");
+      throw new IOException("BUG BUG BUG. HalfMapFileReader wanted to return key out of range. DANGER");
+    } else if (!top && cmpresult >= 0) {
+      result = false;
+    }
+    return result;
+  }
+  
+  @Override
+  public synchronized void reset() throws IOException {
+    if (top) {
+      firstNextCall = true;
+      return;
+    }
+    super.reset();
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized boolean seek(WritableComparable key)
+  throws IOException {
+    checkKey(key);
+    return super.seek(key);
+  }
+}

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/Reference.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/Reference.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/Reference.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/io/Reference.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,117 @@
+/**
+ * 
+ */
+package org.apache.hadoop.hbase.migration.nineteen.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.migration.nineteen.HStoreKey;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A reference to a part of a store file.  The file referenced usually lives
+ * under a different region.  The part referenced is usually the top or bottom
+ * half of the file.  References are made at region split time.  Being lazy
+ * about copying data between the parent of the split and the split daughters
+ * makes splitting faster.
+ * 
+ * <p>References work with {@link HalfMapFileReader}.  References know how to
+ * write out the reference format in the file system and are whats juggled when
+ * references are mixed in with direct store files.  The
+ * {@link HalfMapFileReader} is used reading the referred to file.
+ *
+ * <p>References to store files located over in some other region look like
+ * this in the file system
+ * <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
+ * i.e. an id followed by the name of the referenced region.  The data
+ * ('mapfiles') of references are empty. The accompanying <code>info</code> file
+ * contains the <code>midkey</code> that demarks top and bottom of the
+ * referenced storefile, the id of the remote store we're referencing and
+ * whether we're to serve the top or bottom region of the remote store file.
+ * Note, a region is itself not splitable if it has instances of store file
+ * references.  References are cleaned up by compactions.
+ */
+public class Reference implements Writable {
+  // TODO: see if it makes sense making a ReferenceMapFile whose Writer is this
+  // class and whose Reader is the {@link HalfMapFileReader}.
+
+  private int encodedRegionName;
+  private long fileid;
+  private Range region;
+  private HStoreKey midkey;
+  
+  /** 
+   * For split HStoreFiles, it specifies if the file covers the lower half or
+   * the upper half of the key range
+   */
+  public static enum Range {
+    /** HStoreFile contains upper half of key range */
+    top,
+    /** HStoreFile contains lower half of key range */
+    bottom
+  }
+  
+ public Reference(final int ern, final long fid, final HStoreKey m,
+      final Range fr) {
+    this.encodedRegionName = ern;
+    this.fileid = fid;
+    this.region = fr;
+    this.midkey = m;
+  }
+  
+ public Reference() {
+    this(-1, -1, null, Range.bottom);
+  }
+
+  public long getFileId() {
+    return fileid;
+  }
+
+  public Range getFileRegion() {
+    return region;
+  }
+  
+  public HStoreKey getMidkey() {
+    return midkey;
+  }
+  
+  public int getEncodedRegionName() {
+    return this.encodedRegionName;
+  }
+
+  @Override
+  public String toString() {
+    return encodedRegionName + "/" + fileid + "/" + region;
+  }
+
+  // Make it serializable.
+
+  public void write(DataOutput out) throws IOException {
+    // Write out the encoded region name as a String.  Doing it as a String
+    // keeps a Reference's serialization backword compatible with
+    // pre-HBASE-82 serializations.  ALternative is rewriting all
+    // info files in hbase (Serialized References are written into the
+    // 'info' file that accompanies HBase Store files).
+    out.writeUTF(Integer.toString(encodedRegionName));
+    out.writeLong(fileid);
+    // Write true if we're doing top of the file.
+    out.writeBoolean(isTopFileRegion(region));
+    this.midkey.write(out);
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    this.encodedRegionName = Integer.parseInt(in.readUTF());
+    fileid = in.readLong();
+    boolean tmp = in.readBoolean();
+    // If true, set region to top.
+    region = tmp? Range.top: Range.bottom;
+    midkey = new HStoreKey();
+    midkey.readFields(in);
+  }
+  
+  public static boolean isTopFileRegion(final Range r) {
+    return r.equals(Range.top);
+  }
+}
\ No newline at end of file

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/BloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/BloomFilter.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/BloomFilter.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/BloomFilter.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,240 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import java.util.BitSet;
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a <i>Bloom filter</i>, as defined by Bloom in 1970.
+ * <p>
+ * The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by 
+ * the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ * offers for the transmission of set membership information between networked hosts.  A sender encodes 
+ * the information into a bit vector, the Bloom filter, that is more compact than a conventional 
+ * representation. Computation and space costs for construction are linear in the number of elements.  
+ * The receiver uses the filter to test whether various elements are members of the set. Though the 
+ * filter will occasionally return a false positive, it will never return a false negative. When creating 
+ * the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size. 
+ * 
+ * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.0 - 2 Feb. 07
+ * 
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ * 
+ * @see <a href="http://portal.acm.org/citation.cfm?id=362692&dl=ACM&coll=portal">Space/Time Trade-Offs in Hash Coding with Allowable Errors</a>
+ */
+public class BloomFilter extends Filter {
+  private static final byte[] bitvalues = new byte[] {
+    (byte)0x01,
+    (byte)0x02,
+    (byte)0x04,
+    (byte)0x08,
+    (byte)0x10,
+    (byte)0x20,
+    (byte)0x40,
+    (byte)0x80
+  };
+  
+  /** The bit vector. */
+  BitSet bits;
+
+  /** Default constructor - use with readFields */
+  public BloomFilter() {
+    super();
+  }
+  
+  /**
+   * Constructor
+   * @param vectorSize The vector size of <i>this</i> filter.
+   * @param nbHash The number of hash function to consider.
+   * @param hashType type of the hashing function (see {@link Hash}).
+   */
+  public BloomFilter(int vectorSize, int nbHash, int hashType){
+    super(vectorSize, nbHash, hashType);
+
+    bits = new BitSet(this.vectorSize);
+  }//end constructor
+
+  @Override
+  public void add(Key key) {
+    if(key == null) {
+      throw new NullPointerException("key cannot be null");
+    }
+
+    int[] h = hash.hash(key);
+    hash.clear();
+
+    for(int i = 0; i < nbHash; i++) {
+      bits.set(h[i]);
+    }
+  }//end add()
+
+  @Override
+  public void and(Filter filter){
+    if(filter == null
+        || !(filter instanceof BloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be and-ed");
+    }
+
+    this.bits.and(((BloomFilter) filter).bits);
+  }//end and()
+
+  @Override
+  public boolean membershipTest(Key key){
+    if(key == null) {
+      throw new NullPointerException("key cannot be null");
+    }
+
+    int[] h = hash.hash(key);
+    hash.clear();
+    for(int i = 0; i < nbHash; i++) {
+      if(!bits.get(h[i])) {
+        return false;
+      }
+    }
+    return true;
+  }//end memberhsipTest()
+
+  @Override
+  public void not(){
+    bits.flip(0, vectorSize - 1);
+  }//end not()
+
+  @Override
+  public void or(Filter filter){
+    if(filter == null
+        || !(filter instanceof BloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be or-ed");
+    }
+    bits.or(((BloomFilter) filter).bits);
+  }//end or()
+
+  @Override
+  public void xor(Filter filter){
+    if(filter == null
+        || !(filter instanceof BloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be xor-ed");
+    }
+    bits.xor(((BloomFilter) filter).bits);
+  }//and xor()
+
+  @Override
+  public String toString(){
+    return bits.toString();
+  }//end toString()
+
+  @Override
+  public Object clone(){
+    BloomFilter bf = new BloomFilter(vectorSize, nbHash, hashType);
+    bf.or(this);
+    return bf;
+  }//end clone()
+  
+  /**
+   * @return size of the the bloomfilter
+   */
+  public int getVectorSize() {
+    return this.vectorSize;
+  }
+
+  // Writable
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    byte[] bytes = new byte[getNBytes()];
+    for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
+      if (bitIndex == 8) {
+        bitIndex = 0;
+        byteIndex++;
+      }
+      if (bitIndex == 0) {
+        bytes[byteIndex] = 0;
+      }
+      if (bits.get(i)) {
+        bytes[byteIndex] |= bitvalues[bitIndex];
+      }
+    }
+    out.write(bytes);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    bits = new BitSet(this.vectorSize);
+    byte[] bytes = new byte[getNBytes()];
+    in.readFully(bytes);
+    for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) {
+      if (bitIndex == 8) {
+        bitIndex = 0;
+        byteIndex++;
+      }
+      if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) {
+        bits.set(i);
+      }
+    }
+  }
+  
+  /* @return number of bytes needed to hold bit vector */
+  private int getNBytes() {
+    return (vectorSize + 7) / 8;
+  }
+}//end class

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,314 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;                        //TODO: remove
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a <i>counting Bloom filter</i>, as defined by Fan et al. in a ToN
+ * 2000 paper.
+ * <p>
+ * A counting Bloom filter is an improvement to standard a Bloom filter as it
+ * allows dynamic additions and deletions of set membership information.  This 
+ * is achieved through the use of a counting vector instead of a bit vector.
+ * 
+ * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.1 - 19 Jan. 08
+ * 
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ * 
+ * @see <a href="http://portal.acm.org/citation.cfm?id=343571.343572">Summary cache: a scalable wide-area web cache sharing protocol</a>
+ */
+public final class CountingBloomFilter extends Filter {
+  /** Storage for the counting buckets */
+  private long[] buckets;
+
+  /** We are using 4bit buckets, so each bucket can count to 15 */
+  private final static long BUCKET_MAX_VALUE = 15;
+
+  /** Default constructor - use with readFields */
+  public CountingBloomFilter() {}
+  
+  /**
+   * Constructor
+   * @param vectorSize The vector size of <i>this</i> filter.
+   * @param nbHash The number of hash function to consider.
+   * @param hashType type of the hashing function (see {@link Hash}).
+   */
+  public CountingBloomFilter(int vectorSize, int nbHash, int hashType){
+    super(vectorSize, nbHash, hashType);
+    buckets = new long[buckets2words(vectorSize)];
+  }//end constructor
+
+  /** returns the number of 64 bit words it would take to hold vectorSize buckets */
+  private static int buckets2words(int vectorSize) {
+   return ((vectorSize - 1) >>> 4) + 1;
+  }
+
+
+  @Override
+  public void add(Key key) {
+    if(key == null) {
+      throw new NullPointerException("key can not be null");
+    }
+
+    int[] h = hash.hash(key);
+    hash.clear();
+
+    for(int i = 0; i < nbHash; i++) {
+      // find the bucket
+      int wordNum = h[i] >> 4;          // div 16
+      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4
+      
+      long bucketMask = 15L << bucketShift;
+      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+      
+      // only increment if the count in the bucket is less than BUCKET_MAX_VALUE
+      if(bucketValue < BUCKET_MAX_VALUE) {
+        // increment by 1
+        buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift);
+      }
+    }
+  }//end add()
+
+  /**
+   * Removes a specified key from <i>this</i> counting Bloom filter.
+   * <p>
+   * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counter Bloom filter.
+   * @param key The key to remove.
+   */
+  public void delete(Key key) {
+    if(key == null) {
+      throw new NullPointerException("Key may not be null");
+    }
+    if(!membershipTest(key)) {
+      throw new IllegalArgumentException("Key is not a member");
+    }
+
+    int[] h = hash.hash(key);
+    hash.clear();
+
+    for(int i = 0; i < nbHash; i++) {
+      // find the bucket
+      int wordNum = h[i] >> 4;          // div 16
+      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4
+      
+      long bucketMask = 15L << bucketShift;
+      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+      
+      // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE
+      if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) {
+        // decrement by 1
+        buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift);
+      }
+    }
+  }//end delete
+
+  @Override
+  public void and(Filter filter){
+    if(filter == null
+        || !(filter instanceof CountingBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be and-ed");
+    }
+    CountingBloomFilter cbf = (CountingBloomFilter)filter;
+    
+    int sizeInWords = buckets2words(vectorSize);
+    for(int i = 0; i < sizeInWords; i++) {
+      this.buckets[i] &= cbf.buckets[i];
+    }
+  }//end and()
+
+  @Override
+  public boolean membershipTest(Key key){
+    if(key == null) {
+      throw new NullPointerException("Key may not be null");
+    }
+
+    int[] h = hash.hash(key);
+    hash.clear();
+
+    for(int i = 0; i < nbHash; i++) {
+      // find the bucket
+      int wordNum = h[i] >> 4;          // div 16
+      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4
+
+      long bucketMask = 15L << bucketShift;
+
+      if((buckets[wordNum] & bucketMask) == 0) {
+        return false;
+      }
+    }
+
+    return true;
+  }//end membershipTest()
+
+  /**
+   * This method calculates an approximate count of the key, i.e. how many
+   * times the key was added to the filter. This allows the filter to be
+   * used as an approximate <code>key -&gt; count</code> map.
+   * <p>NOTE: due to the bucket size of this filter, inserting the same
+   * key more than 15 times will cause an overflow at all filter positions
+   * associated with this key, and it will significantly increase the error
+   * rate for this and other keys. For this reason the filter can only be
+   * used to store small count values <code>0 &lt;= N &lt;&lt; 15</code>.
+   * @param key key to be tested
+   * @return 0 if the key is not present. Otherwise, a positive value v will
+   * be returned such that <code>v == count</code> with probability equal to the
+   * error rate of this filter, and <code>v &gt; count</code> otherwise.
+   * Additionally, if the filter experienced an underflow as a result of
+   * {@link #delete(Key)} operation, the return value may be lower than the
+   * <code>count</code> with the probability of the false negative rate of such
+   * filter.
+   */
+  public int approximateCount(Key key) {
+    int res = Integer.MAX_VALUE;
+    int[] h = hash.hash(key);
+    hash.clear();
+    for (int i = 0; i < nbHash; i++) {
+      // find the bucket
+      int wordNum = h[i] >> 4;          // div 16
+      int bucketShift = (h[i] & 0x0f) << 2;  // (mod 16) * 4
+      
+      long bucketMask = 15L << bucketShift;
+      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+      if (bucketValue < res) res = (int)bucketValue;
+    }
+    if (res != Integer.MAX_VALUE) {
+      return res;
+    } else {
+      return 0;
+    }
+  }
+
+  @Override
+  public void not(){
+    throw new UnsupportedOperationException("not() is undefined for "
+        + this.getClass().getName());
+  }//end not()
+
+  @Override
+  public void or(Filter filter){
+    if(filter == null
+        || !(filter instanceof CountingBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be or-ed");
+    }
+
+    CountingBloomFilter cbf = (CountingBloomFilter)filter;
+
+    int sizeInWords = buckets2words(vectorSize);
+    for(int i = 0; i < sizeInWords; i++) {
+      this.buckets[i] |= cbf.buckets[i];
+    }
+  }//end or()
+
+  @Override
+  @SuppressWarnings("unused")
+  public void xor(Filter filter){
+    throw new UnsupportedOperationException("xor() is undefined for "
+        + this.getClass().getName());
+  }//end xor()
+
+  @Override
+  public String toString(){
+    StringBuilder res = new StringBuilder();
+
+    for(int i = 0; i < vectorSize; i++) {
+      if(i > 0) {
+        res.append(" ");
+      }
+      
+      int wordNum = i >> 4;          // div 16
+      int bucketShift = (i & 0x0f) << 2;  // (mod 16) * 4
+      
+      long bucketMask = 15L << bucketShift;
+      long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+      
+      res.append(bucketValue);
+    }
+
+    return res.toString();
+  }//end toString()
+
+  @Override
+  public Object clone(){
+    CountingBloomFilter cbf = new CountingBloomFilter(vectorSize, nbHash, hashType);
+    cbf.buckets = this.buckets.clone();
+    return cbf;
+  }//end clone()
+
+  // Writable
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    int sizeInWords = buckets2words(vectorSize);
+    for(int i = 0; i < sizeInWords; i++) {
+      out.writeLong(buckets[i]);
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    int sizeInWords = buckets2words(vectorSize);
+    buckets = new long[sizeInWords];
+    for(int i = 0; i < sizeInWords; i++) {
+      buckets[i] = in.readLong();
+    }
+  }
+}//end class

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,303 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper.
+ * <p>
+ * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but
+ * each of the <code>s</code> rows is a standard Bloom filter. The creation 
+ * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code>
+ * bit matrix, i.e., it is composed of a single standard Bloom filter.
+ * It assumes that <code>n<sub>r</sub></code> elements are recorded in the 
+ * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is
+ * the cardinality of the set <code>A</code> to record in the filter).  
+ * <p>
+ * As the size of <code>A</code> grows during the execution of the application,
+ * several keys must be inserted in the DBF.  When inserting a key into the DBF,
+ * one must first get an active Bloom filter in the matrix.  A Bloom filter is
+ * active when the number of recorded keys, <code>n<sub>r</sub></code>, is 
+ * strictly less than the current cardinality of <code>A</code>, <code>n</code>.
+ * If an active Bloom filter is found, the key is inserted and 
+ * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there
+ * is no active Bloom filter, a new one is created (i.e., a new row is added to
+ * the matrix) according to the current size of <code>A</code> and the element
+ * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of
+ * this new Bloom filter is set to one.  A given key is said to belong to the
+ * DBF if the <code>k</code> positions are set to one in one of the matrix rows.
+ * 
+ * contract <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.0 - 6 Feb. 07
+ * 
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ * @see org.onelab.filter.BloomFilter A Bloom filter
+ * 
+ * @see <a href="http://www.cse.fau.edu/~jie/research/publications/Publication_files/infocom2006.pdf">Theory and Network Applications of Dynamic Bloom Filters</a>
+ */
+public class DynamicBloomFilter extends Filter {
+  /** 
+   * Threshold for the maximum number of key to record in a dynamic Bloom filter row.
+   */
+  private int nr;
+
+  /**
+   * The number of keys recorded in the current standard active Bloom filter.
+   */
+  private int currentNbRecord;
+
+  /**
+   * The matrix of Bloom filter.
+   */
+  private BloomFilter[] matrix;
+
+  /**
+   * Zero-args constructor for the serialization.
+   */
+  public DynamicBloomFilter() { }
+
+  /**
+   * Constructor.
+   * <p>
+   * Builds an empty Dynamic Bloom filter.
+   * @param vectorSize The number of bits in the vector.
+   * @param nbHash The number of hash function to consider.
+   * @param hashType type of the hashing function (see {@link Hash}).
+   * @param nr The threshold for the maximum number of keys to record in a dynamic Bloom filter row.
+   */
+  public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
+    super(vectorSize, nbHash, hashType);
+
+    this.nr = nr;
+    this.currentNbRecord = 0;
+
+    matrix = new BloomFilter[1];
+    matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
+  }//end constructor
+
+  @Override
+  public void add(Key key){
+    if(key == null) {
+      throw new NullPointerException("Key can not be null");
+    }
+
+    BloomFilter bf = getActiveStandardBF();
+
+    if(bf == null){
+      addRow();
+      bf = matrix[matrix.length - 1];
+      currentNbRecord = 0;
+    }
+
+    bf.add(key);
+
+    currentNbRecord++;
+  }//end add()
+
+  @Override
+  public void and(Filter filter) {
+    if(filter == null
+        || !(filter instanceof DynamicBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be and-ed");
+    }
+
+    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+    if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+      throw new IllegalArgumentException("filters cannot be and-ed");
+    }
+
+    for(int i = 0; i < matrix.length; i++) {
+      matrix[i].and(dbf.matrix[i]);
+    }
+  }//end and()
+
+  @Override
+  public boolean membershipTest(Key key){
+    if(key == null) {
+      return true;
+    }
+
+    for(int i = 0; i < matrix.length; i++) {
+      if(matrix[i].membershipTest(key)) {
+        return true;
+      }
+    }
+
+    return false;
+  }//end membershipTest()
+
+  @Override
+  public void not(){
+    for(int i = 0; i < matrix.length; i++) {
+      matrix[i].not();
+    }
+  }//end not()
+
+  @Override
+  public void or(Filter filter){
+    if(filter == null
+        || !(filter instanceof DynamicBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be or-ed");
+    }
+
+    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+    if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+      throw new IllegalArgumentException("filters cannot be or-ed");
+    }
+    for(int i = 0; i < matrix.length; i++) {
+      matrix[i].or(dbf.matrix[i]);
+    }
+  }//end or()
+
+  @Override
+  public void xor(Filter filter){
+    if(filter == null
+        || !(filter instanceof DynamicBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be xor-ed");
+    }
+    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+    if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+      throw new IllegalArgumentException("filters cannot be xor-ed");
+    }
+
+      for(int i = 0; i<matrix.length; i++) {
+        matrix[i].xor(dbf.matrix[i]);
+    }
+  }//end xor()
+
+  @Override
+  public String toString(){
+    StringBuilder res = new StringBuilder();
+
+    for(int i=0; i<matrix.length; i++) {
+      res.append(matrix[i]);
+      res.append(Character.LINE_SEPARATOR);
+    }
+    return res.toString();
+  }//end toString()
+
+  @Override
+  public Object clone(){
+    DynamicBloomFilter dbf = new DynamicBloomFilter(vectorSize, nbHash, hashType, nr);
+    dbf.currentNbRecord = this.currentNbRecord;
+    dbf.matrix = new BloomFilter[this.matrix.length];
+    for(int i = 0; i < this.matrix.length; i++) {
+      dbf.matrix[i] = (BloomFilter)this.matrix[i].clone();
+    }
+    return dbf;
+  }//end clone()
+
+  // Writable
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    out.writeInt(nr);
+    out.writeInt(currentNbRecord);
+    out.writeInt(matrix.length);
+    for (int i = 0; i < matrix.length; i++) {
+      matrix[i].write(out);
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    nr = in.readInt();
+    currentNbRecord = in.readInt();
+    int len = in.readInt();
+    matrix = new BloomFilter[len];
+    for (int i = 0; i < matrix.length; i++) {
+      matrix[i] = new BloomFilter();
+      matrix[i].readFields(in);
+    }
+  }
+
+  /**
+   * Adds a new row to <i>this</i> dynamic Bloom filter.
+   */
+  private void addRow(){
+    BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
+
+    for(int i = 0; i < matrix.length; i++) {
+      tmp[i] = (BloomFilter)matrix[i].clone();
+    }
+
+    tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
+
+    matrix = tmp;
+  }//end addRow()
+
+  /**
+   * Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter.
+   * @return BloomFilter The active standard Bloom filter.
+   * 			 <code>Null</code> otherwise.
+   */
+  private BloomFilter getActiveStandardBF() {
+    if(currentNbRecord >= nr) {
+      return null;
+    }
+
+    return matrix[matrix.length - 1];
+  }//end getActiveStandardBF()
+}//end class



Mime
View raw message