hbase-commits mailing list archives

From st...@apache.org
Subject svn commit: r795230 [2/2] - in /hadoop/hbase/trunk/src: java/org/apache/hadoop/hbase/ java/org/apache/hadoop/hbase/io/ java/org/apache/hadoop/hbase/migration/ java/org/apache/hadoop/hbase/migration/nineteen/ java/org/apache/hadoop/hbase/migration/ninet...
Date Fri, 17 Jul 2009 21:23:19 GMT
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,216 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819
+ * (http://www.one-lab.org)
+ * 
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.hbase.util.Hash;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Defines the general behavior of a filter.
+ * <p>
+ * A filter is a data structure which aims at offering a lossy summary of a set <code>A</code>.  The
+ * key idea is to map entries of <code>A</code> (also called <i>keys</i>) into several positions 
+ * in a vector through the use of several hash functions.
+ * <p>
+ * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension).
+ * <p>
+ * It must be extended in order to define the real behavior.
+ * 
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ *
+ * @version 1.0 - 2 Feb. 07
+ * 
+ * @see org.onelab.filter.Key The general behavior of a key
+ * @see org.onelab.filter.HashFunction A hash function
+ */
+public abstract class Filter implements Writable {
+  private static final int VERSION = -1; // negative to distinguish from the old, unversioned format
+  /** The vector size of <i>this</i> filter. */
+  protected int vectorSize;
+
+  /** The hash function used to map a key to several positions in the vector. */
+  protected HashFunction hash;
+
+  /** The number of hash functions to consider. */
+  protected int nbHash;
+  
+  /** Type of hashing function to use. */
+  protected int hashType;
+
+  protected Filter() {}
+  
+  /** 
+   * Constructor.
+   * @param vectorSize The vector size of <i>this</i> filter.
+   * @param nbHash The number of hash functions to consider.
+   * @param hashType type of the hashing function (see {@link Hash}).
+   */
+  protected Filter(int vectorSize, int nbHash, int hashType) {
+    this.vectorSize = vectorSize;
+    this.nbHash = nbHash;
+    this.hashType = hashType;
+    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
+  }//end constructor
+
+  /**
+   * Adds a key to <i>this</i> filter.
+   * @param key The key to add.
+   */
+  public abstract void add(Key key);
+
+  /**
+   * Determines whether a specified key belongs to <i>this</i> filter.
+   * @param key The key to test.
+   * @return boolean True if the specified key belongs to <i>this</i> filter,
+   *                 false otherwise.
+   */
+  public abstract boolean membershipTest(Key key);
+
+  /**
+   * Performs a logical AND between <i>this</i> filter and a specified filter.
+   * <p>
+   * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
+   * @param filter The filter to AND with.
+   */
+  public abstract void and(Filter filter);
+
+  /**
+   * Performs a logical OR between <i>this</i> filter and a specified filter.
+   * <p>
+   * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
+   * @param filter The filter to OR with.
+   */
+  public abstract void or(Filter filter);
+
+  /**
+   * Performs a logical XOR between <i>this</i> filter and a specified filter.
+   * <p>
+   * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
+   * @param filter The filter to XOR with.
+   */
+  public abstract void xor(Filter filter);
+
+  /**
+   * Performs a logical NOT on <i>this</i> filter.
+   * <p>
+   * The result is assigned to <i>this</i> filter.
+   */
+  public abstract void not();
+
+  /**
+   * Adds a list of keys to <i>this</i> filter.
+   * @param keys The list of keys.
+   */
+  public void add(List<Key> keys){
+    if(keys == null) {
+      throw new IllegalArgumentException("ArrayList<Key> may not be null");
+    }
+
+    for(Key key: keys) {
+      add(key);
+    }
+  }//end add()
+
+  /**
+   * Adds a collection of keys to <i>this</i> filter.
+   * @param keys The collection of keys.
+   */
+  public void add(Collection<Key> keys){
+    if(keys == null) {
+      throw new IllegalArgumentException("Collection<Key> may not be null");
+    }
+    for(Key key: keys) {
+      add(key);
+    }
+  }//end add()
+
+  /**
+   * Adds an array of keys to <i>this</i> filter.
+   * @param keys The array of keys.
+   */
+  public void add(Key[] keys){
+    if(keys == null) {
+      throw new IllegalArgumentException("Key[] may not be null");
+    }
+    for(int i = 0; i < keys.length; i++) {
+      add(keys[i]);
+    }
+  }//end add()
+  
+  // Writable interface
+  
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(VERSION);
+    out.writeInt(this.nbHash);
+    out.writeByte(this.hashType);
+    out.writeInt(this.vectorSize);
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    int ver = in.readInt();
+    if (ver > 0) { // old unversioned format
+      this.nbHash = ver;
+      this.hashType = Hash.JENKINS_HASH;
+    } else if (ver == VERSION) {
+      this.nbHash = in.readInt();
+      this.hashType = in.readByte();
+    } else {
+      throw new IOException("Unsupported version: " + ver);
+    }
+    this.vectorSize = in.readInt();
+    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
+  }
+}//end class
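
For orientation, a minimal usage sketch of the API above. It assumes the concrete BloomFilter subclass added elsewhere in this commit (RetouchedBloomFilter below extends it) with a (vectorSize, nbHash, hashType) constructor mirroring Filter's; the sizes, key bytes, and the sketch class name are illustrative and not part of the commit.

    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.BloomFilter;
    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Filter;
    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;
    import org.apache.hadoop.hbase.util.Hash;

    public class FilterUsageSketch {
      public static void main(String[] args) {
        // Illustrative sizes: 8K-bit vector, 4 hash functions, Jenkins hashing.
        Filter filter = new BloomFilter(8 * 1024, 4, Hash.JENKINS_HASH);
        filter.add(new Key("row-0001".getBytes()));
        // True for every added key; false positives are possible, false negatives are not.
        System.out.println(filter.membershipTest(new Key("row-0001".getBytes())));
      }
    }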

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,127 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 
+ * (http://www.one-lab.org)
+ * 
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a hash object that returns a certain number of hashed values.
+ * <p>
+ * It delegates to the hashing algorithm selected via {@link Hash} (the original
+ * OneLab implementation was based on SHA-1).
+ * 
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ *
+ * @version 1.0 - 2 Feb. 07
+ * 
+ * @see org.onelab.filter.Key The general behavior of a key being stored in a filter
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ * 
+ * @see <a href="http://www.itl.nist.gov/fipspubs/fip180-1.htm">SHA-1 algorithm</a>
+ */
+public final class HashFunction {
+  /** The number of hashed values. */
+  private int nbHash;
+
+  /** The exclusive upper bound on returned values. */
+  private int maxValue;
+
+  /** Hashing algorithm to use. */
+  private Hash hashFunction;
+  
+  /**
+   * Constructor.
+   * <p>
+   * Builds a hash function constrained to a given number of returned values and an upper bound on those values.
+   * @param maxValue The exclusive upper bound on returned values.
+   * @param nbHash The number of resulting hashed values.
+   * @param hashType type of the hashing function (see {@link Hash}).
+   */
+  public HashFunction(int maxValue, int nbHash, int hashType) {
+    if(maxValue <= 0) {
+      throw new IllegalArgumentException("maxValue must be > 0");
+    }
+    
+    if(nbHash <= 0) {
+      throw new IllegalArgumentException("nbHash must be > 0");
+    }
+
+    this.maxValue = maxValue;
+    this.nbHash = nbHash;
+    this.hashFunction = Hash.getInstance(hashType);
+    if (this.hashFunction == null)
+      throw new IllegalArgumentException("hashType must be known");
+  }//end constructor
+
+  /** Clears <i>this</i> hash function. A NOOP */
+  public void clear() {
+  }
+
+  /**
+   * Hashes a specified key into several integers.
+   * @param k The specified key.
+   * @return The array of hashed values.
+   */
+  public int[] hash(Key k){
+    byte[] b = k.getBytes();
+    if(b == null) {
+      throw new NullPointerException("buffer reference is null");
+    }
+    if(b.length == 0) {
+      throw new IllegalArgumentException("key length must be > 0");
+    }
+    int[] result = new int[nbHash];
+    for (int i = 0, initval = 0; i < nbHash; i++) {
+      initval = hashFunction.hash(b, initval);
+      result[i] = Math.abs(initval) % maxValue;
+    }
+    return result;
+  }//end hash()
+
+}//end class
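
A short sketch of how HashFunction maps one key to several bit positions; the parameters and the sketch class name are illustrative.

    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.HashFunction;
    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;
    import org.apache.hadoop.hbase.util.Hash;

    public class HashFunctionSketch {
      public static void main(String[] args) {
        // 4 hashed positions per key, each in [0, 1024).
        HashFunction fn = new HashFunction(1024, 4, Hash.JENKINS_HASH);
        int[] positions = fn.hash(new Key("row-0001".getBytes()));
        for (int p : positions) {
          System.out.println(p);  // a filter would set these bits in its vector
        }
      }
    }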

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,176 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * The general behavior of a key that must be stored in a filter.
+ * 
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ */
+public class Key implements WritableComparable {
+  /** Byte value of key */
+  byte[] bytes;
+  
+  /**
+   * The weight associated to <i>this</i> key.
+   * <p>
+   * <b>Invariant</b>: if it is not specified, each instance of 
+   * <code>Key</code> will have a default weight of 1.0
+   */
+  double weight;
+
+  /** default constructor - use with readFields */
+  public Key() {}
+
+  /**
+   * Constructor.
+   * <p>
+   * Builds a key with a default weight.
+   * @param value The byte value of <i>this</i> key.
+   */
+  public Key(byte[] value) {
+    this(value, 1.0);
+  }//end constructor
+
+  /**
+   * Constructor.
+   * <p>
+   * Builds a key with a specified weight.
+   * @param value The value of <i>this</i> key.
+   * @param weight The weight associated to <i>this</i> key.
+   */
+  public Key(byte[] value, double weight) {
+    set(value, weight);
+  }//end constructor
+
+  /**
+   * @param value
+   * @param weight
+   */
+  public void set(byte[] value, double weight) {
+    if(value == null) {
+      throw new IllegalArgumentException("value can not be null");
+    }
+    this.bytes = value;
+    this.weight = weight;
+  }
+  
+  /** @return byte[] The value of <i>this</i> key. */
+  public byte[] getBytes() {
+    return this.bytes;
+  }
+
+  /** @return Returns the weight associated to <i>this</i> key. */
+  public double getWeight(){
+    return weight;
+  }//end getWeight()
+
+  /**
+   * Increments the weight of <i>this</i> key by a specified value.
+   * @param weight The increment.
+   */
+  public void incrementWeight(double weight){
+    this.weight += weight;
+  }//end incrementWeight()
+
+  /** Increments the weight of <i>this</i> key by one. */
+  public void incrementWeight(){
+    this.weight++;
+  }//end incrementWeight()
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof Key)) {
+      return false;
+    }
+    return this.compareTo(o) == 0;
+  }
+  
+  @Override
+  public int hashCode() {
+    int result = 0;
+    for(int i = 0; i < bytes.length; i++) {
+      result ^= Byte.valueOf(bytes[i]).hashCode();
+    }
+    result ^= Double.valueOf(weight).hashCode();
+    return result;
+  }
+
+  // Writable
+
+  public void write(DataOutput out) throws IOException {
+    out.writeInt(bytes.length);
+    out.write(bytes);
+    out.writeDouble(weight);
+  }
+  
+  public void readFields(DataInput in) throws IOException {
+    this.bytes = new byte[in.readInt()];
+    in.readFully(this.bytes);
+    weight = in.readDouble();
+  }
+  
+  // Comparable
+  
+  public int compareTo(Object o) {
+    Key other = (Key)o;
+
+    int result = this.bytes.length - other.getBytes().length;
+    for(int i = 0; result == 0 && i < bytes.length; i++) {
+      result = this.bytes[i] - other.bytes[i];
+    }
+    
+    if(result == 0) {
+      // Compare weights directly rather than truncating the difference to an
+      // int, which would lose small fractional differences.
+      result = Double.compare(this.weight, other.weight);
+    }
+    return result;
+  }
+}//end class
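
A brief sketch of Key construction, weights, and comparison; the values and the sketch class name are illustrative.

    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;

    public class KeySketch {
      public static void main(String[] args) {
        Key a = new Key("row-0001".getBytes());      // weight defaults to 1.0
        Key b = new Key("row-0001".getBytes(), 2.0); // explicit weight
        a.incrementWeight();                         // a.getWeight() is now 2.0
        // Keys compare by byte value first, then by weight.
        System.out.println(a.compareTo(b) == 0);     // prints true
      }
    }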

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,91 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819
+ * (http://www.one-lab.org)
+ * 
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+/**
+ * Defines the different removal schemes for retouched Bloom filters.
+ * 
+ * Developed under contract with the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.0 - 7 Feb. 07
+ */
+public interface RemoveScheme {
+  /**
+   * Random selection.
+   * <p>
+   * The idea is to randomly select a bit to reset.
+   */
+  public final static short RANDOM = 0;
+
+  /**
+   * MinimumFN Selection.
+   * <p>
+   * The idea is to select the bit to reset that will generate the minimum
+   * number of false negatives.
+   */
+  public final static short MINIMUM_FN = 1;
+
+  /**
+   * MaximumFP Selection.
+   * <p>
+   * The idea is to select the bit to reset that will remove the maximum number
+   * of false positives.
+   */
+  public final static short MAXIMUM_FP = 2;
+
+  /**
+   * Ratio Selection.
+   * <p>
+   * The idea is to select the bit to reset that will, at the same time, remove
+   * the maximum number of false positives while minimizing the number of false
+   * negatives generated.
+   */
+  public final static short RATIO = 3;
+}//end interface

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,450 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or 
+ * without modification, are permitted provided that the following 
+ * conditions are met:
+ *  - Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in 
+ *    the documentation and/or other materials provided with the distribution.
+ *  - Neither the name of the University Catholique de Louvain - UCL
+ *    nor the names of its contributors may be used to endorse or 
+ *    promote products derived from this software without specific prior 
+ *    written permission.
+ *    
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a <i>retouched Bloom filter</i>, as defined in the CoNEXT 2006 paper.
+ * <p>
+ * It allows the removal of selected false positives at the cost of introducing
+ * random false negatives, and with the benefit of eliminating some random false
+ * positives at the same time.
+ * 
+ * Developed under contract with the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.0 - 7 Feb. 07
+ * 
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ * @see org.onelab.filter.BloomFilter A Bloom filter
+ * @see org.onelab.filter.RemoveScheme The different selective clearing algorithms
+ * 
+ * @see <a href="http://www-rp.lip6.fr/site_npa/site_rp/_publications/740-rbf_cameraready.pdf">Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives</a>
+ */
+public final class RetouchedBloomFilter extends BloomFilter
+implements RemoveScheme {
+  /**
+   * KeyList vector (or ElementList Vector, as defined in the paper) of false positives.
+   */
+  List<Key>[] fpVector;
+
+  /**
+   * KeyList vector of keys recorded in the filter.
+   */
+  List<Key>[] keyVector;
+
+  /**
+   * Ratio vector.
+   */
+  double[] ratio;
+  
+  private Random rand;
+
+  /** Default constructor - use with readFields */
+  public RetouchedBloomFilter() {}
+  
+  /**
+   * Constructor
+   * @param vectorSize The vector size of <i>this</i> filter.
+   * @param nbHash The number of hash functions to consider.
+   * @param hashType type of the hashing function (see {@link Hash}).
+   */
+  public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) {
+    super(vectorSize, nbHash, hashType);
+
+    this.rand = null;
+    createVector();
+  }//end constructor
+
+  @Override
+  public void add(Key key){
+    if(key == null) {
+      throw new NullPointerException("key can not be null");
+    }
+
+    int[] h = hash.hash(key);
+    hash.clear();
+
+    for(int i = 0; i < nbHash; i++) {
+      bits.set(h[i]);
+      keyVector[h[i]].add(key);
+    }//end for - i
+  }//end add()
+
+  /**
+   * Adds false positive information to <i>this</i> retouched Bloom filter.
+   * <p>
+   * <b>Invariant</b>: the key may not be <code>null</code>; a
+   * <code>NullPointerException</code> is thrown otherwise.
+   * @param key The false positive key to add.
+   */
+  public void addFalsePositive(Key key){
+    if(key == null) {
+      throw new NullPointerException("key can not be null");
+    }
+
+    int[] h = hash.hash(key);
+    hash.clear();
+
+    for(int i = 0; i < nbHash; i++) {
+      fpVector[h[i]].add(key);
+    }
+  }//end addFalsePositive()
+
+  /**
+   * Adds a collection of false positive information to <i>this</i> retouched Bloom filter.
+   * @param coll The collection of false positives.
+   */
+  public void addFalsePositive(Collection<Key> coll) {
+    if(coll == null) {
+      throw new NullPointerException("Collection<Key> can not be null");
+    }
+    
+    for(Key k: coll) {
+      addFalsePositive(k);
+    }
+  }//end addFalsePositive()
+
+  /**
+   * Adds a list of false positive information to <i>this</i> retouched Bloom filter.
+   * @param keys The list of false positives.
+   */
+  public void addFalsePositive(List<Key> keys){
+    if(keys == null) {
+      throw new NullPointerException("ArrayList<Key> can not be null");
+    }
+
+    for(Key k: keys) {
+      addFalsePositive(k);
+    }
+  }//end addFalsePositive()
+
+  /**
+   * Adds an array of false positive information to <i>this</i> retouched Bloom filter.
+   * @param keys The array of false positives.
+   */
+  public void addFalsePositive(Key[] keys){
+    if(keys == null) {
+      throw new NullPointerException("Key[] can not be null");
+    }
+
+    for(int i = 0; i < keys.length; i++) {
+      addFalsePositive(keys[i]);
+    }
+  }//end addFalsePositive()
+
+  /**
+   * Performs the selective clearing for a given key.
+   * @param k The false positive key to remove from <i>this</i> retouched Bloom filter.
+   * @param scheme The selective clearing scheme to apply.
+   */
+  public void selectiveClearing(Key k, short scheme) {
+    if(k == null) {
+      throw new NullPointerException("Key can not be null");
+    }
+
+    if(!membershipTest(k)) {
+      throw new IllegalArgumentException("Key is not a member");
+    }
+
+    int index = 0;
+    int[] h = hash.hash(k);
+
+    switch(scheme) {
+
+    case RANDOM:
+      index = randomRemove();
+      break;
+    
+    case MINIMUM_FN:
+      index = minimumFnRemove(h);
+      break;
+    
+    case MAXIMUM_FP:
+      index = maximumFpRemove(h);
+      break;
+    
+    case RATIO:
+      index = ratioRemove(h);
+      break;
+    
+    default:
+      throw new AssertionError("Undefined selective clearing scheme");
+
+    }//end switch
+
+    clearBit(index);
+  }//end selectiveClearing()
+
+  private int randomRemove() {
+    if(rand == null) {
+      rand = new Random();
+    }
+
+    return rand.nextInt(nbHash);
+  }//end randomRemove()
+
+  /**
+   * Chooses the bit position that minimizes the number of false negatives generated.
+   * @param h The different bit positions.
+   * @return int The position that minimizes the number of false negatives generated.
+   */
+  private int minimumFnRemove(int[] h) {
+    int minIndex = Integer.MAX_VALUE;
+    double minValue = Double.MAX_VALUE;
+
+    for(int i = 0; i < nbHash; i++) {
+      double keyWeight = getWeight(keyVector[h[i]]);
+
+      if(keyWeight < minValue) {
+        minIndex = h[i];
+        minValue = keyWeight;
+      }
+
+    }//end for - i
+
+    return minIndex;
+  }//end minimumFnRemove()
+
+  /**
+   * Chooses the bit position that maximizes the number of false positives removed.
+   * @param h The different bit positions.
+   * @return int The position that maximizes the number of false positives removed.
+   */
+  private int maximumFpRemove(int[] h){
+    int maxIndex = Integer.MIN_VALUE;
+    double maxValue = Double.MIN_VALUE;
+
+    for(int i = 0; i < nbHash; i++) {
+      double fpWeight = getWeight(fpVector[h[i]]);
+
+      if(fpWeight > maxValue) {
+        maxValue = fpWeight;
+        maxIndex = h[i];
+      }
+    }
+
+    return maxIndex;
+  }//end maximumFpRemove()
+
+  /**
+   * Chooses the bit position that minimizes the number of false negatives
+   * generated while maximizing the number of false positives removed.
+   * @param h The different bit positions.
+   * @return int The position that minimizes the number of false negatives
+   *             generated while maximizing the number of false positives removed.
+   */
+  private int ratioRemove(int[] h){
+    computeRatio();
+    int minIndex = Integer.MAX_VALUE;
+    double minValue = Double.MAX_VALUE;
+
+    for(int i = 0; i < nbHash; i++) {
+      if(ratio[h[i]] < minValue) {
+        minValue = ratio[h[i]];
+        minIndex = h[i];
+      }
+    }//end for - i
+
+    return minIndex;
+  }//end ratioRemove()
+
+  /**
+   * Clears a specified bit in the bit vector and keeps up-to-date the KeyList vectors.
+   * @param index The position of the bit to clear.
+   */
+  private void clearBit(int index){
+    if(index < 0 || index >= vectorSize) {
+      throw new ArrayIndexOutOfBoundsException(index);
+    }
+
+    List<Key> kl = keyVector[index];
+    List<Key> fpl = fpVector[index];
+
+    // update key list
+    int listSize = kl.size();
+    for(int i = 0; i < listSize && !kl.isEmpty(); i++) {
+      removeKey(kl.get(0), keyVector);
+    }
+
+    kl.clear();
+    keyVector[index].clear();
+
+    //update false positive list
+    listSize = fpl.size();
+    for(int i = 0; i < listSize && !fpl.isEmpty(); i++) {
+      removeKey(fpl.get(0), fpVector);
+    }
+
+    fpl.clear();
+    fpVector[index].clear();
+
+    //update ratio
+    ratio[index] = 0.0;
+
+    //update bit vector
+    bits.clear(index);
+  }//end clearBit()
+
+  /**
+   * Removes a given key from <i>this</i> filter.
+   * @param k The key to remove.
+   * @param vector The counting vector associated to the key.
+   */
+  private void removeKey(Key k, List<Key>[] vector) {
+    if(k == null) {
+      throw new NullPointerException("Key can not be null");
+    }
+    if(vector == null) {
+      throw new NullPointerException("ArrayList<Key>[] can not be null");
+    }
+
+    int[] h = hash.hash(k);
+    hash.clear();
+
+    for(int i = 0; i < nbHash; i++) {
+      vector[h[i]].remove(k);
+    }
+  }//end removeKey()
+
+  /**
+   * Computes the ratio A/FP.
+   */
+  private void computeRatio() {
+    for(int i = 0; i < vectorSize; i++) {
+      double keyWeight = getWeight(keyVector[i]);
+      double fpWeight = getWeight(fpVector[i]);
+
+      if(keyWeight > 0 && fpWeight > 0) {
+        ratio[i] = keyWeight/fpWeight;
+      }
+    }//end for - i
+  }//end computeRatio()
+
+  private double getWeight(List<Key> keyList) {
+    double weight = 0.0;
+    for(Key k: keyList) {
+      weight += k.getWeight();
+    }
+    return weight;
+  }
+  
+  /**
+   * Creates and initialises the various vectors.
+   */
+  @SuppressWarnings("unchecked")
+  private void createVector() {
+    fpVector = new List[vectorSize];
+    keyVector = new List[vectorSize];
+    ratio = new double[vectorSize];
+
+    for(int i = 0; i < vectorSize; i++) {
+      fpVector[i] = Collections.synchronizedList(new ArrayList<Key>());
+      keyVector[i] = Collections.synchronizedList(new ArrayList<Key>());
+      ratio[i] = 0.0;
+    }//end for -i
+  }//end createVector()
+  
+  // Writable
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    for(int i = 0; i < fpVector.length; i++) {
+      List<Key> list = fpVector[i];
+      out.writeInt(list.size());
+      for(Key k: list) {
+        k.write(out);
+      }
+    }
+    for(int i = 0; i < keyVector.length; i++) {
+      List<Key> list = keyVector[i];
+      out.writeInt(list.size());
+      for(Key k: list) {
+        k.write(out);
+      }
+    }
+    for(int i = 0; i < ratio.length; i++) {
+      out.writeDouble(ratio[i]);
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    createVector();
+    for(int i = 0; i < fpVector.length; i++) {
+      List<Key> list = fpVector[i];
+      int size = in.readInt();
+      for(int j = 0; j < size; j++) {
+        Key k = new Key();
+        k.readFields(in);
+        list.add(k);
+      }
+    }
+    for(int i = 0; i < keyVector.length; i++) {
+      List<Key> list = keyVector[i];
+      int size = in.readInt();
+      for(int j = 0; j < size; j++) {
+        Key k = new Key();
+        k.readFields(in);
+        list.add(k);
+      }
+    }
+    for(int i = 0; i < ratio.length; i++) {
+      ratio[i] = in.readDouble();
+    }
+  }
+}//end class
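
A hedged sketch of selective clearing with this class; the sizes, keys, and sketch class name are illustrative, and MINIMUM_FN comes from the RemoveScheme interface above.

    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.Key;
    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.RemoveScheme;
    import org.apache.hadoop.hbase.migration.nineteen.onelab.filter.RetouchedBloomFilter;
    import org.apache.hadoop.hbase.util.Hash;

    public class RetouchedSketch {
      public static void main(String[] args) {
        RetouchedBloomFilter rbf = new RetouchedBloomFilter(8 * 1024, 4, Hash.JENKINS_HASH);
        rbf.add(new Key("row-0001".getBytes()));
        Key falsePositive = new Key("row-9999".getBytes());
        // If this key reads as present without ever being added, record it and
        // clear one of its bits using the scheme that minimizes new false negatives.
        if (rbf.membershipTest(falsePositive)) {
          rbf.addFalsePositive(falsePositive);
          rbf.selectiveClearing(falsePositive, RemoveScheme.MINIMUM_FN);
        }
      }
    }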

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,558 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.regionserver;
+
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.migration.nineteen.io.BloomFilterMapFile;
+import org.apache.hadoop.hbase.migration.nineteen.io.HalfMapFileReader;
+import org.apache.hadoop.hbase.migration.nineteen.io.Reference;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.SequenceFile;
+
+/**
+ * A HStore data file.  HStores usually have one or more of these files.  They
+ * are produced by flushing the memcache to disk.
+ *
+ * <p>Each HStore maintains a bunch of different data files. The filename is a
+ * mix of the parent dir, the region name, the column name, and a file
+ * identifier. The name may also be a reference to a store file located
+ * elsewhere. This class handles all that path-building stuff for you.
+ * 
+ * <p>An HStoreFile usually tracks 4 things: its parent dir, the region
+ * identifier, the column family, and the file identifier.  If you know those
+ * four things, you know how to obtain the right HStoreFile.  HStoreFiles may
+ * also reference store files in another region serving either from
+ * the top-half of the remote file or from the bottom-half.  Such references
+ * are made when splitting regions; they keep region splits fast.
+ * 
+ * <p>Plain HStoreFiles are named for a randomly generated id as in:
+ * <code>1278437856009925445</code>  A file by this name is made in both the
+ * <code>mapfiles</code> and <code>info</code> subdirectories of an
+ * HStore column family directory: e.g. if the column family is 'anchor:', then
+ * under the region directory there is a subdirectory named 'anchor' within
+ * which is a 'mapfiles' and 'info' subdirectory.  In each will be found a
+ * file named something like <code>1278437856009925445</code>, one to hold the
+ * data in 'mapfiles' and one under 'info' that holds the sequence id for this
+ * store file.
+ * 
+ * <p>References to store files located over in some other region look like
+ * this:
+ * <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
+ * i.e. an id followed by the name of the referenced region.  The data
+ * ('mapfiles') of HStoreFile references are empty. The accompanying
+ * <code>info</code> file contains the
+ * midkey, the id of the remote store we're referencing and whether we're
+ * to serve the top or bottom region of the remote store file.  Note, a region
+ * is not splittable if it has instances of store file references (References
+ * are cleaned up by compactions).
+ * 
+ * <p>When merging or splitting HRegions, we might want to modify one of the 
+ * params for an HStoreFile (effectively moving it elsewhere).
+ */
+public class HStoreFile implements HConstants {
+  static final Log LOG = LogFactory.getLog(HStoreFile.class.getName());
+  static final byte INFO_SEQ_NUM = 0;
+  static final byte MAJOR_COMPACTION = INFO_SEQ_NUM + 1;
+  static final String HSTORE_DATFILE_DIR = "mapfiles";
+  static final String HSTORE_INFO_DIR = "info";
+  static final String HSTORE_FILTER_DIR = "filter";
+  
+  private final static Random rand = new Random();
+
+  private final Path basedir;
+  private final int encodedRegionName;
+  private final byte [] colFamily;
+  private final long fileId;
+  private final HBaseConfiguration conf;
+  private final FileSystem fs;
+  private final Reference reference;
+  private final HRegionInfo hri;
+  /* If true, this file was the product of a major compaction.
+   */
+  private boolean majorCompaction = false;
+  private long indexLength;
+
+  /**
+   * Constructor that fully initializes the object
+   * @param conf Configuration object
+   * @param basedir qualified path that is parent of region directory
+   * @param colFamily name of the column family
+   * @param fileId file identifier
+   * @param ref Reference to another HStoreFile.
+   * @param hri The region info for this file (HACK HBASE-868). TODO: Fix.
+   * @throws IOException
+   */
+  HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
+      final HRegionInfo hri, byte [] colFamily, long fileId,
+      final Reference ref)
+  throws IOException {
+    this(conf, fs, basedir, hri, colFamily, fileId, ref, false);
+  }
+  
+  /**
+   * Constructor that fully initializes the object
+   * @param conf Configuration object
+   * @param basedir qualified path that is parent of region directory
+   * @param colFamily name of the column family
+   * @param fileId file identifier
+   * @param ref Reference to another HStoreFile.
+   * @param hri The region info for this file (HACK HBASE-868). TODO: Fix.
+   * @param mc True if this file was the result of a major compaction.
+   * @throws IOException
+   */
+  HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
+      final HRegionInfo hri, byte [] colFamily, long fileId,
+      final Reference ref, final boolean mc)
+  throws IOException {
+    this.conf = conf;
+    this.fs = fs;
+    this.basedir = basedir;
+    this.encodedRegionName = hri.getEncodedName();
+    this.colFamily = colFamily;
+    this.hri = hri;
+    
+    long id = fileId;
+    if (id == -1) {
+      Path mapdir = HStoreFile.getMapDir(basedir, encodedRegionName, colFamily);
+      Path testpath = null;
+      do {
+        id = Math.abs(rand.nextLong());
+        testpath = new Path(mapdir, createHStoreFilename(id, -1));
+      } while(fs.exists(testpath));
+    }
+    this.fileId = id;
+    
+    // If a reference, construction does not write the pointer files.  That's
+    // done by invocations of writeReferenceFiles(hsf, fs).  Happens at split.
+    this.reference = ref;
+    this.majorCompaction = mc;
+  }
+
+  /** @return True if this store file is a reference to another store file */
+  boolean isReference() {
+    return reference != null;
+  }
+  
+  Reference getReference() {
+    return reference;
+  }
+
+  int getEncodedRegionName() {
+    return this.encodedRegionName;
+  }
+
+  /** @return the column family */
+  byte [] getColFamily() {
+    return colFamily;
+  }
+
+  /** @return the file identifier */
+  long getFileId() {
+    return fileId;
+  }
+
+  // Build full filenames from those components
+  
+  /** @return path for MapFile */
+  Path getMapFilePath() {
+    if (isReference()) {
+      return getMapFilePath(encodedRegionName, fileId,
+          reference.getEncodedRegionName());
+    }
+    return getMapFilePath(this.encodedRegionName, fileId);
+  }
+
+  private Path getMapFilePath(final Reference r) {
+    if (r == null) {
+      return getMapFilePath();
+    }
+    return getMapFilePath(r.getEncodedRegionName(), r.getFileId());
+  }
+
+  private Path getMapFilePath(final int encodedName, final long fid) {
+    return getMapFilePath(encodedName, fid, HRegionInfo.NO_HASH);
+  }
+  
+  private Path getMapFilePath(final int encodedName, final long fid,
+      final int ern) {
+    return new Path(HStoreFile.getMapDir(basedir, encodedName, colFamily), 
+      createHStoreFilename(fid, ern));
+  }
+
+  /** @return path for info file */
+  Path getInfoFilePath() {
+    if (isReference()) {
+      return getInfoFilePath(encodedRegionName, fileId,
+          reference.getEncodedRegionName());
+ 
+    }
+    return getInfoFilePath(encodedRegionName, fileId);
+  }
+  
+  private Path getInfoFilePath(final int encodedName, final long fid) {
+    return getInfoFilePath(encodedName, fid, HRegionInfo.NO_HASH);
+  }
+  
+  private Path getInfoFilePath(final int encodedName, final long fid,
+      final int ern) {
+    return new Path(HStoreFile.getInfoDir(basedir, encodedName, colFamily), 
+      createHStoreFilename(fid, ern));
+  }
+
+  // File handling
+
+  /*
+   * Split by making two new store files that reference the top and bottom
+   * halves of the original store file.
+   * @param dstA the file which will contain keys from the start of the source
+   * @param dstB the file which will contain keys from the midkey to the end of the source
+   * @param fs file system
+   * @throws IOException
+   */
+  void splitStoreFile(final HStoreFile dstA, final HStoreFile dstB,
+      final FileSystem fs)
+  throws IOException {
+    dstA.writeReferenceFiles(fs);
+    dstB.writeReferenceFiles(fs);
+  }
+  
+  void writeReferenceFiles(final FileSystem fs)
+  throws IOException {
+    createOrFail(fs, getMapFilePath());
+    writeSplitInfo(fs);
+  }
+  
+  /*
+   * If reference, create and write the remote store file id, the midkey and
+   * whether we are referencing the top or the bottom region of the referent, out
+   * to the info file.
+   * @param p Path to info file.
+   * @param hsf
+   * @param fs
+   * @throws IOException
+   */
+  private void writeSplitInfo(final FileSystem fs) throws IOException {
+    Path p = getInfoFilePath();
+    if (fs.exists(p)) {
+      throw new IOException("File already exists " + p.toString());
+    }
+    FSDataOutputStream out = fs.create(p);
+    try {
+      reference.write(out);
+    } finally {
+      out.close();
+    }
+  }
+
+  /**
+   * @see #writeSplitInfo(FileSystem fs)
+   */
+  static Reference readSplitInfo(final Path p, final FileSystem fs)
+  throws IOException {
+    FSDataInputStream in = fs.open(p);
+    try {
+      Reference r = new Reference();
+      r.readFields(in);
+      return r;
+    } finally {
+      in.close();
+    }
+  }
+
+  private void createOrFail(final FileSystem fs, final Path p)
+  throws IOException {
+    if (fs.exists(p)) {
+      throw new IOException("File already exists " + p.toString());
+    }
+    if (!fs.createNewFile(p)) {
+      throw new IOException("Failed create of " + p);
+    }
+  }
+
+  /** 
+   * Reads in an info file
+   *
+   * @param filesystem file system
+   * @return The sequence id contained in the info file
+   * @throws IOException
+   */
+  long loadInfo(final FileSystem filesystem) throws IOException {
+    Path p = null;
+    if (isReference()) {
+      p = getInfoFilePath(reference.getEncodedRegionName(),
+        this.reference.getFileId());
+    } else {
+      p = getInfoFilePath();
+    }
+    long length = filesystem.getFileStatus(p).getLen();
+    boolean hasMoreThanSeqNum = length > (Byte.SIZE + Bytes.SIZEOF_LONG);
+    DataInputStream in = new DataInputStream(filesystem.open(p));
+    try {
+      byte flag = in.readByte();
+      if (flag == INFO_SEQ_NUM) {
+        if (hasMoreThanSeqNum) {
+          flag = in.readByte();
+          if (flag == MAJOR_COMPACTION) {
+            this.majorCompaction = in.readBoolean();
+          }
+        }
+        return in.readLong();
+      }
+      throw new IOException("Cannot process info file: " + p);
+    } finally {
+      in.close();
+    }
+  }
+  
+  /**
+   * Writes the file-identifier to disk
+   * 
+   * @param filesystem file system
+   * @param infonum file id
+   * @throws IOException
+   */
+  void writeInfo(final FileSystem filesystem, final long infonum)
+  throws IOException {
+    writeInfo(filesystem, infonum, false);
+  }
+  
+  /**
+   * Writes the file-identifier to disk
+   * 
+   * @param filesystem file system
+   * @param infonum file id
+   * @param mc True if this file is product of a major compaction
+   * @throws IOException
+   */
+  void writeInfo(final FileSystem filesystem, final long infonum,
+    final boolean mc)
+  throws IOException {
+    Path p = getInfoFilePath();
+    FSDataOutputStream out = filesystem.create(p);
+    try {
+      out.writeByte(INFO_SEQ_NUM);
+      out.writeLong(infonum);
+      if (mc) {
+        // Set the major compaction flag on this file.
+        this.majorCompaction = mc;
+        out.writeByte(MAJOR_COMPACTION);
+        out.writeBoolean(mc);
+      }
+    } finally {
+      out.close();
+    }
+  }
+
+  /**
+   * Delete store map files.
+   * @throws IOException 
+   */
+  public void delete() throws IOException {
+    fs.delete(getMapFilePath(), true);
+    fs.delete(getInfoFilePath(), true);
+  }
+  
+  /**
+   * Renames the mapfiles and info directories under the passed
+   * <code>hsf</code> directory.
+   * @param fs
+   * @param hsf
+   * @return True if succeeded.
+   * @throws IOException
+   */
+  public boolean rename(final FileSystem fs, final HStoreFile hsf)
+  throws IOException {
+    Path src = getMapFilePath();
+    if (!fs.exists(src)) {
+      throw new FileNotFoundException(src.toString());
+    }
+    boolean success = fs.rename(src, hsf.getMapFilePath());
+    if (!success) {
+      LOG.warn("Failed rename of " + src + " to " + hsf.getMapFilePath());
+    } else {
+      src = getInfoFilePath();
+      if (!fs.exists(src)) {
+        throw new FileNotFoundException(src.toString());
+      }
+      success = fs.rename(src, hsf.getInfoFilePath());
+      if (!success) {
+        LOG.warn("Failed rename of " + src + " to " + hsf.getInfoFilePath());
+      }
+    }
+    return success;
+  }
+  
+  /**
+   * Get reader for the store file map file.
+   * Client is responsible for closing file when done.
+   * @param fs
+   * @param bloomFilter If true, a bloom filter exists
+   * @param blockCacheEnabled If true, MapFile blocks should be cached.
+   * @return BloomFilterMapFile.Reader
+   * @throws IOException
+   */
+  public synchronized BloomFilterMapFile.Reader getReader(final FileSystem fs,
+      final boolean bloomFilter, final boolean blockCacheEnabled)
+  throws IOException {
+    if (isReference()) {
+      return new HalfMapFileReader(fs,
+          getMapFilePath(reference).toString(), conf, 
+          reference.getFileRegion(), reference.getMidkey(), bloomFilter,
+          blockCacheEnabled, this.hri);
+    }
+    return new BloomFilterMapFile.Reader(fs, getMapFilePath().toString(),
+        conf, bloomFilter, blockCacheEnabled, this.hri);
+  }
+
+  /**
+   * Get a store file writer.
+   * Client is responsible for closing file when done.
+   * @param fs
+   * @param compression Pass <code>SequenceFile.CompressionType.NONE</code>
+   * for none.
+   * @param bloomFilter If true, create a bloom filter
+   * @param nrows number of rows expected. Required if bloomFilter is true.
+   * @return MapFile.Writer
+   * @throws IOException
+   */
+  public MapFile.Writer getWriter(final FileSystem fs,
+      final SequenceFile.CompressionType compression,
+      final boolean bloomFilter, int nrows)
+  throws IOException {
+    if (isReference()) {
+      throw new IOException("Illegal Access: Cannot get a writer on an " +
+        "HStoreFile reference");
+    }
+    return new BloomFilterMapFile.Writer(conf, fs,
+      getMapFilePath().toString(), compression, bloomFilter, nrows, this.hri);
+  }
+
+  /**
+   * @return Length of the store map file.  If a reference, size is
+   * approximation.
+   * @throws IOException
+   */
+  public long length() throws IOException {
+    Path p = new Path(getMapFilePath(reference), MapFile.DATA_FILE_NAME);
+    long l = p.getFileSystem(conf).getFileStatus(p).getLen();
+    return (isReference())? l / 2: l;
+  }
+
+  /**
+   * @return Length of the store map file index.
+   * @throws IOException
+   */
+  public synchronized long indexLength() throws IOException {
+    if (indexLength == 0) {
+      Path p = new Path(getMapFilePath(reference), MapFile.INDEX_FILE_NAME);
+      indexLength = p.getFileSystem(conf).getFileStatus(p).getLen();
+    }
+    return indexLength;
+  }
+
+  @Override
+  public String toString() {
+    return encodedRegionName + "/" + Bytes.toString(colFamily) + "/" + fileId +
+      (isReference()? "-" + reference.toString(): "");
+  }
+  
+  /**
+   * @return True if this file was made by a major compaction.
+   */
+  public boolean isMajorCompaction() {
+    return this.majorCompaction;
+  }
+
+  private static String createHStoreFilename(final long fid,
+      final int encodedRegionName) {
+    return Long.toString(fid) + 
+      ((encodedRegionName != HRegionInfo.NO_HASH)?
+        "." + encodedRegionName : "");
+  }
+
+  /**
+   * @param dir Base directory
+   * @param encodedRegionName Encoding of region name.
+   * @param f Column family.
+   * @return path for map file directory
+   */
+  public static Path getMapDir(Path dir, int encodedRegionName,
+      final byte [] f) {
+    return getFamilySubDir(dir, encodedRegionName, f, HSTORE_DATFILE_DIR);
+  }
+
+  /**
+   * @param dir Base directory
+   * @param encodedRegionName Encoding of region name.
+   * @param f Column family.
+   * @return the info directory path
+   */
+  public static Path getInfoDir(Path dir, int encodedRegionName, byte [] f) {
+    return getFamilySubDir(dir, encodedRegionName, f, HSTORE_INFO_DIR);
+  }
+
+  /**
+   * @param dir Base directory
+   * @param encodedRegionName Encoding of region name.
+   * @param f Column family.
+   * @return the bloom filter directory path
+   */
+  @Deprecated
+  public static Path getFilterDir(Path dir, int encodedRegionName,
+      final byte [] f) {
+    return getFamilySubDir(dir, encodedRegionName, f, HSTORE_FILTER_DIR);
+  }
+  
+  /*
+   * @param base Base directory
+   * @param encodedRegionName Encoding of region name.
+   * @param f Column family.
+   * @param subdir Subdirectory to create under column family/store directory.
+   * @return Path for the passed subdirectory under the column family directory.
+   */
+  private static Path getFamilySubDir(final Path base,
+      final int encodedRegionName, final byte [] f, final String subdir) {
+    return new Path(base, new Path(Integer.toString(encodedRegionName),
+        new Path(Bytes.toString(f), subdir)));
+  }
+}
\ No newline at end of file

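The HStoreFile helpers above encode the pre-0.20 store layout: under a table directory sits one directory per encoded region name, under that one directory per column family, and under each family a mapfiles and an info subdirectory. What follows is a minimal illustrative sketch, not part of the commit, of the nesting that getMapDir/getInfoDir build via getFamilySubDir; it assumes HSTORE_DATFILE_DIR and HSTORE_INFO_DIR resolve to "mapfiles" and "info" (the directory names the FSUtils checks below look for), and the table, region hash and family names are made-up samples.

import org.apache.hadoop.fs.Path;

public class Pre020LayoutSketch {
  public static void main(String[] args) {
    Path tableDir = new Path("/hbase/TestTable");  // hypothetical table directory
    String encodedRegionName = "1028785192";       // hypothetical encoded region name
    String family = "colfam1";                     // hypothetical column family
    // Same nesting as getFamilySubDir: <table>/<encodedRegionName>/<family>/<subdir>
    Path mapDir = new Path(tableDir,
        new Path(encodedRegionName, new Path(family, "mapfiles")));
    Path infoDir = new Path(tableDir,
        new Path(encodedRegionName, new Path(family, "info")));
    System.out.println(mapDir);   // /hbase/TestTable/1028785192/colfam1/mapfiles
    System.out.println(infoDir);  // /hbase/TestTable/1028785192/colfam1/info
  }
}
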
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/package-info.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/package-info.java?rev=795230&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/package-info.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/migration/package-info.java Fri Jul 17 21:23:11 2009
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+Provides classes from old hbase versions used when migrating data.
+The nineteen package holds classes taken from hbase 0.19.
+*/
+package org.apache.hadoop.hbase.migration.nineteen;

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=795230&r1=795229&r2=795230&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Fri Jul 17 21:23:11 2009
@@ -1241,12 +1241,12 @@
       Integer lid = getLock(lockid, row);
       byte [] now = Bytes.toBytes(System.currentTimeMillis());
       try {
-        for(Map.Entry<byte[], List<KeyValue>> entry : 
-          put.getFamilyMap().entrySet()) {
+        for (Map.Entry<byte[], List<KeyValue>> entry : 
+            put.getFamilyMap().entrySet()) {
           byte [] family = entry.getKey();
           checkFamily(family);
           List<KeyValue> puts = entry.getValue();
-          if(updateKeys(puts, now)) {
+          if (updateKeys(puts, now)) {
             put(family, puts, writeToWAL);
           }
         }

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=795230&r1=795229&r2=795230&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Store.java Fri Jul 17 21:23:11 2009
@@ -50,7 +50,7 @@
 import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.io.HeapSize;
-import org.apache.hadoop.hbase.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.hbase.io.hfile.Compression;
 import org.apache.hadoop.hbase.io.hfile.HFile;
 import org.apache.hadoop.hbase.io.hfile.HFileScanner;

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java?rev=795230&r1=795229&r2=795230&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/FSUtils.java Fri Jul 17 21:23:11 2009
@@ -281,22 +281,22 @@
       final Path hbaseRootDir)
   throws IOException {
     // Presumes any directory under hbase.rootdir is a table.
-    FileStatus [] directories = fs.listStatus(hbaseRootDir, new DirFilter(fs));
-    for (int i = 0; i < directories.length; i++) {
+    FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs));
+    for (int i = 0; i < tableDirs.length; i++) {
       // Skip the .log directory.  All others should be tables.  Inside a table,
       // there are compaction.dir directories to skip.  Otherwise, all else
       // should be regions.  Then in each region, should only be family
       // directories.  Under each of these, should be one file only.
-      Path d = directories[i].getPath();
+      Path d = tableDirs[i].getPath();
       if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) continue;
-      FileStatus [] tablesubdirectories = fs.listStatus(d, new DirFilter(fs));
-      for (int j = 0; j < tablesubdirectories.length; j++) {
-        Path dd = tablesubdirectories[j].getPath();
+      FileStatus [] regionDirs = fs.listStatus(d, new DirFilter(fs));
+      for (int j = 0; j < regionDirs.length; j++) {
+        Path dd = regionDirs[j].getPath();
         if (dd.equals(HConstants.HREGION_COMPACTIONDIR_NAME)) continue;
         // Else its a region name.  Now look in region for families.
-        FileStatus [] familydirectories = fs.listStatus(dd, new DirFilter(fs));
-        for (int k = 0; k < familydirectories.length; k++) {
-          Path family = familydirectories[k].getPath();
+        FileStatus [] familyDirs = fs.listStatus(dd, new DirFilter(fs));
+        for (int k = 0; k < familyDirs.length; k++) {
+          Path family = familyDirs[k].getPath();
           // Now in family make sure only one file.
           FileStatus [] familyStatus = fs.listStatus(family);
           if (familyStatus.length > 1) {
@@ -320,8 +320,8 @@
   public static boolean isPre020FileLayout(final FileSystem fs,
     final Path hbaseRootDir)
   throws IOException {
-    Path mapfiles = new Path(new Path(new Path(hbaseRootDir, "-ROOT-"),
-      "70236052"), "mapfiles");
+    Path mapfiles = new Path(new Path(new Path(new Path(hbaseRootDir, "-ROOT-"),
+      "70236052"), "info"), "mapfiles");
     return fs.exists(mapfiles);
   }
 
@@ -340,22 +340,38 @@
       final Path hbaseRootDir)
   throws IOException {
     // Presumes any directory under hbase.rootdir is a table.
-    FileStatus [] directories = fs.listStatus(hbaseRootDir, new DirFilter(fs));
-    for (int i = 0; i < directories.length; i++) {
+    FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs));
+    for (int i = 0; i < tableDirs.length; i++) {
       // Inside a table, there are compaction.dir directories to skip.
       // Otherwise, all else should be regions.  Then in each region, should
       // only be family directories.  Under each of these, should be a mapfile
       // and info directory and in these only one file.
-      Path d = directories[i].getPath();
+      Path d = tableDirs[i].getPath();
       if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) continue;
-      FileStatus [] tablesubdirectories = fs.listStatus(d, new DirFilter(fs));
-      for (int j = 0; j < tablesubdirectories.length; j++) {
-        Path dd = tablesubdirectories[j].getPath();
+      FileStatus [] regionDirs = fs.listStatus(d, new DirFilter(fs));
+      for (int j = 0; j < regionDirs.length; j++) {
+        Path dd = regionDirs[j].getPath();
         if (dd.equals(HConstants.HREGION_COMPACTIONDIR_NAME)) continue;
         // Else its a region name.  Now look in region for families.
-        FileStatus [] familydirectories = fs.listStatus(dd, new DirFilter(fs));
-        for (int k = 0; k < familydirectories.length; k++) {
-          Path family = familydirectories[k].getPath();
+        FileStatus [] familyDirs = fs.listStatus(dd, new DirFilter(fs));
+        for (int k = 0; k < familyDirs.length; k++) {
+          Path family = familyDirs[k].getPath();
+          FileStatus [] infoAndMapfile = fs.listStatus(family);
+          // Assert that only the info and mapfiles dirs are in the family dir.
+          if (infoAndMapfile.length != 0 && infoAndMapfile.length != 2) {
+            LOG.debug(family.toString() +
+              " does not hold just info and mapfiles: " + infoAndMapfile.length);
+            return false;
+          }
+          // Make sure the directories are named info or mapfiles.
+          for (int ll = 0; ll < infoAndMapfile.length; ll++) {
+            if (infoAndMapfile[ll].getPath().getName().equals("info") ||
+                infoAndMapfile[ll].getPath().getName().equals("mapfiles"))
+              continue;
+            LOG.debug("Unexpected directory name: " +
+              infoAndMapfile[ll].getPath());
+            return false;
+          }
           // Now in family, there are 'mapfile' and 'info' subdirs.  Just
           // look in the 'mapfile' subdir.
           FileStatus [] familyStatus =

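The new per-family assertions in isMajorCompactedPre020 above expect an old-layout family directory to hold just an info and a mapfiles subdirectory, with at most one mapfile remaining once the store has been major compacted. A condensed sketch of that check, not the committed code, using only FileSystem calls the patch already relies on:

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FamilyDirCheck {
  /** Returns true if the family dir looks like a major-compacted pre-0.20 store. */
  static boolean isMajorCompactedFamily(FileSystem fs, Path family)
  throws IOException {
    FileStatus [] children = fs.listStatus(family);
    if (children.length != 2) return false;           // expect info plus mapfiles
    for (FileStatus child : children) {
      String name = child.getPath().getName();
      if (!name.equals("info") && !name.equals("mapfiles")) return false;
    }
    // After a major compaction the mapfiles dir holds at most one mapfile.
    return fs.listStatus(new Path(family, "mapfiles")).length <= 1;
  }
}
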
Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Migrate.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Migrate.java?rev=795230&r1=795229&r2=795230&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Migrate.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/util/Migrate.java Fri Jul 17 21:23:11 2009
@@ -20,12 +20,14 @@
 
 package org.apache.hadoop.hbase.util;
 
+import java.io.FileNotFoundException;
 import java.io.IOException;
 
 import org.apache.commons.cli.Options;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -36,6 +38,7 @@
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.util.FSUtils.DirFilter;
 import org.apache.hadoop.util.GenericOptionsParser;
 import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
@@ -224,7 +227,7 @@
         System.out.println(msg);
         throw new IOException(msg);
       }
-      rewrite(fs, hbaseRootDir);
+      rewrite(hbaseRootDir);
     }
     LOG.info("Checking filesystem is major compacted");
     // Below check is good for both making sure that we are major compacted
@@ -285,10 +288,57 @@
   
   /*
    * Rewrite all under hbase root dir.
+   * Presumes that {@link FSUtils#isMajorCompactedPre020(FileSystem, Path)}
+   * has been run before this method is called.
    * @param fs
    * @param hbaseRootDir
+   * @throws IOException
    */
-  private void rewrite(final FileSystem fs, final Path hbaseRootDir) {
+  private void rewrite(final Path hbaseRootDir) throws IOException {
+    FileStatus [] tableDirs = fs.listStatus(hbaseRootDir, new DirFilter(fs));
+    for (int i = 0; i < tableDirs.length; i++) {
+      // Inside a table, there are compaction.dir directories to skip.
+      // Otherwise, all else should be regions.  Then in each region, should
+      // only be family directories.  Under each of these, should be a mapfile
+      // and info directory and in these only one file.
+      Path d = tableDirs[i].getPath();
+      if (d.getName().equals(HConstants.HREGION_LOGDIR_NAME)) continue;
+      FileStatus [] regionDirs = fs.listStatus(d, new DirFilter(fs));
+      for (int j = 0; j < regionDirs.length; j++) {
+        Path dd = regionDirs[j].getPath();
+        if (dd.getName().equals(HConstants.HREGION_COMPACTIONDIR_NAME)) continue;
+        // Else it's a region name.  Now look in region for families.
+        FileStatus [] familyDirs = fs.listStatus(dd, new DirFilter(fs));
+        for (int k = 0; k < familyDirs.length; k++) {
+          Path family = familyDirs[k].getPath();
+          Path mfdir = new Path(family, "mapfiles");
+          FileStatus [] mfs = fs.listStatus(mfdir);
+          if (mfs.length != 1) {
+            throw new IOException("Should be exactly one directory in: " + mfdir);
+          }
+          Path mf = mfs[0].getPath();
+          Path infofile = new Path(new Path(family, "info"), mf.getName());
+          rewrite(this.fs, mf, infofile);
+        }
+      }
+    }
+  }
+  
+  /**
+   * Rewrite the passed mapfile.
+   * @param mapfiledir
+   * @param infofile
+   * @throws IOException
+   */
+  public static void rewrite(final FileSystem fs, final Path mapfiledir,
+      final Path infofile)
+  throws IOException {
+    if (!fs.exists(mapfiledir)) {
+      throw new FileNotFoundException(mapfiledir.toString());
+    }
+    if (!fs.exists(infofile)) {
+      throw new FileNotFoundException(infofile.toString());
+    }
     
   }
 

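As MigrationTest below exercises it, the migration is driven through the Hadoop Tool interface with a single "upgrade" argument and reports success with a zero return code. A hedged usage sketch; the hbase.rootdir URI here is a placeholder, not a value taken from the commit:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.Migrate;

public class RunUpgrade {
  public static void main(String[] args) throws Exception {
    HBaseConfiguration conf = new HBaseConfiguration();
    // Placeholder root; point this at the filesystem holding the pre-0.20 layout.
    conf.set("hbase.rootdir", "hdfs://localhost:9000/hbase");
    Migrate migrator = new Migrate(conf);
    int result = migrator.run(new String [] {"upgrade"});  // 0 means success
    System.exit(result);
  }
}
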
Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java?rev=795230&r1=795229&r2=795230&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java Fri Jul 17 21:23:11 2009
@@ -30,7 +30,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.io.MapFile;
+import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.WritableComparable;
 

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/MigrationTest.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/MigrationTest.java?rev=795230&r1=795229&r2=795230&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/MigrationTest.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/util/MigrationTest.java Fri Jul 17 21:23:11 2009
@@ -58,14 +58,14 @@
    */
   public void testMigration() throws IOException {
     Path rootdir = getUnitTestdir(getName());
-    FileSystem fs = FileSystem.get(this.conf);
     Path hbasedir = loadTestData(fs, rootdir);
     assertTrue(fs.exists(hbasedir));
     Migrate migrator = new Migrate(this.conf);
     Path qualified = fs.makeQualified(hbasedir);
     String uri = qualified.toString();
     this.conf.set("hbase.rootdir", uri);
-    migrator.run(new String [] {"upgrade"});
+    int result = migrator.run(new String [] {"upgrade"});
+    assertEquals(0, result);
   }
   
   /*


