hbase-commits mailing list archives

From: st...@apache.org
Subject: svn commit: r711155 [1/2] - in /hadoop/hbase/trunk: ./ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/io/ src/java/org/apache/hadoop/hbase/regionserver/ src/test/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/regio...
Date: Tue, 04 Nov 2008 01:28:53 GMT
Author: stack
Date: Mon Nov  3 17:28:53 2008
New Revision: 711155

URL: http://svn.apache.org/viewvc?rev=711155&view=rev
Log:
HBASE-975  Improve MapFile performance for start and end key

Added:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/Reference.java
Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BlockFSInputStream.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/ImmutableBytesWritable.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseClusterTestCase.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestSplit.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon Nov  3 17:28:53 2008
@@ -96,6 +96,7 @@
                instead (requires hadoop 0.19)
    HBASE-81    When a scanner lease times out, throw a more "user friendly" exception
    HBASE-978   Remove BloomFilterDescriptor. It is no longer used.
+   HBASE-975   Improve MapFile performance for start and end key
         
   NEW FEATURES
    HBASE-875   Use MurmurHash instead of JenkinsHash [in bloomfilters]

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/HTable.java Mon Nov  3 17:28:53 2008
@@ -225,7 +225,6 @@
    * @return Array of region starting row keys
    * @throws IOException
    */
-  @SuppressWarnings("null")
   public byte[][] getStartKeys() throws IOException {
     final List<byte[]> keyList = new ArrayList<byte[]>();
 
@@ -1154,7 +1153,6 @@
     private byte[][] columns;
     private byte [] startRow;
     protected long scanTime;
-    @SuppressWarnings("hiding")
     private boolean closed = false;
     private HRegionInfo currentRegion = null;
     private ScannerCallable callable = null;

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BlockFSInputStream.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BlockFSInputStream.java?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BlockFSInputStream.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BlockFSInputStream.java Mon Nov  3 17:28:53 2008
@@ -59,7 +59,6 @@
    * @param fileLength
    * @param blockSize the size of each block in bytes.
    */
-  @SuppressWarnings({"unchecked",  "serial"})
   public BlockFSInputStream(InputStream in, long fileLength, int blockSize) {
     this.in = in;
     if (!(in instanceof Seekable) || !(in instanceof PositionedReadable)) {
@@ -157,12 +156,12 @@
     long blockLength = targetBlockEnd - targetBlockStart + 1;
     long offsetIntoBlock = target - targetBlockStart;
 
-    byte[] block = blocks.get(targetBlockStart);
+    byte[] block = blocks.get(Long.valueOf(targetBlockStart));
     if (block == null) {
       block = new byte[blockSize];
       ((PositionedReadable) in).readFully(targetBlockStart, block, 0,
           (int) blockLength);
-      blocks.put(targetBlockStart, block);
+      blocks.put(Long.valueOf(targetBlockStart), block);
     }
     
     this.pos = target;
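
For context, the change above makes the boxing of the block-offset key
explicit rather than relying on autoboxing.  A minimal standalone sketch of
the lookup pattern, with hypothetical class and method names (only
Map/HashMap are real APIs):

    import java.util.HashMap;
    import java.util.Map;

    public class BlockCacheSketch {
      // Cache of block start offset -> block bytes, as in BlockFSInputStream.
      private final Map<Long, byte[]> blocks = new HashMap<Long, byte[]>();

      byte[] getBlock(long targetBlockStart, int blockSize) {
        // Long.valueOf makes the boxing visible and may reuse cached
        // Long instances for small values.
        byte[] block = blocks.get(Long.valueOf(targetBlockStart));
        if (block == null) {
          block = new byte[blockSize];
          // ... fill the block from the underlying stream here ...
          blocks.put(Long.valueOf(targetBlockStart), block);
        }
        return block;
      }
    }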

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java?rev=711155&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/BloomFilterMapFile.java Mon Nov  3 17:28:53 2008
@@ -0,0 +1,231 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.util.Hash;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.onelab.filter.BloomFilter;
+
+/**
+ * On write, all keys are added to a bloom filter.  On read, all keys are
+ * first tested against the bloom filter.  Keys are HStoreKeys.  If the
+ * passed bloom filter is null, invocations simply pass through to the parent.
+ */
+public class BloomFilterMapFile extends HBaseMapFile {
+  private static final Log LOG = LogFactory.getLog(BloomFilterMapFile.class);
+  protected static final String BLOOMFILTER_FILE_NAME = "filter";
+
+  public static class Reader extends HBaseReader {
+    private final BloomFilter bloomFilter;
+
+    /**
+     * @param fs
+     * @param dirName
+     * @param conf
+     * @param filter
+     * @param blockCacheEnabled
+     * @param hri
+     * @throws IOException
+     */
+    public Reader(FileSystem fs, String dirName, Configuration conf,
+        final boolean filter, final boolean blockCacheEnabled, 
+        HRegionInfo hri)
+    throws IOException {
+      super(fs, dirName, conf, blockCacheEnabled, hri);
+      if (filter) {
+        this.bloomFilter = loadBloomFilter(fs, dirName);
+      } else {
+        this.bloomFilter = null;
+      }
+    }
+
+    private BloomFilter loadBloomFilter(FileSystem fs, String dirName)
+    throws IOException {
+      Path filterFile = new Path(dirName, BLOOMFILTER_FILE_NAME);
+      if(!fs.exists(filterFile)) {
+        throw new FileNotFoundException("Could not find bloom filter: " +
+            filterFile);
+      }
+      BloomFilter filter = new BloomFilter();
+      FSDataInputStream in = fs.open(filterFile);
+      try {
+        filter.readFields(in);
+      } finally {
+        in.close();
+      }
+      return filter;
+    }
+    
+    @Override
+    public Writable get(WritableComparable key, Writable val)
+    throws IOException {
+      if (bloomFilter == null) {
+        return super.get(key, val);
+      }
+      if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("bloom filter reported that key exists");
+        }
+        return super.get(key, val);
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("bloom filter reported that key does not exist");
+      }
+      return null;
+    }
+
+    @Override
+    public WritableComparable getClosest(WritableComparable key,
+        Writable val) throws IOException {
+      if (bloomFilter == null) {
+        return super.getClosest(key, val);
+      }
+      // Note - the key being passed to us is always a HStoreKey
+      if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("bloom filter reported that key exists");
+        }
+        return super.getClosest(key, val);
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("bloom filter reported that key does not exist");
+      }
+      return null;
+    }
+    
+    /**
+     * @return size of the bloom filter
+     */
+    public int getBloomFilterSize() {
+      return bloomFilter == null ? 0 : bloomFilter.getVectorSize();
+    }
+  }
+  
+  public static class Writer extends HBaseWriter {
+    private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0;
+    private final BloomFilter bloomFilter;
+    private final String dirName;
+    private final FileSystem fs;
+    
+    /**
+     * @param conf
+     * @param fs
+     * @param dirName
+     * @param compression
+     * @param filter
+     * @param nrows
+     * @param hri
+     * @throws IOException
+     */
+    @SuppressWarnings("unchecked")
+    public Writer(Configuration conf, FileSystem fs, String dirName,
+      SequenceFile.CompressionType compression, final boolean filter,
+      int nrows, final HRegionInfo hri)
+    throws IOException {
+      super(conf, fs, dirName, compression, hri);
+      this.dirName = dirName;
+      this.fs = fs;
+      if (filter) {
+        /* 
+         * There is no way to automatically determine the vector size and the
+         * number of hash functions to use. In particular, bloom filters are
+         * very sensitive to the number of elements inserted into them. For
+         * HBase, the number of entries depends on the size of the data stored
+         * in the column. Currently the default region size is 256MB, so the
+         * number of entries is approximately 
+         * 256MB / (average value size for column).
+         * 
+         * If m denotes the number of bits in the Bloom filter (vectorSize),
+         * n denotes the number of elements inserted into the Bloom filter and
+         * k represents the number of hash functions used (nbHash), then
+         * according to Broder and Mitzenmacher,
+         * 
+         * ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf )
+         * 
+         * the probability of false positives is minimized when k is
+         * approximately m/n ln(2).
+         * 
+         * If we fix the number of hash functions and know the number of
+         * entries, then the optimal vector size m = (k * n) / ln(2)
+         */
+        this.bloomFilter = new BloomFilter(
+            (int) Math.ceil(
+                (DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
+                Math.log(2.0)),
+            (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
+            Hash.getHashType(conf)
+        );
+      } else {
+        this.bloomFilter = null;
+      }
+    }
+
+    @Override
+    public void append(WritableComparable key, Writable val)
+    throws IOException {
+      if (bloomFilter != null) {
+        bloomFilter.add(getBloomFilterKey(key));
+      }
+      super.append(key, val);
+    }
+
+    @Override
+    public synchronized void close() throws IOException {
+      super.close();
+      if (this.bloomFilter != null) {
+        flushBloomFilter();
+      }
+    }
+    
+    /**
+     * Flushes bloom filter to disk
+     * 
+     * @throws IOException
+     */
+    private void flushBloomFilter() throws IOException {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("flushing bloom filter for " + this.dirName);
+      }
+      FSDataOutputStream out =
+        fs.create(new Path(dirName, BLOOMFILTER_FILE_NAME));
+      try {
+        bloomFilter.write(out);
+      } finally {
+        out.close();
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("flushed bloom filter for " + this.dirName);
+      }
+    }
+  }
+}
\ No newline at end of file
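
As a sanity check on the sizing comment in the Writer above: with k fixed at
4 hash functions, the vector size works out to m = ceil(k * n / ln 2).  A
minimal standalone sketch of the arithmetic (the row count is a hypothetical
example, not a value from this commit):

    public class BloomSizingSketch {
      public static void main(String[] args) {
        final double k = 4.0;       // DEFAULT_NUMBER_OF_HASH_FUNCTIONS above
        final int nrows = 1000000;  // hypothetical number of entries
        // Fixing k in k = (m / n) ln 2 and solving for m:
        int vectorSize = (int) Math.ceil((k * nrows) / Math.log(2.0));
        System.out.println(vectorSize);  // 5770781 bits for one million rows
      }
    }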

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java?rev=711155&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HBaseMapFile.java Mon Nov  3 17:28:53 2008
@@ -0,0 +1,219 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io;
+
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.onelab.filter.Key;
+
+/**
+ * HBase customizations of MapFile.
+ */
+public class HBaseMapFile extends MapFile {
+  private static final Log LOG = LogFactory.getLog(HBaseMapFile.class);
+  public static final Class<? extends Writable>  VALUE_CLASS =
+    ImmutableBytesWritable.class;
+
+  /**
+   * Custom bloom filter key maker.
+   * @param key
+   * @return Key made of bytes of row only.
+   */
+  protected static Key getBloomFilterKey(WritableComparable key) {
+    return new Key(((HStoreKey) key).getRow());
+  }
+
+  /**
+   * A reader capable of reading and caching blocks of the data file.
+   */
+  public static class HBaseReader extends MapFile.Reader {
+    private final boolean blockCacheEnabled;
+    private final HStoreKey firstKey;
+    private final HStoreKey finalKey;
+    private final String dirName;
+
+    /**
+     * @param fs
+     * @param dirName
+     * @param conf
+     * @param hri
+     * @throws IOException
+     */
+    public HBaseReader(FileSystem fs, String dirName, Configuration conf,
+      HRegionInfo hri)
+    throws IOException {
+      this(fs, dirName, conf, false, hri);
+    }
+    
+    /**
+     * @param fs
+     * @param dirName
+     * @param conf
+     * @param blockCacheEnabled
+     * @param hri
+     * @throws IOException
+     */
+    public HBaseReader(FileSystem fs, String dirName, Configuration conf,
+        boolean blockCacheEnabled, HRegionInfo hri)
+    throws IOException {
+      super(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), 
+          conf, false); // defer opening streams
+      this.dirName = dirName;
+      this.blockCacheEnabled = blockCacheEnabled;
+      open(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), conf);
+      
+      // Force reading of the mapfile index by calling midKey.
+      // Reading the index will bring the index into memory over
+      // here on the client and then close the index file freeing
+      // up socket connection and resources in the datanode. 
+      // Usually the first access on a MapFile.Reader loads the index;
+      // we force the issue here for HStoreFile MapFiles because an
+      // access may not happen for some time; meantime we're
+      // using up datanode resources.  See HADOOP-2341.
+      // Of note, midKey just goes to index.  Does not seek.
+      midKey();
+
+      // Read in the first and last key.  Cache them.  Make sure we are at start
+      // of the file.
+      reset();
+      HStoreKey key = new HStoreKey();
+      super.next(key, new ImmutableBytesWritable());
+      key.setHRegionInfo(hri);
+      this.firstKey = key;
+      // Set us back to start of file.  The call to finalKey below restores
+      // whatever was the previous position.
+      reset();
+
+      // Get final key.
+      key = new HStoreKey();
+      super.finalKey(key);
+      key.setHRegionInfo(hri);
+      this.finalKey = key;
+    }
+
+    @Override
+    protected org.apache.hadoop.io.SequenceFile.Reader createDataFileReader(
+        FileSystem fs, Path dataFile, Configuration conf)
+    throws IOException {
+      if (!blockCacheEnabled) {
+        return super.createDataFileReader(fs, dataFile, conf);
+      }
+      final int blockSize = conf.getInt("hbase.hstore.blockCache.blockSize",
+          64 * 1024);
+      return new SequenceFile.Reader(fs, dataFile,  conf) {
+        @Override
+        protected FSDataInputStream openFile(FileSystem fs, Path file,
+            int bufferSize, long length) throws IOException {
+          
+          return new FSDataInputStream(new BlockFSInputStream(
+                  super.openFile(fs, file, bufferSize, length), length,
+                  blockSize));
+        }
+      };
+    }
+    
+    @Override
+    public synchronized void finalKey(final WritableComparable fk)
+    throws IOException {
+      Writables.copyWritable(this.finalKey, fk);
+    }
+    
+    /**
+     * @param hsk
+     * @return True if the file *could* contain <code>hsk</code> and false if
+     * outside the bounds of this file's start and end keys.
+     */
+    public boolean containsKey(final HStoreKey hsk) {
+      return this.firstKey.compareTo(hsk) <= 0 &&
+         this.finalKey.compareTo(hsk) >= 0;
+    }
+    
+    public String toString() {
+      HStoreKey midkey = null;
+      try {
+        midkey = (HStoreKey)midKey();
+      } catch (IOException ioe) {
+        LOG.warn("Failed get of midkey", ioe);
+      }
+      return "dirName=" + this.dirName + ", firstKey=" +
+        this.firstKey.toString() + ", midKey=" + midkey +
+          ", finalKey=" + this.finalKey;
+    }
+    
+    /**
+     * @return First key in this file.  Can be null around construction time.
+     */
+    public HStoreKey getFirstKey() {
+      return this.firstKey;
+    }
+
+    /**
+     * @return Final key in file.  Can be null around construction time.
+     */
+    public HStoreKey getFinalKey() {
+      return this.finalKey;
+    }
+    
+    @Override
+    public synchronized WritableComparable getClosest(WritableComparable key,
+        Writable value, boolean before)
+    throws IOException {
+      if ((!before && ((HStoreKey)key).compareTo(this.finalKey) > 0) ||
+          (before && ((HStoreKey)key).compareTo(this.firstKey) < 0)) {
+        return null;
+      }
+      return  super.getClosest(key, value, before);
+    }
+  }
+  
+  public static class HBaseWriter extends MapFile.Writer {
+    /**
+     * @param conf
+     * @param fs
+     * @param dirName
+     * @param compression
+     * @param hri
+     * @throws IOException
+     */
+    public HBaseWriter(Configuration conf, FileSystem fs, String dirName,
+        SequenceFile.CompressionType compression, final HRegionInfo hri)
+    throws IOException {
+      super(conf, fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri),
+         VALUE_CLASS, compression);
+      // Default for mapfiles is 128.  Makes random reads faster if we
+      // have more keys indexed and we're not 'next'-ing around in the
+      // mapfile.
+      setIndexInterval(conf.getInt("hbase.io.index.interval", 128));
+    }
+  }
+}
\ No newline at end of file
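
The performance win named in HBASE-975 falls out of the constructor above:
the first and final keys are read once when the reader is opened, so
containsKey() and the getClosest() override can reject out-of-range keys with
a pure in-memory comparison.  A hedged sketch of a caller-side check, not
actual HStore code:

    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.io.HBaseMapFile;

    public class BoundsCheckSketch {
      static boolean mightContain(HBaseMapFile.HBaseReader reader,
          HStoreKey wanted) {
        // Compares against the keys cached at open time; no seek and
        // no datanode round trip for keys outside [firstKey, finalKey].
        return reader.containsKey(wanted);
      }
    }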

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java?rev=711155&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/HalfMapFileReader.java Mon Nov  3 17:28:53 2008
@@ -0,0 +1,228 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.io.Reference.Range;
+import org.apache.hadoop.hbase.util.Writables;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * A facade for a {@link MapFile.Reader} that serves up either the top or
+ * bottom half of a MapFile where 'bottom' is the first half of the file
+ * containing the keys that sort lowest and 'top' is the second half of the
+ * file with keys that sort greater than those of the bottom half.  The top
+ * includes the split file's midkey, or the key that follows it if the midkey
+ * does not exist in the file.
+ * 
+ * <p>This type works in tandem with the {@link Reference} type.  This class
+ * is used for reading while Reference is used for writing.
+ * 
+ * <p>This file is not splittable.  Calls to {@link #midKey()} return null.
+ */
+public class HalfMapFileReader extends BloomFilterMapFile.Reader {
+  private final boolean top;
+  private final HStoreKey midkey;
+  private boolean firstNextCall = true;
+  private final WritableComparable<HStoreKey> firstKey;
+  private final WritableComparable<HStoreKey> finalKey;
+  
+  public HalfMapFileReader(final FileSystem fs, final String dirName, 
+      final Configuration conf, final Range r,
+      final WritableComparable<HStoreKey> mk,
+      final HRegionInfo hri)
+  throws IOException {
+    this(fs, dirName, conf, r, mk, false, false, hri);
+  }
+  
+  @SuppressWarnings("unchecked")
+  public HalfMapFileReader(final FileSystem fs, final String dirName, 
+      final Configuration conf, final Range r,
+      final WritableComparable<HStoreKey> mk, final boolean filter,
+      final boolean blockCacheEnabled,
+      final HRegionInfo hri)
+  throws IOException {
+    super(fs, dirName, conf, filter, blockCacheEnabled, hri);
+    // This is not the actual midkey for this half-file; it's just the border
+    // around which we split top and bottom.  Have to look in files to find
+    // actual last and first keys for bottom and top halves.  Half-files don't
+    // have an actual midkey themselves. No midkey is how we indicate file is
+    // not splittable.
+    this.midkey = new HStoreKey((HStoreKey)mk);
+    this.midkey.setHRegionInfo(hri);
+    // Is it top or bottom half?
+    this.top = Reference.isTopFileRegion(r);
+    // Firstkey comes back null if passed midkey is lower than first key in file
+    // and it's a bottom half HalfMapFileReader, OR, if midkey is higher than
+    // the final key in the backing file.  In either case, it means this half
+    // file is empty.
+    this.firstKey = this.top?
+      super.getClosest(this.midkey, new ImmutableBytesWritable()):
+      super.getFirstKey().compareTo(this.midkey) > 0?
+        null: super.getFirstKey();
+    this.finalKey = this.top?
+      super.getFinalKey():
+      super.getClosest(new HStoreKey.BeforeThisStoreKey(this.midkey),
+          new ImmutableBytesWritable(), true);
+  }
+  
+  /*
+   * Check key is not bleeding into wrong half of the file.
+   * @param key
+   * @throws IOException
+   */
+  private void checkKey(final WritableComparable<HStoreKey> key)
+  throws IOException {
+    if (top) {
+      if (key.compareTo(midkey) < 0) {
+        throw new IOException("Illegal Access: Key is less than midKey of " +
+        "backing mapfile");
+      }
+    } else if (key.compareTo(midkey) >= 0) {
+      throw new IOException("Illegal Access: Key is greater than or equal " +
+      "to midKey of backing mapfile");
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized void finalKey(WritableComparable key)
+  throws IOException {
+    Writables.copyWritable(this.finalKey, key);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized Writable get(WritableComparable key, Writable val)
+  throws IOException {
+    checkKey(key);
+    return super.get(key, val);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized WritableComparable getClosest(WritableComparable key,
+    Writable val)
+  throws IOException {
+    WritableComparable closest = null;
+    if (top) {
+      // If top, the lowest possible key is first key.  Do not have to check
+      // what comes back from super getClosest.  Will return exact match or
+      // greater.
+      closest = (key.compareTo(getFirstKey()) < 0)?
+        getClosest(getFirstKey(), val): super.getClosest(key, val);
+    } else {
+      // We're serving bottom of the file.
+      if (key.compareTo(this.midkey) < 0) {
+        // Check key is within range for bottom.
+        closest = super.getClosest(key, val);
+        // midkey was made against largest store file at time of split. Smaller
+        // store files could have anything in them.  Check return value is
+        // not beyond the midkey (getClosest returns exact match or next
+        // after).
+        if (closest != null && closest.compareTo(this.midkey) >= 0) {
+          // Don't let this value out.
+          closest = null;
+        }
+      }
+      // Else, key is > midkey so let out closest = null.
+    }
+    return closest;
+  }
+
+  @SuppressWarnings({"unused", "unchecked"})
+  @Override
+  public synchronized WritableComparable midKey() throws IOException {
+    // Returns null to indicate file is not splittable.
+    return null;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized boolean next(WritableComparable key, Writable val)
+  throws IOException {
+    if (firstNextCall) {
+      firstNextCall = false;
+      if (isEmpty()) {
+        return false;
+      }
+      // Seek and fill by calling getClosest on first key.
+      if (this.firstKey != null) {
+        Writables.copyWritable(this.firstKey, key);
+        WritableComparable nearest = getClosest(key, val);
+        if (!key.equals(nearest)) {
+          throw new IOException("Keys don't match and should: " +
+              key.toString() + ", " + nearest.toString());
+        }
+      }
+      return true;
+    }
+    boolean result = super.next(key, val);
+    if (!top && key.compareTo(midkey) >= 0) {
+      result = false;
+    }
+    return result;
+  }
+  
+  private boolean isEmpty() {
+    return this.firstKey == null;
+  }
+
+  @Override
+  public synchronized void reset() throws IOException {
+    if (top) {
+      firstNextCall = true;
+      // I don't think this is needed. seek(this.firstKey);
+      return;
+    }
+    super.reset();
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public synchronized boolean seek(WritableComparable key)
+  throws IOException {
+    checkKey(key);
+    return super.seek(key);
+  }
+  
+  @Override
+  public HStoreKey getFirstKey() {
+    return (HStoreKey)this.firstKey;
+  }
+  
+  @Override
+  public HStoreKey getFinalKey() {
+    return (HStoreKey)this.finalKey;
+  }
+  
+  @Override
+  public String toString() {
+    return super.toString() + ", half=" + (top? "top": "bottom");
+  }
+}
\ No newline at end of file
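
Putting the half-file semantics together: a bottom-half reader serves keys in
[firstKey, midkey) and a top-half reader serves [midkey, finalKey]; next() on
a bottom half stops once keys reach the midkey.  A hedged iteration sketch
(the process() consumer is hypothetical):

    import java.io.IOException;
    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.io.HalfMapFileReader;
    import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

    public class HalfFileScanSketch {
      static void scan(HalfMapFileReader half) throws IOException {
        HStoreKey key = new HStoreKey();
        ImmutableBytesWritable val = new ImmutableBytesWritable();
        while (half.next(key, val)) {
          // Bottom half: every key here compares < midkey.
          // Top half: every key here compares >= midkey.
          process(key, val);
        }
      }

      static void process(HStoreKey key, ImmutableBytesWritable val) {
        // hypothetical consumer
      }
    }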

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/ImmutableBytesWritable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/ImmutableBytesWritable.java?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/ImmutableBytesWritable.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/ImmutableBytesWritable.java Mon Nov  3 17:28:53 2008
@@ -79,7 +79,7 @@
    * Get the data from the BytesWritable.
    * @return The data is only valid between 0 and getSize() - 1.
    */
-  public byte[] get() {
+  public byte [] get() {
     if (this.bytes == null) {
      throw new IllegalStateException("Uninitialized. Null constructor " +
        "called w/o accompanying readFields invocation");

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/Reference.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/Reference.java?rev=711155&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/Reference.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/io/Reference.java Mon Nov  3 17:28:53 2008
@@ -0,0 +1,117 @@
+/**
+ * 
+ */
+package org.apache.hadoop.hbase.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * A reference to a part of a store file.  The file referenced usually lives
+ * under a different region.  The part referenced is usually the top or bottom
+ * half of the file.  References are made at region split time.  Being lazy
+ * about copying data between the parent of the split and the split daughters
+ * makes splitting faster.
+ * 
+ * <p>References work with {@link HalfMapFileReader}.  References know how to
+ * write out the reference format in the file system and are what's juggled when
+ * references are mixed in with direct store files.  The
+ * {@link HalfMapFileReader} is used for reading the referred-to file.
+ *
+ * <p>References to store files located over in some other region look like
+ * this in the file system
+ * <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
+ * i.e. an id followed by the name of the referenced region.  The data
+ * ('mapfiles') of references are empty. The accompanying <code>info</code> file
+ * contains the <code>midkey</code> that demarcates top and bottom of the
+ * referenced storefile, the id of the remote store we're referencing and
+ * whether we're to serve the top or bottom region of the remote store file.
+ * Note that a region is itself not splittable if it has instances of store file
+ * references.  References are cleaned up by compactions.
+ */
+public class Reference implements Writable {
+  // TODO: see if it makes sense making a ReferenceMapFile whose Writer is this
+  // class and whose Reader is the {@link HalfMapFileReader}.
+
+  private int encodedRegionName;
+  private long fileid;
+  private Range region;
+  private HStoreKey midkey;
+  
+  /** 
+   * For split HStoreFiles, it specifies if the file covers the lower half or
+   * the upper half of the key range
+   */
+  public static enum Range {
+    /** HStoreFile contains upper half of key range */
+    top,
+    /** HStoreFile contains lower half of key range */
+    bottom
+  }
+  
+  public Reference(final int ern, final long fid, final HStoreKey m,
+      final Range fr) {
+    this.encodedRegionName = ern;
+    this.fileid = fid;
+    this.region = fr;
+    this.midkey = m;
+  }
+  
+  public Reference() {
+    this(-1, -1, null, Range.bottom);
+  }
+
+  public long getFileId() {
+    return fileid;
+  }
+
+  public Range getFileRegion() {
+    return region;
+  }
+  
+  public HStoreKey getMidkey() {
+    return midkey;
+  }
+  
+  public int getEncodedRegionName() {
+    return this.encodedRegionName;
+  }
+
+  @Override
+  public String toString() {
+    return encodedRegionName + "/" + fileid + "/" + region;
+  }
+
+  // Make it serializable.
+
+  public void write(DataOutput out) throws IOException {
+    // Write out the encoded region name as a String.  Doing it as a String
+    // keeps a Reference's serialization backward compatible with
+    // pre-HBASE-82 serializations.  Alternative is rewriting all
+    // info files in hbase (Serialized References are written into the
+    // 'info' file that accompanies HBase Store files).
+    out.writeUTF(Integer.toString(encodedRegionName));
+    out.writeLong(fileid);
+    // Write true if we're doing top of the file.
+    out.writeBoolean(isTopFileRegion(region));
+    this.midkey.write(out);
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    this.encodedRegionName = Integer.parseInt(in.readUTF());
+    fileid = in.readLong();
+    boolean tmp = in.readBoolean();
+    // If true, set region to top.
+    region = tmp? Range.top: Range.bottom;
+    midkey = new HStoreKey();
+    midkey.readFields(in);
+  }
+  
+  public static boolean isTopFileRegion(final Range r) {
+    return r.equals(Range.top);
+  }
+}
\ No newline at end of file
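
Since Reference is a plain Writable, its on-disk format is just the write()
sequence above (region name as a UTF string, file id, top/bottom flag,
midkey).  A minimal round-trip sketch using only the methods defined in this
file (the constructor arguments are made-up example values):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.io.Reference;

    public class ReferenceRoundTripSketch {
      public static void main(String[] args) throws IOException {
        Reference ref = new Reference(1234, 42L, new HStoreKey(),
            Reference.Range.top);
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        ref.write(new DataOutputStream(bytes));
        Reference copy = new Reference();
        copy.readFields(new DataInputStream(
            new ByteArrayInputStream(bytes.toByteArray())));
        // copy now carries the same region name, file id, range and midkey.
      }
    }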

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Mon Nov  3 17:28:53 2008
@@ -1,5 +1,5 @@
   /**
- * Copyright 2007 The Apache Software Foundation
+ * Copyright 2008 The Apache Software Foundation
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -57,6 +57,7 @@
 import org.apache.hadoop.hbase.io.BatchUpdate;
 import org.apache.hadoop.hbase.io.Cell;
 import org.apache.hadoop.hbase.io.HbaseMapWritable;
+import org.apache.hadoop.hbase.io.Reference;
 import org.apache.hadoop.hbase.io.RowResult;
 import org.apache.hadoop.hbase.ipc.HRegionInterface;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -775,15 +776,15 @@
       // Split each store file.
       for(HStoreFile h: hstoreFilesToSplit) {
         // A reference to the bottom half of the hsf store file.
-        HStoreFile.Reference aReference = new HStoreFile.Reference(
+        Reference aReference = new Reference(
             this.regionInfo.getEncodedName(), h.getFileId(),
-            new HStoreKey(midKey, this.regionInfo), HStoreFile.Range.bottom);
+            new HStoreKey(midKey, this.regionInfo), Reference.Range.bottom);
         HStoreFile a = new HStoreFile(this.conf, fs, splits,
             regionAInfo, h.getColFamily(), -1, aReference);
         // Reference to top half of the hsf store file.
-        HStoreFile.Reference bReference = new HStoreFile.Reference(
+        Reference bReference = new Reference(
             this.regionInfo.getEncodedName(), h.getFileId(),
-            new HStoreKey(midKey, this.regionInfo), HStoreFile.Range.top);
+            new HStoreKey(midKey, this.regionInfo), Reference.Range.top);
         HStoreFile b = new HStoreFile(this.conf, fs, splits,
             regionBInfo, h.getColFamily(), -1, bReference);
         h.splitStoreFile(a, b, this.fs);

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Mon Nov  3 17:28:53 2008
@@ -49,13 +49,15 @@
 import org.apache.hadoop.hbase.HStoreKey;
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hbase.filter.RowFilterInterface;
+import org.apache.hadoop.hbase.io.BloomFilterMapFile;
 import org.apache.hadoop.hbase.io.Cell;
+import org.apache.hadoop.hbase.io.HBaseMapFile;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.Reference;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.StringUtils;
 
@@ -191,9 +193,11 @@
     }
     this.desiredMaxFileSize = maxFileSize;
 
-    this.majorCompactionTime = conf.getLong(HConstants.MAJOR_COMPACTION_PERIOD, 86400000);
+    this.majorCompactionTime =
+      conf.getLong(HConstants.MAJOR_COMPACTION_PERIOD, 86400000);
     if (family.getValue(HConstants.MAJOR_COMPACTION_PERIOD) != null) {
-      String strCompactionTime = family.getValue(HConstants.MAJOR_COMPACTION_PERIOD);
+      String strCompactionTime =
+        family.getValue(HConstants.MAJOR_COMPACTION_PERIOD);
       this.majorCompactionTime = (new Long(strCompactionTime)).longValue();
     }
 
@@ -209,16 +213,10 @@
       this.compression = SequenceFile.CompressionType.NONE;
     }
     
-    Path mapdir = HStoreFile.getMapDir(basedir, info.getEncodedName(),
-        family.getName());
-    if (!fs.exists(mapdir)) {
-      fs.mkdirs(mapdir);
-    }
-    Path infodir = HStoreFile.getInfoDir(basedir, info.getEncodedName(),
-        family.getName());
-    if (!fs.exists(infodir)) {
-      fs.mkdirs(infodir);
-    }
+    Path mapdir = checkdir(HStoreFile.getMapDir(basedir, info.getEncodedName(),
+        family.getName()));
+    Path infodir = checkdir(HStoreFile.getInfoDir(basedir, info.getEncodedName(),
+        family.getName()));
     
     // Go through the 'mapdir' and 'infodir' together, make sure that all 
     // MapFiles are in a reliable state.  Every entry in 'mapdir' must have a 
@@ -232,27 +230,19 @@
         Bytes.toString(this.storeName) + ", max sequence id " + this.maxSeqId);
     }
     
-    try {
-      doReconstructionLog(reconstructionLog, maxSeqId, reporter);
-    } catch (EOFException e) {
-      // Presume we got here because of lack of HADOOP-1700; for now keep going
-      // but this is probably not what we want long term.  If we got here there
-      // has been data-loss
-      LOG.warn("Exception processing reconstruction log " + reconstructionLog +
-        " opening " + this.storeName +
-        " -- continuing.  Probably lack-of-HADOOP-1700 causing DATA LOSS!", e);
-    } catch (IOException e) {
-      // Presume we got here because of some HDFS issue. Don't just keep going.
-      // Fail to open the HStore.  Probably means we'll fail over and over
-      // again until human intervention but alternative has us skipping logs
-      // and losing edits: HBASE-642.
-      LOG.warn("Exception processing reconstruction log " + reconstructionLog +
-        " opening " + this.storeName, e);
-      throw e;
-    }
-
-    // Finally, start up all the map readers! (There could be more than one
-    // since we haven't compacted yet.)
+    // Do reconstruction log.
+    runReconstructionLog(reconstructionLog, this.maxSeqId, reporter);
+    
+    // Finally, start up all the map readers!
+    setupReaders();
+  }
+  
+  /*
+   * Setup the mapfile readers for this store. There could be more than one
+   * since we haven't compacted yet.
+   * @throws IOException
+   */
+  private void setupReaders() throws IOException {
     boolean first = true;
     for(Map.Entry<Long, HStoreFile> e: this.storefiles.entrySet()) {
       MapFile.Reader r = null;
@@ -270,6 +260,18 @@
     }
   }
 
+  /*
+   * @param dir If doesn't exist, create it.
+   * @return Passed <code>dir</code>.
+   * @throws IOException
+   */
+  private Path checkdir(final Path dir) throws IOException {
+    if (!fs.exists(dir)) {
+      fs.mkdirs(dir);
+    }
+    return dir;
+  }
+
   HColumnDescriptor getFamily() {
     return this.family;
   }
@@ -279,6 +281,36 @@
   }
   
   /*
+   * Run the reconstruction log
+   * @param reconstructionLog
+   * @param msid
+   * @param reporter
+   * @throws IOException
+   */
+  private void runReconstructionLog(final Path reconstructionLog,
+    final long msid, final Progressable reporter)
+  throws IOException {
+    try {
+      doReconstructionLog(reconstructionLog, msid, reporter);
+    } catch (EOFException e) {
+      // Presume we got here because of lack of HADOOP-1700; for now keep going
+      // but this is probably not what we want long term.  If we got here there
+      // has been data-loss
+      LOG.warn("Exception processing reconstruction log " + reconstructionLog +
+        " opening " + this.storeName +
+        " -- continuing.  Probably lack-of-HADOOP-1700 causing DATA LOSS!", e);
+    } catch (IOException e) {
+      // Presume we got here because of some HDFS issue. Don't just keep going.
+      // Fail to open the HStore.  Probably means we'll fail over and over
+      // again until human intervention but alternative has us skipping logs
+      // and losing edits: HBASE-642.
+      LOG.warn("Exception processing reconstruction log " + reconstructionLog +
+        " opening " + this.storeName, e);
+      throw e;
+    }
+  }
+
+  /*
    * Read the reconstructionLog to see whether we need to build a brand-new 
    * MapFile out of non-flushed log entries.  
    *
@@ -396,7 +428,7 @@
       long fid = Long.parseLong(m.group(1));
       
       HStoreFile curfile = null;
-      HStoreFile.Reference reference = null;
+      Reference reference = null;
       if (isReference) {
         reference = HStoreFile.readSplitInfo(p, fs);
       }
@@ -437,7 +469,7 @@
           // TODO: This is going to fail if we are to rebuild a file from
           // meta because it won't have right comparator: HBASE-848.
           long count = MapFile.fix(this.fs, mapfile, HStoreKey.class,
-            HStoreFile.HbaseMapFile.VALUE_CLASS, false, this.conf);
+            HBaseMapFile.VALUE_CLASS, false, this.conf);
           if (LOG.isDebugEnabled()) {
             LOG.debug("Fixed index on " + mapfile.toString() + "; had " +
               count + " entries");
@@ -448,21 +480,33 @@
           continue;
         }
       }
-      storeSize += curfile.length();
+      long length = curfile.length();
+      storeSize += length;
       
       // TODO: Confirm referent exists.
       
       // Found map and sympathetic info file.  Add this hstorefile to result.
       if (LOG.isDebugEnabled()) {
         LOG.debug("loaded " + FSUtils.getPath(p) + ", isReference=" +
-          isReference + ", sequence id=" + storeSeqId);
+          isReference + ", sequence id=" + storeSeqId + ", length=" + length);
       }
       results.put(Long.valueOf(storeSeqId), curfile);
       // Keep list of sympathetic data mapfiles for cleaning info dir in next
       // section.  Make sure path is fully qualified for compare.
       mapfiles.add(this.fs.makeQualified(mapfile));
     }
-    
+    cleanDataFiles(mapfiles, mapdir);
+    return results;
+  }
+  
+  /*
+   * If no info file delete the sympathetic data file.
+   * @param mapfiles List of mapfiles.
+   * @param mapdir Directory to check.
+   * @throws IOException
+   */
+  private void cleanDataFiles(final List<Path> mapfiles, final Path mapdir)
+  throws IOException {
     // List paths by experience returns fully qualified names -- at least when
     // running on a mini hdfs cluster.
     FileStatus [] datfiles = fs.listStatus(mapdir);
@@ -474,7 +518,6 @@
         fs.delete(p, true);
       }
     }
-    return results;
   }
 
   /* 
@@ -732,19 +775,20 @@
    * @param dir
    * @throws IOException
    */
-  private static long getLowestTimestamp(FileSystem fs, Path dir) throws IOException {
-   FileStatus[] stats = fs.listStatus(dir);
-   if (stats == null || stats.length == 0) {
-     return 0l;
-   }
-   long lowTimestamp = Long.MAX_VALUE;   
-   for (int i = 0; i < stats.length; i++) {
-     long timestamp = stats[i].getModificationTime();
-     if (timestamp < lowTimestamp){
-       lowTimestamp = timestamp;
-     }
-   }
-  return lowTimestamp;
+  private static long getLowestTimestamp(FileSystem fs, Path dir)
+  throws IOException {
+    FileStatus[] stats = fs.listStatus(dir);
+    if (stats == null || stats.length == 0) {
+      return 0l;
+    }
+    long lowTimestamp = Long.MAX_VALUE;   
+    for (int i = 0; i < stats.length; i++) {
+      long timestamp = stats[i].getModificationTime();
+      if (timestamp < lowTimestamp){
+        lowTimestamp = timestamp;
+      }
+    }
+    return lowTimestamp;
   }
 
   /**
@@ -768,12 +812,11 @@
    * @return mid key if a split is needed, null otherwise
    * @throws IOException
    */
-  StoreSize compact(boolean majorCompaction) throws IOException {
+  StoreSize compact(final boolean majorCompaction) throws IOException {
     boolean forceSplit = this.info.shouldSplit(false);
     boolean doMajorCompaction = majorCompaction;
     synchronized (compactLock) {
       long maxId = -1;
-      int nrows = -1;
       List<HStoreFile> filesToCompact = null;
       synchronized (storefiles) {
         if (this.storefiles.size() <= 0) {
@@ -791,18 +834,7 @@
       // compacting below. Only check if doMajorCompaction is not true.
       long lastMajorCompaction = 0L;
       if (!doMajorCompaction) {
-        Path mapdir = HStoreFile.getMapDir(basedir, info.getEncodedName(), family.getName());
-        long lowTimestamp = getLowestTimestamp(fs, mapdir);
-        lastMajorCompaction = System.currentTimeMillis() - lowTimestamp;
-        if (lowTimestamp < (System.currentTimeMillis() - majorCompactionTime) &&
-            lowTimestamp > 0l) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Major compaction triggered on store: " + this.storeNameStr +
-              ". Time since last major compaction: " +
-              ((System.currentTimeMillis() - lowTimestamp)/1000) + " seconds");
-          }
-          doMajorCompaction = true;
-        }
+        doMajorCompaction = isMajorCompaction();
       }
       if (!doMajorCompaction && !hasReferences(filesToCompact) &&
           filesToCompact.size() < compactionThreshold) {
@@ -813,7 +845,7 @@
         return checkSplit(forceSplit);
       }
 
-      // HBASE-745, preparing all store file size for incremental compacting
+      // HBASE-745, preparing all store file sizes for incremental compacting
       // selection.
       int countOfFiles = filesToCompact.size();
       long totalSize = 0;
@@ -866,25 +898,7 @@
        * based access pattern is practically designed to ruin the cache.
        */
       List<MapFile.Reader> rdrs = new ArrayList<MapFile.Reader>();
-      for (HStoreFile file: filesToCompact) {
-        try {
-          HStoreFile.BloomFilterMapFile.Reader reader =
-            file.getReader(fs, false, false);
-          rdrs.add(reader);
-          
-          // Compute the size of the new bloomfilter if needed
-          if (this.family.isBloomfilter()) {
-            nrows += reader.getBloomFilterSize();
-          }
-        } catch (IOException e) {
-          // Add info about which file threw exception. It may not be in the
-          // exception message so output a message here where we know the
-          // culprit.
-          LOG.warn("Failed with " + e.toString() + ": " + file.toString());
-          closeCompactionReaders(rdrs);
-          throw e;
-        }
-      }
+      int nrows = createReaders(rdrs, filesToCompact);
 
       // Step through them, writing to the brand-new MapFile
       HStoreFile compactedOutputFile = new HStoreFile(conf, fs, 
@@ -915,9 +929,75 @@
       }
     }
     return checkSplit(forceSplit);
-  }
+  }  
 
   /*
+   * @return True if we should run a major compaction.
+   */
+  private boolean isMajorCompaction() throws IOException {
+    boolean result = false;
+    Path mapdir = HStoreFile.getMapDir(this.basedir, this.info.getEncodedName(),
+        this.family.getName());
+    long lowTimestamp = getLowestTimestamp(fs, mapdir);
+    if (lowTimestamp < (System.currentTimeMillis() - this.majorCompactionTime) &&
+        lowTimestamp > 0l) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Major compaction triggered on store: " +
+          this.storeNameStr + ". Time since last major compaction: " +
+          ((System.currentTimeMillis() - lowTimestamp)/1000) + " seconds");
+      }
+      result = true;
+    }
+    return result;
+  }
+  
+  /*
+   * Create readers for the passed in list of HStoreFiles and add them to
+   * <code>readers</code> list.
+   * @param rs Add Readers here.
+   * @param files List of HSFs to make Readers for.
+   * @return Count of rows for bloom filter sizing.  Returns -1 if no bloom
+   * filter wanted.
+   */
+  private int createReaders(final List<MapFile.Reader> rs,
+    final List<HStoreFile> files)
+  throws IOException {
+    /* We create a new list of MapFile.Reader objects so we don't screw up
+     * the caching associated with the currently-loaded ones. Our iteration-
+     * based access pattern is practically designed to ruin the cache.
+     */
+    int nrows = -1;
+    for (HStoreFile file: files) {
+      try {
+        // TODO: Readers are opened without block-cache enabled.  Means we don't
+        // get the prefetch that makes the read faster.  But we don't want to
+        // enable block-cache for these readers that are about to be closed.
+        // The compaction of soon-to-be closed readers will probably force out
+        // blocks that may be needed servicing real-time requests whereas
+        // compaction runs in background.  TODO: We know we're going to read
+        // this file straight through.  Leverage this fact.  Use a big buffer
+        // client side to speed things up or read it all up into memory one file
+        // at a time or pull local and memory-map the file but leave the writer
+        // up in hdfs?
+        BloomFilterMapFile.Reader reader = file.getReader(fs, false, false);
+        rs.add(reader);
+        // Compute the size of the new bloomfilter if needed
+        if (this.family.isBloomfilter()) {
+          nrows += reader.getBloomFilterSize();
+        }
+      } catch (IOException e) {
+        // Add info about which file threw exception. It may not be in the
+        // exception message so output a message here where we know the
+        // culprit.
+        LOG.warn("Failed with " + e.toString() + ": " + file.toString());
+        closeCompactionReaders(rs);
+        throw e;
+      }
+    }
+    return nrows;
+  }
+  
+  /*
    * Compact a list of MapFile.Readers into MapFile.Writer.
    * 
    * We work by iterating through the readers in parallel. We always increment
@@ -1166,7 +1246,6 @@
         // time
         getFullFromMapFile(map, key, columns, deletes, results);
       }
-      
     } finally {
       this.lock.readLock().unlock();
     }
@@ -1203,7 +1282,7 @@
             // recent delete timestamp, record it for later
             if (!deletes.containsKey(readcol) 
               || deletes.get(readcol).longValue() < readkey.getTimestamp()) {
-              deletes.put(readcol, readkey.getTimestamp());              
+              deletes.put(readcol, Long.valueOf(readkey.getTimestamp()));              
             }
           } else if (!(deletes.containsKey(readcol) 
             && deletes.get(readcol).longValue() >= readkey.getTimestamp()) ) {
@@ -1377,7 +1456,8 @@
    * @return Matching keys.
    * @throws IOException
    */
-  public List<HStoreKey> getKeys(final HStoreKey origin, final int versions, final long now)
+  public List<HStoreKey> getKeys(final HStoreKey origin, final int versions,
+    final long now)
   throws IOException {
     // This code below is very close to the body of the get method.  Any 
     // changes in the flow below should also probably be done in get.  TODO:
@@ -1788,6 +1868,9 @@
    * @throws IOException
    */
   private HStoreKey getFinalKey(final MapFile.Reader mf) throws IOException {
+    if (mf instanceof HBaseMapFile.HBaseReader) {
+      return ((HBaseMapFile.HBaseReader)mf).getFinalKey();
+    }
     HStoreKey finalKey = new HStoreKey(); 
     mf.finalKey(finalKey);
     finalKey.setHRegionInfo(this.info);
@@ -1839,10 +1922,10 @@
   
   /**
    * Determines if HStore can be split
-   * 
+   * @param force Whether to force a split or not.
    * @return a StoreSize if store can be split, null otherwise
    */
-  StoreSize checkSplit(boolean force) {
+  StoreSize checkSplit(final boolean force) {
     if (this.storefiles.size() <= 0) {
       return null;
     }
@@ -1859,39 +1942,35 @@
       synchronized (storefiles) {
         for (Map.Entry<Long, HStoreFile> e: storefiles.entrySet()) {
           HStoreFile curHSF = e.getValue();
+          if (splitable) {
+            splitable = !curHSF.isReference();
+            if (!splitable) {
+              // RETURN IN MIDDLE OF FUNCTION!!! If not splittable, just return.
+              return null;
+            }
+          }
           long size = curHSF.length();
           if (size > maxSize) {
             // This is the largest one so far
             maxSize = size;
             mapIndex = e.getKey();
           }
-          if (splitable) {
-            splitable = !curHSF.isReference();
-          }
         }
       }
-      if (!splitable) {
-        return null;
-      }
-      MapFile.Reader r = this.readers.get(mapIndex);
 
-      // seek back to the beginning of mapfile
-      r.reset();
-
-      // get the first and last keys
-      HStoreKey firstKey = new HStoreKey();
-      HStoreKey lastKey = new HStoreKey();
-      Writable value = new ImmutableBytesWritable();
-      r.next(firstKey, value);
-      r.finalKey(lastKey);
+      // Cast to HBaseReader.
+      HBaseMapFile.HBaseReader r =
+        (HBaseMapFile.HBaseReader)this.readers.get(mapIndex);
 
       // get the midkey
       HStoreKey mk = (HStoreKey)r.midKey();
       if (mk != null) {
         // if the midkey is the same as the first and last keys, then we cannot
         // (ever) split this region. 
-        if (HStoreKey.equalsTwoRowKeys(info, mk.getRow(), firstKey.getRow()) && 
-            HStoreKey.equalsTwoRowKeys(info, mk.getRow(), lastKey.getRow())) {
+        if (HStoreKey.equalsTwoRowKeys(info, mk.getRow(),
+              r.getFirstKey().getRow()) && 
+            HStoreKey.equalsTwoRowKeys(info, mk.getRow(),
+              r.getFinalKey().getRow())) {
           return null;
         }
         return new StoreSize(maxSize, mk.getRow());
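
A worked illustration of the guard above: if every cell in the largest store file shares a single row key, then the first, mid, and final rows are all equal and no usable split point exists. Condensed into one hypothetical helper (pickSplitRow is not a method in the commit, and it assumes the enclosing HStore's info field; the calls it makes are exactly the ones visible in the diff):

    /*
     * Sketch: choose a split row from the largest store file's reader,
     * or return null when the file cannot yield a usable split point.
     */
    byte [] pickSplitRow(final HBaseMapFile.HBaseReader r)
    throws IOException {
      HStoreKey mk = (HStoreKey) r.midKey();
      if (mk == null) {
        return null;       // e.g. half mapfiles report no midkey
      }
      if (HStoreKey.equalsTwoRowKeys(info, mk.getRow(),
            r.getFirstKey().getRow()) &&
          HStoreKey.equalsTwoRowKeys(info, mk.getRow(),
            r.getFinalKey().getRow())) {
        return null;       // midkey equals both endpoints: unsplittable
      }
      return mk.getRow();  // split the region at the middle row
    }
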
@@ -1967,6 +2046,9 @@
     return this.storefiles.size();
   }
   
+  /*
+   * Data structure that holds size and key.
+   */
   class StoreSize {
     private final long size;
     private final byte[] key;
@@ -1988,4 +2070,4 @@
   HRegionInfo getHRegionInfo() {
     return this.info;
   }
-}
+}
\ No newline at end of file

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java Mon Nov  3 17:28:53 2008
@@ -19,19 +19,13 @@
  */
 package org.apache.hadoop.hbase.regionserver;
 
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInput;
 import java.io.DataInputStream;
-import java.io.DataOutput;
-import java.io.DataOutputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.Random;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -39,18 +33,12 @@
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HStoreKey;
-import org.apache.hadoop.hbase.io.BlockFSInputStream;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.BloomFilterMapFile;
+import org.apache.hadoop.hbase.io.HalfMapFileReader;
+import org.apache.hadoop.hbase.io.Reference;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.Hash;
-import org.apache.hadoop.hbase.util.Writables;
 import org.apache.hadoop.io.MapFile;
 import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.WritableComparable;
-import org.onelab.filter.BloomFilter;
-import org.onelab.filter.Key;
 
 /**
  * A HStore data file.  HStores usually have one or more of these files.  They
@@ -99,17 +87,6 @@
   static final String HSTORE_INFO_DIR = "info";
   static final String HSTORE_FILTER_DIR = "filter";
   
-  /** 
-   * For split HStoreFiles, specifies if the file covers the lower half or
-   * the upper half of the key range
-   */
-  public static enum Range {
-    /** HStoreFile contains upper half of key range */
-    top,
-    /** HStoreFile contains lower half of key range */
-    bottom
-  }
-  
   private final static Random rand = new Random();
 
   private final Path basedir;
@@ -154,8 +131,7 @@
     this.fileId = id;
     
     // If a reference, construction does not write the pointer files.  Thats
-    // done by invocations of writeReferenceFiles(hsf, fs).  Happens at fast
-    // split time.
+    // done by invocations of writeReferenceFiles(hsf, fs). Happens at split.
     this.reference = ref;
   }
 
@@ -287,11 +263,11 @@
   /**
    * @see #writeSplitInfo(FileSystem fs)
    */
-  static HStoreFile.Reference readSplitInfo(final Path p, final FileSystem fs)
+  static Reference readSplitInfo(final Path p, final FileSystem fs)
   throws IOException {
     FSDataInputStream in = fs.open(p);
     try {
-      HStoreFile.Reference r = new HStoreFile.Reference();
+      Reference r = new Reference();
       r.readFields(in);
       return r;
     } finally {
@@ -319,7 +295,8 @@
   long loadInfo(FileSystem fs) throws IOException {
     Path p = null;
     if (isReference()) {
-      p = getInfoFilePath(reference.getEncodedRegionName(), reference.getFileId());
+      p = getInfoFilePath(reference.getEncodedRegionName(),
+        this.reference.getFileId());
     } else {
       p = getInfoFilePath();
     }
@@ -405,7 +382,7 @@
       final boolean bloomFilter, final boolean blockCacheEnabled)
   throws IOException {
     if (isReference()) {
-      return new HStoreFile.HalfMapFileReader(fs,
+      return new HalfMapFileReader(fs,
           getMapFilePath(reference).toString(), conf, 
           reference.getFileRegion(), reference.getMidkey(), bloomFilter,
           blockCacheEnabled, this.hri);
@@ -453,10 +430,6 @@
     return encodedRegionName + "/" + Bytes.toString(colFamily) + "/" + fileId +
       (isReference()? "-" + reference.toString(): "");
   }
-  
-  static boolean isTopFileRegion(final Range r) {
-    return r.equals(Range.top);
-  }
 
   private static String createHStoreFilename(final long fid,
       final int encodedRegionName) {
@@ -510,525 +483,4 @@
     return new Path(base, new Path(Integer.toString(encodedRegionName),
         new Path(Bytes.toString(f), subdir)));
   }
-
-  /*
-   * Data structure to hold reference to a store file over in another region.
-   */
-  static class Reference implements Writable {
-    private int encodedRegionName;
-    private long fileid;
-    private Range region;
-    private HStoreKey midkey;
-    
-    Reference(final int ern, final long fid, final HStoreKey m,
-        final Range fr) {
-      this.encodedRegionName = ern;
-      this.fileid = fid;
-      this.region = fr;
-      this.midkey = m;
-    }
-    
-    Reference() {
-      this(-1, -1, null, Range.bottom);
-    }
-
-    long getFileId() {
-      return fileid;
-    }
-
-    Range getFileRegion() {
-      return region;
-    }
-    
-    HStoreKey getMidkey() {
-      return midkey;
-    }
-    
-    int getEncodedRegionName() {
-      return this.encodedRegionName;
-    }
-   
-    @Override
-    public String toString() {
-      return encodedRegionName + "/" + fileid + "/" + region;
-    }
-
-    // Make it serializable.
-
-    public void write(DataOutput out) throws IOException {
-      // Write out the encoded region name as a String.  Doing it as a String
-      // keeps a Reference's serialziation backword compatible with
-      // pre-HBASE-82 serializations.  ALternative is rewriting all
-      // info files in hbase (Serialized References are written into the
-      // 'info' file that accompanies HBase Store files).
-      out.writeUTF(Integer.toString(encodedRegionName));
-      out.writeLong(fileid);
-      // Write true if we're doing top of the file.
-      out.writeBoolean(isTopFileRegion(region));
-      midkey.write(out);
-    }
-
-    public void readFields(DataInput in) throws IOException {
-      this.encodedRegionName = Integer.parseInt(in.readUTF());
-      fileid = in.readLong();
-      boolean tmp = in.readBoolean();
-      // If true, set region to top.
-      region = tmp? Range.top: Range.bottom;
-      midkey = new HStoreKey();
-      midkey.readFields(in);
-    }
-  }
-
-  /**
-   * Hbase customizations of MapFile.
-   */
-  static class HbaseMapFile extends MapFile {
-    static final Class<? extends Writable>  VALUE_CLASS =
-      ImmutableBytesWritable.class;
-
-    /**
-     * Custom bloom filter key maker.
-     * @param key
-     * @return Key made of bytes of row only.
-     */
-    protected static Key getBloomFilterKey(WritableComparable key) {
-      return new Key(((HStoreKey) key).getRow());
-    }
-
-    /**
-     * A reader capable of reading and caching blocks of the data file.
-     */
-    static class HbaseReader extends MapFile.Reader {
-
-      private final boolean blockCacheEnabled;
-
-      /**
-       * @param fs
-       * @param dirName
-       * @param conf
-       * @param hri
-       * @throws IOException
-       */
-      public HbaseReader(FileSystem fs, String dirName, Configuration conf,
-        HRegionInfo hri)
-      throws IOException {
-        this(fs, dirName, conf, false, hri);
-      }
-      
-      /**
-       * @param fs
-       * @param dirName
-       * @param conf
-       * @param blockCacheEnabled
-       * @param hri
-       * @throws IOException
-       */
-      public HbaseReader(FileSystem fs, String dirName, Configuration conf,
-          boolean blockCacheEnabled, HRegionInfo hri)
-      throws IOException {
-        super(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), 
-            conf, false); // defer opening streams
-        this.blockCacheEnabled = blockCacheEnabled;
-        open(fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri), conf);
-        
-        // Force reading of the mapfile index by calling midKey.
-        // Reading the index will bring the index into memory over
-        // here on the client and then close the index file freeing
-        // up socket connection and resources in the datanode. 
-        // Usually, the first access on a MapFile.Reader will load the
-        // index force the issue in HStoreFile MapFiles because an
-        // access may not happen for some time; meantime we're
-        // using up datanode resources.  See HADOOP-2341.
-        midKey();
-      }
-
-      @Override
-      protected org.apache.hadoop.io.SequenceFile.Reader createDataFileReader(
-          FileSystem fs, Path dataFile, Configuration conf)
-      throws IOException {
-        if (!blockCacheEnabled) {
-          return super.createDataFileReader(fs, dataFile, conf);
-        }
-        LOG.info("Block Cache enabled");
-        final int blockSize = conf.getInt("hbase.hstore.blockCache.blockSize",
-            64 * 1024);
-        return new SequenceFile.Reader(fs, dataFile,  conf) {
-          @Override
-          protected FSDataInputStream openFile(FileSystem fs, Path file,
-              int bufferSize, long length) throws IOException {
-            
-            return new FSDataInputStream(new BlockFSInputStream(
-                    super.openFile(fs, file, bufferSize, length), length,
-                    blockSize));
-          }
-        };
-      }
-    }
-    
-    static class HbaseWriter extends MapFile.Writer {
-      /**
-       * @param conf
-       * @param fs
-       * @param dirName
-       * @param compression
-       * @param hri
-       * @throws IOException
-       */
-      public HbaseWriter(Configuration conf, FileSystem fs, String dirName,
-          SequenceFile.CompressionType compression, final HRegionInfo hri)
-      throws IOException {
-        super(conf, fs, dirName, new HStoreKey.HStoreKeyWritableComparator(hri),
-           VALUE_CLASS, compression);
-        // Default for mapfiles is 128.  Makes random reads faster if we
-        // have more keys indexed and we're not 'next'-ing around in the
-        // mapfile.
-        setIndexInterval(conf.getInt("hbase.io.index.interval", 128));
-      }
-    }
-  }
-  
-  /**
-   * On write, all keys are added to a bloom filter.  On read, all keys are
-   * tested first against bloom filter. Keys are HStoreKey.  If passed bloom
-   * filter is null, just passes invocation to parent.
-   */
-  static class BloomFilterMapFile extends HbaseMapFile {
-    protected static final String BLOOMFILTER_FILE_NAME = "filter";
-
-    static class Reader extends HbaseReader {
-      private final BloomFilter bloomFilter;
-
-      /**
-       * @param fs
-       * @param dirName
-       * @param conf
-       * @param filter
-       * @param blockCacheEnabled
-       * @param hri
-       * @throws IOException
-       */
-      public Reader(FileSystem fs, String dirName, Configuration conf,
-          final boolean filter, final boolean blockCacheEnabled, 
-          HRegionInfo hri)
-      throws IOException {
-        super(fs, dirName, conf, blockCacheEnabled, hri);
-        if (filter) {
-          this.bloomFilter = loadBloomFilter(fs, dirName);
-        } else {
-          this.bloomFilter = null;
-        }
-      }
-
-      private BloomFilter loadBloomFilter(FileSystem fs, String dirName)
-      throws IOException {
-        Path filterFile = new Path(dirName, BLOOMFILTER_FILE_NAME);
-        if(!fs.exists(filterFile)) {
-          throw new FileNotFoundException("Could not find bloom filter: " +
-              filterFile);
-        }
-        BloomFilter filter = new BloomFilter();
-        FSDataInputStream in = fs.open(filterFile);
-        try {
-          filter.readFields(in);
-        } finally {
-          in.close();
-        }
-        return filter;
-      }
-      
-      @Override
-      public Writable get(WritableComparable key, Writable val)
-      throws IOException {
-        if (bloomFilter == null) {
-          return super.get(key, val);
-        }
-        if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("bloom filter reported that key exists");
-          }
-          return super.get(key, val);
-        }
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("bloom filter reported that key does not exist");
-        }
-        return null;
-      }
-
-      @Override
-      public WritableComparable getClosest(WritableComparable key,
-          Writable val) throws IOException {
-        if (bloomFilter == null) {
-          return super.getClosest(key, val);
-        }
-        // Note - the key being passed to us is always a HStoreKey
-        if(bloomFilter.membershipTest(getBloomFilterKey(key))) {
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("bloom filter reported that key exists");
-          }
-          return super.getClosest(key, val);
-        }
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("bloom filter reported that key does not exist");
-        }
-        return null;
-      }
-      
-      /* @return size of the bloom filter */
-      int getBloomFilterSize() {
-        return bloomFilter == null ? 0 : bloomFilter.getVectorSize();
-      }
-    }
-    
-    static class Writer extends HbaseWriter {
-      private static final double DEFAULT_NUMBER_OF_HASH_FUNCTIONS = 4.0;
-      private final BloomFilter bloomFilter;
-      private final String dirName;
-      private final FileSystem fs;
-      
-      /**
-       * @param conf
-       * @param fs
-       * @param dirName
-       * @param compression
-       * @param filter
-       * @param nrows
-       * @param hri
-       * @throws IOException
-       */
-      @SuppressWarnings("unchecked")
-      public Writer(Configuration conf, FileSystem fs, String dirName,
-        SequenceFile.CompressionType compression, final boolean filter,
-        int nrows, final HRegionInfo hri)
-      throws IOException {
-        super(conf, fs, dirName, compression, hri);
-        this.dirName = dirName;
-        this.fs = fs;
-        if (filter) {
-          /* 
-           * There is no way to automatically determine the vector size and the
-           * number of hash functions to use. In particular, bloom filters are
-           * very sensitive to the number of elements inserted into them. For
-           * HBase, the number of entries depends on the size of the data stored
-           * in the column. Currently the default region size is 256MB, so the
-           * number of entries is approximately 
-           * 256MB / (average value size for column).
-           * 
-           * If m denotes the number of bits in the Bloom filter (vectorSize),
-           * n denotes the number of elements inserted into the Bloom filter and
-           * k represents the number of hash functions used (nbHash), then
-           * according to Broder and Mitzenmacher,
-           * 
-           * ( http://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/BloomFilterSurvey.pdf )
-           * 
-           * the probability of false positives is minimized when k is
-           * approximately m/n ln(2).
-           * 
-           * If we fix the number of hash functions and know the number of
-           * entries, then the optimal vector size m = (k * n) / ln(2)
-           */
-          this.bloomFilter = new BloomFilter(
-              (int) Math.ceil(
-                  (DEFAULT_NUMBER_OF_HASH_FUNCTIONS * (1.0 * nrows)) /
-                  Math.log(2.0)),
-              (int) DEFAULT_NUMBER_OF_HASH_FUNCTIONS,
-              Hash.getHashType(conf)
-          );
-        } else {
-          this.bloomFilter = null;
-        }
-      }
-
-      @Override
-      public void append(WritableComparable key, Writable val)
-      throws IOException {
-        if (bloomFilter != null) {
-          bloomFilter.add(getBloomFilterKey(key));
-        }
-        super.append(key, val);
-      }
-
-      @Override
-      public synchronized void close() throws IOException {
-        super.close();
-        if (this.bloomFilter != null) {
-          flushBloomFilter();
-        }
-      }
-      
-      /**
-       * Flushes bloom filter to disk
-       * 
-       * @throws IOException
-       */
-      private void flushBloomFilter() throws IOException {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("flushing bloom filter for " + this.dirName);
-        }
-        FSDataOutputStream out =
-          fs.create(new Path(dirName, BLOOMFILTER_FILE_NAME));
-        try {
-          bloomFilter.write(out);
-        } finally {
-          out.close();
-        }
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("flushed bloom filter for " + this.dirName);
-        }
-      }
-    }
-  }
-  
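
The sizing comment in the removed Writer above (the class now lives in org.apache.hadoop.hbase.io.BloomFilterMapFile) reduces to m = ceil(k * n / ln 2). As a worked example with a made-up row count, k = 4 hash functions and n = 1,000,000 entries give roughly 5.77 million bits, about 704 KB:

    /**
     * Worked example of the bloom filter vector-size formula; the
     * constants mirror the removed Writer, the row count is hypothetical.
     */
    public class BloomSizing {
      public static void main(String[] args) {
        double k = 4.0;   // DEFAULT_NUMBER_OF_HASH_FUNCTIONS
        int n = 1000000;  // expected entries (nrows)
        int m = (int) Math.ceil((k * n) / Math.log(2.0)); // vectorSize
        System.out.println(m + " bits, ~" + (m / 8 / 1024) + " KB");
        // prints: 5770781 bits, ~704 KB
      }
    }
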
-  /**
-   * A facade for a {@link MapFile.Reader} that serves up either the top or
-   * bottom half of a MapFile (where 'bottom' is the first half of the file
-   * containing the keys that sort lowest and 'top' is the second half of the
-   * file with keys that sort greater than those of the bottom half).
-   * Subclasses BloomFilterMapFile.Reader in case 
-   * 
-   * <p>This file is not splitable.  Calls to {@link #midKey()} return null.
-   */
-  static class HalfMapFileReader extends BloomFilterMapFile.Reader {
-    private final boolean top;
-    private final WritableComparable midkey;
-    private boolean firstNextCall = true;
-    
-    HalfMapFileReader(final FileSystem fs, final String dirName, 
-        final Configuration conf, final Range r,
-        final WritableComparable midKey,
-        final HRegionInfo hri)
-    throws IOException {
-      this(fs, dirName, conf, r, midKey, false, false, hri);
-    }
-    
-    HalfMapFileReader(final FileSystem fs, final String dirName, 
-        final Configuration conf, final Range r,
-        final WritableComparable midKey, final boolean filter,
-        final boolean blockCacheEnabled,
-        final HRegionInfo hri)
-    throws IOException {
-      super(fs, dirName, conf, filter, blockCacheEnabled, hri);
-      top = isTopFileRegion(r);
-      midkey = midKey;
-    }
-    
-    @SuppressWarnings("unchecked")
-    private void checkKey(final WritableComparable key)
-    throws IOException {
-      if (top) {
-        if (key.compareTo(midkey) < 0) {
-          throw new IOException("Illegal Access: Key is less than midKey of " +
-          "backing mapfile");
-        }
-      } else if (key.compareTo(midkey) >= 0) {
-        throw new IOException("Illegal Access: Key is greater than or equal " +
-        "to midKey of backing mapfile");
-      }
-    }
-
-    @Override
-    public synchronized void finalKey(WritableComparable key)
-    throws IOException {
-      if (top) {
-        super.finalKey(key); 
-      } else {
-        reset();
-        Writable value = new ImmutableBytesWritable();
-        WritableComparable k = super.getClosest(midkey, value, true);
-        ByteArrayOutputStream byteout = new ByteArrayOutputStream();
-        DataOutputStream out = new DataOutputStream(byteout);
-        k.write(out);
-        ByteArrayInputStream bytein =
-          new ByteArrayInputStream(byteout.toByteArray());
-        DataInputStream in = new DataInputStream(bytein);
-        key.readFields(in);
-      }
-    }
-
-    @Override
-    public synchronized Writable get(WritableComparable key, Writable val)
-        throws IOException {
-      checkKey(key);
-      return super.get(key, val);
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public synchronized WritableComparable getClosest(WritableComparable key,
-      Writable val)
-    throws IOException {
-      WritableComparable closest = null;
-      if (top) {
-        // If top, the lowest possible key is midkey.  Do not have to check
-        // what comes back from super getClosest.  Will return exact match or
-        // greater.
-        closest = (key.compareTo(this.midkey) < 0)?
-          this.midkey: super.getClosest(key, val);
-      } else {
-        // We're serving bottom of the file.
-        if (key.compareTo(this.midkey) < 0) {
-          // Check key is within range for bottom.
-          closest = super.getClosest(key, val);
-          // midkey was made against largest store file at time of split. Smaller
-          // store files could have anything in them.  Check return value is
-          // not beyond the midkey (getClosest returns exact match or next
-          // after).
-          if (closest != null && closest.compareTo(this.midkey) >= 0) {
-            // Don't let this value out.
-            closest = null;
-          }
-        }
-        // Else, key is > midkey so let out closest = null.
-      }
-      return closest;
-    }
-
-    @SuppressWarnings("unused")
-    @Override
-    public synchronized WritableComparable midKey() throws IOException {
-      // Returns null to indicate file is not splitable.
-      return null;
-    }
-
-    @SuppressWarnings("unchecked")
-    @Override
-    public synchronized boolean next(WritableComparable key, Writable val)
-    throws IOException {
-      if (firstNextCall) {
-        firstNextCall = false;
-        if (this.top) {
-          // Seek to midkey.  Midkey may not exist in this file.  That should be
-          // fine.  Then we'll either be positioned at end or start of file.
-          WritableComparable nearest = getClosest(midkey, val);
-          // Now copy the mid key into the passed key.
-          if (nearest != null) {
-            Writables.copyWritable(nearest, key);
-            return true;
-          }
-          return false;
-        }
-      }
-      boolean result = super.next(key, val);
-      if (!top && key.compareTo(midkey) >= 0) {
-        result = false;
-      }
-      return result;
-    }
-
-    @Override
-    public synchronized void reset() throws IOException {
-      if (top) {
-        firstNextCall = true;
-        seek(midkey);
-        return;
-      }
-      super.reset();
-    }
-
-    @Override
-    public synchronized boolean seek(WritableComparable key)
-    throws IOException {
-      checkKey(key);
-      return super.seek(key);
-    }
-  }
-}
+}
\ No newline at end of file
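
Everything deleted above -- Reference, HbaseMapFile, BloomFilterMapFile, and HalfMapFileReader -- now lives under org.apache.hadoop.hbase.io, per the new imports earlier in this file's diff. The invariant the half readers enforce is worth restating: a bottom reader never surfaces a key at or above the midkey, and a top reader never surfaces one below it. A minimal sketch of that guard alone, with hypothetical names (the committed io.HalfMapFileReader is not shown in this part of the message):

    import java.io.IOException;
    import org.apache.hadoop.io.WritableComparable;

    /** Sketch: the key-range check a half-file reader applies on access. */
    class HalfGuard {
      private final boolean top;               // true: serve [midkey, end)
      private final WritableComparable midkey; // split point from the Reference

      HalfGuard(final boolean top, final WritableComparable midkey) {
        this.top = top;
        this.midkey = midkey;
      }

      @SuppressWarnings("unchecked")
      void checkKey(final WritableComparable key) throws IOException {
        if (top) {
          if (key.compareTo(midkey) < 0) {
            throw new IOException("Key is less than midKey of backing mapfile");
          }
        } else if (key.compareTo(midkey) >= 0) {
          throw new IOException("Key is greater than or equal to midKey");
        }
      }
    }
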

Modified: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseClusterTestCase.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseClusterTestCase.java?rev=711155&r1=711154&r2=711155&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseClusterTestCase.java (original)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HBaseClusterTestCase.java Mon Nov  3 17:28:53 2008
@@ -21,24 +21,22 @@
 
 import java.io.PrintWriter;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.client.HConnectionManager;
+import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.ReflectionUtils;
-import org.apache.hadoop.hbase.client.HConnectionManager;
-import org.apache.hadoop.hbase.client.HTable;
 
 /**
  * Abstract base class for HBase cluster junit tests.  Spins up an hbase
  * cluster in setup and tears it down again in tearDown.
  */
 public abstract class HBaseClusterTestCase extends HBaseTestCase {
-  private static final Log LOG =
-    LogFactory.getLog(HBaseClusterTestCase.class.getName());
-  
+  private static final Log LOG = LogFactory.getLog(HBaseClusterTestCase.class);
   protected MiniHBaseCluster cluster;
   protected MiniDFSCluster dfsCluster;
   protected int regionServers;
@@ -85,7 +83,6 @@
   /**
    * Actually start the MiniHBase instance.
    */
-  @SuppressWarnings("unused")
   protected void hBaseClusterSetup() throws Exception {
     // start the mini cluster
     this.cluster = new MiniHBaseCluster(conf, regionServers);
@@ -93,7 +90,7 @@
     // We need to sleep because we cannot open a HTable when the cluster
     // is not ready
     Thread.sleep(5000);
-    HTable meta = new HTable(conf, ".META.");
+    new HTable(conf, HConstants.META_TABLE_NAME);
   }
   
   /**
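
The fixed Thread.sleep above is a heuristic; the HTable open that follows it doubles as the readiness probe, since, as the comment notes, a table cannot be opened while the cluster is still coming up. A sketch of a polling variant -- not the committed behavior; waitForMeta is a hypothetical name, the retry count and backoff are arbitrary, and it assumes the test case's conf field and this file's existing imports:

    // Sketch: poll for cluster readiness rather than sleep a fixed interval.
    private void waitForMeta() throws Exception {
      for (int i = 0; i < 10; i++) {
        try {
          new HTable(conf, HConstants.META_TABLE_NAME); // fails until ready
          return;
        } catch (IOException e) {
          Thread.sleep(1000); // back off and try again
        }
      }
      throw new IOException("HBase cluster did not come up");
    }
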
@@ -112,12 +109,12 @@
 
         // mangle the conf so that the fs parameter points to the minidfs we
         // just started up
-        FileSystem fs = dfsCluster.getFileSystem();
-        conf.set("fs.default.name", fs.getUri().toString());      
-        Path parentdir = fs.getHomeDirectory();
+        FileSystem filesystem = dfsCluster.getFileSystem();
+        conf.set("fs.default.name", filesystem.getUri().toString());      
+        Path parentdir = filesystem.getHomeDirectory();
         conf.set(HConstants.HBASE_DIR, parentdir.toString());
-        fs.mkdirs(parentdir);
-        FSUtils.setVersion(fs, parentdir);
+        filesystem.mkdirs(parentdir);
+        FSUtils.setVersion(filesystem, parentdir);
       }
 
       // do the super setup now. if we had done it first, then we would have


