hbase-commits mailing list archives

From: st...@apache.org
Subject: svn commit: r788160 [1/2] - in /hadoop/hbase/trunk: ./ bin/ conf/ src/docs/src/documentation/content/xdocs/ src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/regionserver/ src/java/org/apache/ha...
Date: Wed, 24 Jun 2009 19:56:52 GMT
Author: stack
Date: Wed Jun 24 19:56:51 2009
New Revision: 788160

URL: http://svn.apache.org/viewvc?rev=788160&view=rev
Log:
HBASE-1578 Change the name of the in-memory updates from 'memcache' to 'memtable' or....

Added:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStore.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestMemStore.java
Removed:
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/DFSAbort.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestMemcache.java
Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/bin/HBase.rb
    hadoop/hbase/trunk/bin/hirb.rb
    hadoop/hbase/trunk/conf/hbase-default.xml
    hadoop/hbase/trunk/src/docs/src/documentation/content/xdocs/metrics.xml
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HServerLoad.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/KeyValue.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/UnmodifyableHTableDescriptor.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/DeleteCompare.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/QueryMatcher.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/Store.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/WildcardColumnTracker.java
    hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestClassMigration.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/client/TestClient.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/client/TestScannerTimes.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestCompaction.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestDeleteCompare.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestHRegion.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestLogRolling.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestQueryMatcher.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestScanner.java
    hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/regionserver/TestStore.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Wed Jun 24 19:56:51 2009
@@ -397,6 +397,8 @@
                row tables & performance
    HBASE-1577  Move memcache to ConcurrentSkipListMap from
                ConcurrentSkipListSet
+   HBASE-1578  Change the name of the in-memory updates from 'memcache' to
+               'memtable' or....
 
   OPTIMIZATIONS
    HBASE-1412  Change values for delete column and column family in KeyValue

Modified: hadoop/hbase/trunk/bin/HBase.rb
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/bin/HBase.rb?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/bin/HBase.rb (original)
+++ hadoop/hbase/trunk/bin/HBase.rb Wed Jun 24 19:56:51 2009
@@ -217,9 +217,9 @@
           htd.setMaxFileSize(HTableDescriptor::DEFAULT_MAX_FILESIZE);
         args[READONLY]? htd.setReadOnly(JBoolean.valueOf(args[READONLY])) : 
           htd.setReadOnly(HTableDescriptor::DEFAULT_READONLY);
-        args[MEMCACHE_FLUSHSIZE]? 
-          htd.setMemcacheFlushSize(JLong.valueOf(args[MEMCACHE_FLUSHSIZE])) :
-          htd.setMemcacheFlushSize(HTableDescriptor::DEFAULT_MEMCACHE_FLUSH_SIZE);
+        args[MEMSTORE_FLUSHSIZE]? 
+          htd.setMemStoreFlushSize(JLong.valueOf(args[MEMSTORE_FLUSHSIZE])) :
+          htd.setMemStoreFlushSize(HTableDescriptor::DEFAULT_MEMSTORE_FLUSH_SIZE);
         @admin.modifyTable(tableName.to_java_bytes, htd)
       else
         descriptor = hcd(args) 

Modified: hadoop/hbase/trunk/bin/hirb.rb
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/bin/hirb.rb?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/bin/hirb.rb (original)
+++ hadoop/hbase/trunk/bin/hirb.rb Wed Jun 24 19:56:51 2009
@@ -156,7 +156,7 @@
            hbase> alter 't1', {NAME => 'f1', METHOD => 'delete'}
 
            You can also change table-scope attributes like MAX_FILESIZE
-           MEMCACHE_FLUSHSIZE and READONLY.
+           MEMSTORE_FLUSHSIZE and READONLY.
 
            For example, to change the max size of a family to 128MB, do:
            hbase> alter 't1', {METHOD => 'table_att', MAX_FILESIZE => '134217728'}
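
With the attribute renamed, the corresponding table-scope alter follows the
same format as the MAX_FILESIZE example above (the 64MB value shown here is
purely illustrative):

           hbase> alter 't1', {METHOD => 'table_att', MEMSTORE_FLUSHSIZE => '67108864'}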

Modified: hadoop/hbase/trunk/conf/hbase-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/conf/hbase-default.xml?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/conf/hbase-default.xml (original)
+++ hadoop/hbase/trunk/conf/hbase-default.xml Wed Jun 24 19:56:51 2009
@@ -230,20 +230,20 @@
     </description>
   </property>
   <property>
-    <name>hbase.regionserver.globalMemcache.upperLimit</name>
+    <name>hbase.regionserver.global.memstore.upperLimit</name>
     <value>0.4</value>
-    <description>Maximum size of all memcaches in a region server before new 
-      updates are blocked and flushes are forced. Defaults to 40% of heap.
+    <description>Maximum size of all memstores in a region server before new 
+      updates are blocked and flushes are forced. Defaults to 40% of heap
     </description>
   </property>
   <property>
-    <name>hbase.regionserver.globalMemcache.lowerLimit</name>
+    <name>hbase.regionserver.global.memstore.lowerLimit</name>
     <value>0.25</value>
-    <description>When memcaches are being forced to flush to make room in
+    <description>When memstores are being forced to flush to make room in
       memory, keep flushing until we hit this mark. Defaults to 30% of heap. 
-      This value equal to hbase.regionserver.globalmemcache.upperLimit causes
+      This value equal to hbase.regionserver.global.memstore.upperLimit causes
       the minimum possible flushing to occur when updates are blocked due to 
-      memcache limiting.
+      memstore limiting.
     </description>
   </property>  
   <property>
@@ -268,22 +268,22 @@
     </description>
   </property>
   <property>
-    <name>hbase.hregion.memcache.flush.size</name>
+    <name>hbase.hregion.memstore.flush.size</name>
     <value>67108864</value>
     <description>
-    A HRegion memcache will be flushed to disk if size of the memcache
+    Memstore will be flushed to disk if size of the memstore
     exceeds this number of bytes.  Value is checked by a thread that runs
-    every hbase.server.thread.wakefrequency.  
+    every hbase.server.thread.wakefrequency.
     </description>
   </property>
   <property>
-    <name>hbase.hregion.memcache.block.multiplier</name>
+    <name>hbase.hregion.memstore.block.multiplier</name>
     <value>2</value>
     <description>
-    Block updates if memcache has hbase.hregion.block.memcache
+    Block updates if memstore has hbase.hregion.block.memstore
     time hbase.hregion.flush.size bytes.  Useful preventing
-    runaway memcache during spikes in update traffic.  Without an
-    upper-bound, memcache fills such that when it flushes the
+    runaway memstore during spikes in update traffic.  Without an
+    upper-bound, memstore fills such that when it flushes the
     resultant flush files take a long time to compact or split, or
     worse, we OOME.
     </description>
@@ -302,7 +302,7 @@
     <value>3</value>
     <description>
     If more than this number of HStoreFiles in any one HStore
-    (one HStoreFile is written per flush of memcache) then a compaction
+    (one HStoreFile is written per flush of memstore) then a compaction
     is run to rewrite all HStoreFiles files as one.  Larger numbers
     put off compaction but when it runs, it takes longer to complete.
     During a compaction, updates cannot be flushed to disk.  Long
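
Taken together, these renamed settings determine when a region blocks writers:
a region flushes once its memstore reaches hbase.hregion.memstore.flush.size,
and blocks updates once it reaches that size times
hbase.hregion.memstore.block.multiplier. A minimal Java sketch of the
derivation, using a plain Hadoop Configuration (defaults mirror the values
above; this is an illustration, not the HRegion code itself):

    import org.apache.hadoop.conf.Configuration;

    Configuration conf = new Configuration();
    // Per-region flush threshold (64MB in hbase-default.xml above).
    long flushSize = conf.getLong("hbase.hregion.memstore.flush.size", 67108864L);
    // Writers block while the region memstore exceeds flushSize * multiplier.
    long blockingSize =
      flushSize * conf.getInt("hbase.hregion.memstore.block.multiplier", 2);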

Modified: hadoop/hbase/trunk/src/docs/src/documentation/content/xdocs/metrics.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/docs/src/documentation/content/xdocs/metrics.xml?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/docs/src/documentation/content/xdocs/metrics.xml (original)
+++ hadoop/hbase/trunk/src/docs/src/documentation/content/xdocs/metrics.xml Wed Jun 24 19:56:51 2009
@@ -49,7 +49,7 @@
       </p>
       <p>
       If you enable the <em>hbase</em> context, on regionservers you'll see total requests since last
-      metric emission, count of regions and storefiles as well as a count of memcache size.
+      metric emission, count of regions and storefiles as well as a count of memstore size.
       On the master, you'll see a count of the cluster's requests.
       </p>
       <p>

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HServerLoad.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HServerLoad.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HServerLoad.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HServerLoad.java Wed Jun 24 19:56:51 2009
@@ -59,8 +59,8 @@
     private int stores;
     /** the number of storefiles for the region */
     private int storefiles;
-    /** the current size of the memcache for the region, in MB */
-    private int memcacheSizeMB;
+    /** the current size of the memstore for the region, in MB */
+    private int memstoreSizeMB;
     /** the current total size of storefile indexes for the region, in MB */
     private int storefileIndexSizeMB;
 
@@ -75,16 +75,16 @@
      * @param name
      * @param stores
      * @param storefiles
-     * @param memcacheSizeMB
+     * @param memstoreSizeMB
      * @param storefileIndexSizeMB
      */
     public RegionLoad(final byte[] name, final int stores,
-        final int storefiles, final int memcacheSizeMB,
+        final int storefiles, final int memstoreSizeMB,
         final int storefileIndexSizeMB) {
       this.name = name;
       this.stores = stores;
       this.storefiles = storefiles;
-      this.memcacheSizeMB = memcacheSizeMB;
+      this.memstoreSizeMB = memstoreSizeMB;
       this.storefileIndexSizeMB = storefileIndexSizeMB;
     }
 
@@ -119,10 +119,10 @@
     }
 
     /**
-     * @return the memcache size, in MB
+     * @return the memstore size, in MB
      */
-    public int getMemcacheSizeMB() {
-      return memcacheSizeMB;
+    public int getMemStoreSizeMB() {
+      return memstoreSizeMB;
     }
 
     /**
@@ -156,10 +156,10 @@
     }
 
     /**
-     * @param memcacheSizeMB the memcache size, in MB
+     * @param memstoreSizeMB the memstore size, in MB
      */
-    public void setMemcacheSizeMB(int memcacheSizeMB) {
-      this.memcacheSizeMB = memcacheSizeMB;
+    public void setMemStoreSizeMB(int memstoreSizeMB) {
+      this.memstoreSizeMB = memstoreSizeMB;
     }
 
     /**
@@ -177,7 +177,7 @@
       in.readFully(this.name);
       this.stores = in.readInt();
       this.storefiles = in.readInt();
-      this.memcacheSizeMB = in.readInt();
+      this.memstoreSizeMB = in.readInt();
       this.storefileIndexSizeMB = in.readInt();
     }
 
@@ -186,7 +186,7 @@
       out.write(name);
       out.writeInt(stores);
       out.writeInt(storefiles);
-      out.writeInt(memcacheSizeMB);
+      out.writeInt(memstoreSizeMB);
       out.writeInt(storefileIndexSizeMB);
     }
 
@@ -199,8 +199,8 @@
         Integer.valueOf(this.stores));
       sb = Strings.appendKeyValue(sb, "storefiles",
         Integer.valueOf(this.storefiles));
-      sb = Strings.appendKeyValue(sb, "memcacheSize",
-        Integer.valueOf(this.memcacheSizeMB));
+      sb = Strings.appendKeyValue(sb, "memstoreSize",
+        Integer.valueOf(this.memstoreSizeMB));
       sb = Strings.appendKeyValue(sb, "storefileIndexSize",
         Integer.valueOf(this.storefileIndexSizeMB));
       return sb.toString();
@@ -350,12 +350,12 @@
   }
 
   /**
-   * @return Size of memcaches in MB
+   * @return Size of memstores in MB
    */
-  public int getMemcacheSizeInMB() {
+  public int getMemStoreSizeInMB() {
     int count = 0;
     for (RegionLoad info: regionLoad)
-    	count += info.getMemcacheSizeMB();
+    	count += info.getMemStoreSizeMB();
     return count;
   }
 
@@ -411,16 +411,16 @@
    * @param name
    * @param stores
    * @param storefiles
-   * @param memcacheSizeMB
+   * @param memstoreSizeMB
    * @param storefileIndexSizeMB
    * @deprecated Use {@link #addRegionInfo(RegionLoad)}
    */
   @Deprecated
   public void addRegionInfo(final byte[] name, final int stores,
-      final int storefiles, final int memcacheSizeMB,
+      final int storefiles, final int memstoreSizeMB,
       final int storefileIndexSizeMB) {
     this.regionLoad.add(new HServerLoad.RegionLoad(name, stores, storefiles,
-      memcacheSizeMB, storefileIndexSizeMB));
+      memstoreSizeMB, storefileIndexSizeMB));
   }
 
   // Writable
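
A small usage sketch for the renamed RegionLoad accessors (the region name and
figures are illustrative only):

    byte [] name = org.apache.hadoop.hbase.util.Bytes.toBytes("region-a");
    HServerLoad.RegionLoad load = new HServerLoad.RegionLoad(name,
      1 /*stores*/, 3 /*storefiles*/, 64 /*memstoreSizeMB*/,
      1 /*storefileIndexSizeMB*/);
    int memstoreMB = load.getMemStoreSizeMB(); // 64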

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HTableDescriptor.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HTableDescriptor.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/HTableDescriptor.java Wed Jun 24 19:56:51 2009
@@ -70,9 +70,9 @@
   public static final String READONLY = "READONLY";
   public static final ImmutableBytesWritable READONLY_KEY =
     new ImmutableBytesWritable(Bytes.toBytes(READONLY));
-  public static final String MEMCACHE_FLUSHSIZE = "MEMCACHE_FLUSHSIZE";
-  public static final ImmutableBytesWritable MEMCACHE_FLUSHSIZE_KEY =
-    new ImmutableBytesWritable(Bytes.toBytes(MEMCACHE_FLUSHSIZE));
+  public static final String MEMSTORE_FLUSHSIZE = "MEMSTORE_FLUSHSIZE";
+  public static final ImmutableBytesWritable MEMSTORE_FLUSHSIZE_KEY =
+    new ImmutableBytesWritable(Bytes.toBytes(MEMSTORE_FLUSHSIZE));
   public static final String IS_ROOT = "IS_ROOT";
   public static final ImmutableBytesWritable IS_ROOT_KEY =
     new ImmutableBytesWritable(Bytes.toBytes(IS_ROOT));
@@ -94,7 +94,7 @@
 
   public static final boolean DEFAULT_READONLY = false;
 
-  public static final int DEFAULT_MEMCACHE_FLUSH_SIZE = 1024*1024*64;
+  public static final int DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*64;
   
   public static final int DEFAULT_MAX_FILESIZE = 1024*1024*256;
     
@@ -122,7 +122,7 @@
     for(HColumnDescriptor descriptor : families) {
       this.families.put(descriptor.getName(), descriptor);
     }
-    setMemcacheFlushSize(16 * 1024);
+    setMemStoreFlushSize(16 * 1024);
   }
 
   /**
@@ -440,19 +440,19 @@
   /**
    * @return memory cache flush size for each hregion
    */
-  public int getMemcacheFlushSize() {
-    byte [] value = getValue(MEMCACHE_FLUSHSIZE_KEY);
+  public int getMemStoreFlushSize() {
+    byte [] value = getValue(MEMSTORE_FLUSHSIZE_KEY);
     if (value != null)
       return Integer.valueOf(Bytes.toString(value)).intValue();
-    return DEFAULT_MEMCACHE_FLUSH_SIZE;
+    return DEFAULT_MEMSTORE_FLUSH_SIZE;
   }
   
   /**
-   * @param memcacheFlushSize memory cache flush size for each hregion
+   * @param memstoreFlushSize memory cache flush size for each hregion
    */
-  public void setMemcacheFlushSize(int memcacheFlushSize) {
-    setValue(MEMCACHE_FLUSHSIZE_KEY,
-      Bytes.toBytes(Integer.toString(memcacheFlushSize)));
+  public void setMemStoreFlushSize(int memstoreFlushSize) {
+    setValue(MEMSTORE_FLUSHSIZE_KEY,
+      Bytes.toBytes(Integer.toString(memstoreFlushSize)));
   }
     
 //  public Collection<IndexSpecification> getIndexes() {
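
A short usage sketch for the renamed HTableDescriptor accessors (the table
name 't1' is illustrative):

    HTableDescriptor htd = new HTableDescriptor("t1");
    htd.setMemStoreFlushSize(32 * 1024 * 1024); // stored under MEMSTORE_FLUSHSIZE
    int flushSize = htd.getMemStoreFlushSize(); // DEFAULT_MEMSTORE_FLUSH_SIZE if unset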

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/KeyValue.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/KeyValue.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/KeyValue.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/KeyValue.java Wed Jun 24 19:56:51 2009
@@ -214,7 +214,7 @@
   /**
    * Lowest possible key.
    * Makes a Key with highest possible Timestamp, empty row and column.  No
-   * key can be equal or lower than this one in memcache or in store file.
+   * key can be equal or lower than this one in memstore or in store file.
    */
   public static final KeyValue LOWESTKEY = 
     new KeyValue(HConstants.EMPTY_BYTE_ARRAY, HConstants.LATEST_TIMESTAMP);
@@ -1771,7 +1771,7 @@
     int compareTimestamps(final long ltimestamp, final long rtimestamp) {
       // The below older timestamps sorting ahead of newer timestamps looks
       // wrong but it is intentional. This way, newer timestamps are first
-      // found when we iterate over a memcache and newer versions are the
+      // found when we iterate over a memstore and newer versions are the
       // first we trip over when reading from a store file.
       if (ltimestamp < rtimestamp) {
         return 1;
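
Concretely: compareTimestamps(5L, 7L) returns 1 under this scheme, so a
KeyValue stamped 7 sorts ahead of one stamped 5 when iterating a memstore or
reading from a store file.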

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/UnmodifyableHTableDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/UnmodifyableHTableDescriptor.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/UnmodifyableHTableDescriptor.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/client/UnmodifyableHTableDescriptor.java Wed Jun 24 19:56:51 2009
@@ -119,10 +119,10 @@
   }
 
   /**
-   * @see org.apache.hadoop.hbase.HTableDescriptor#setMemcacheFlushSize(int)
+   * @see org.apache.hadoop.hbase.HTableDescriptor#setMemStoreFlushSize(int)
    */
   @Override
-  public void setMemcacheFlushSize(int memcacheFlushSize) {
+  public void setMemStoreFlushSize(int memstoreFlushSize) {
     throw new UnsupportedOperationException("HTableDescriptor is read-only");
   }
 

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ColumnTracker.java Wed Jun 24 19:56:51 2009
@@ -36,7 +36,7 @@
  * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
  * conditions of the query.  This method returns a {@link MatchCode} to define
  * what action should be taken.
- * <li>{@link #update} is called at the end of every StoreFile or Memcache.
+ * <li>{@link #update} is called at the end of every StoreFile or memstore.
  * <p>
  * This class is NOT thread-safe as queries are never multi-threaded 
  */

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/DeleteCompare.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/DeleteCompare.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/DeleteCompare.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/DeleteCompare.java Wed Jun 24 19:56:51 2009
@@ -5,7 +5,7 @@
 
 
 /**
- * Class that provides static method needed when putting deletes into memcache 
+ * Class that provides static method needed when putting deletes into memstore 
  */
 public class DeleteCompare {
   
@@ -14,7 +14,7 @@
    */
   enum DeleteCode {
     /**
-     * Do nothing.  Move to next KV in Memcache
+     * Do nothing.  Move to next KV in memstore
      */
     SKIP,
     
@@ -24,13 +24,13 @@
     DELETE,
     
     /**
-     * Stop looking at KVs in Memcache.  Finalize.
+     * Stop looking at KVs in memstore.  Finalize.
      */
     DONE
   }
 
   /**
-   * Method used when putting deletes into memcache to remove all the previous
+   * Method used when putting deletes into memstore to remove all the previous
    * entries that are affected by this Delete
    * @param mem
    * @param deleteBuffer

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java Wed Jun 24 19:56:51 2009
@@ -40,7 +40,7 @@
  * <ul><li>{@link #checkColumn} is called when a Put satisfies all other
  * conditions of the query.  This method returns a {@link MatchCode} to define
  * what action should be taken.
- * <li>{@link #update} is called at the end of every StoreFile or Memcache.
+ * <li>{@link #update} is called at the end of every StoreFile or memstore.
  * <p>
  * This class is NOT thread-safe as queries are never multi-threaded 
  */
@@ -129,7 +129,7 @@
   }
   
   /**
-   * Called at the end of every StoreFile or Memcache.
+   * Called at the end of every StoreFile or memstore.
    */
   public void update() {
     if(this.columns.size() != 0) {

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/FlushRequester.java Wed Jun 24 19:56:51 2009
@@ -24,7 +24,7 @@
  * Implementors of this interface want to be notified when an HRegion
  * determines that a cache flush is needed. A FlushRequester (or null)
  * must be passed to the HRegion constructor so it knows who to call when it
- * has a filled memcache.
+ * has a filled memstore.
  */
 public interface FlushRequester {
   /**

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HLog.java Wed Jun 24 19:56:51 2009
@@ -709,7 +709,7 @@
    * Abort a cache flush.
    * Call if the flush fails. Note that the only recovery for an aborted flush
    * currently is a restart of the regionserver so the snapshot content dropped
-   * by the failure gets restored to the memcache.
+   * by the failure gets restored to the memstore.
    */
   void abortCacheFlush() {
     this.cacheFlushLock.unlock();

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Wed Jun 24 19:56:51 2009
@@ -125,7 +125,7 @@
   // private int [] storeSize = null;
   // private byte [] name = null;
   
-  final AtomicLong memcacheSize = new AtomicLong(0);
+  final AtomicLong memstoreSize = new AtomicLong(0);
 
   // This is the table subdirectory.
   final Path basedir;
@@ -148,7 +148,7 @@
    * compactions and closes.
    */
   static class WriteState {
-    // Set while a memcache flush is happening.
+    // Set while a memstore flush is happening.
     volatile boolean flushing = false;
     // Set when a flush has been requested.
     volatile boolean flushRequested = false;
@@ -178,10 +178,10 @@
 
   private volatile WriteState writestate = new WriteState();
 
-  final int memcacheFlushSize;
+  final int memstoreFlushSize;
   private volatile long lastFlushTime;
   final FlushRequester flushListener;
-  private final int blockingMemcacheSize;
+  private final int blockingMemStoreSize;
   final long threadWakeFrequency;
   // Used to guard splits and closes
   private final ReentrantReadWriteLock splitsAndClosesLock =
@@ -212,12 +212,12 @@
    */
   public HRegion(){
     this.basedir = null;
-    this.blockingMemcacheSize = 0;
+    this.blockingMemStoreSize = 0;
     this.conf = null;
     this.flushListener = null;
     this.fs = null;
     this.historian = null;
-    this.memcacheFlushSize = 0;
+    this.memstoreFlushSize = 0;
     this.log = null;
     this.regionCompactionDir = null;
     this.regiondir = null;
@@ -265,14 +265,14 @@
     }
     this.regionCompactionDir =
       new Path(getCompactionDir(basedir), encodedNameStr);
-    int flushSize = regionInfo.getTableDesc().getMemcacheFlushSize();
-    if (flushSize == HTableDescriptor.DEFAULT_MEMCACHE_FLUSH_SIZE) {
-      flushSize = conf.getInt("hbase.hregion.memcache.flush.size",
-                      HTableDescriptor.DEFAULT_MEMCACHE_FLUSH_SIZE);
-    }
-    this.memcacheFlushSize = flushSize;
-    this.blockingMemcacheSize = this.memcacheFlushSize *
-      conf.getInt("hbase.hregion.memcache.block.multiplier", 1);
+    int flushSize = regionInfo.getTableDesc().getMemStoreFlushSize();
+    if (flushSize == HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE) {
+      flushSize = conf.getInt("hbase.hregion.memstore.flush.size",
+                      HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
+    }
+    this.memstoreFlushSize = flushSize;
+    this.blockingMemStoreSize = this.memstoreFlushSize *
+      conf.getInt("hbase.hregion.memstore.block.multiplier", 1);
   }
 
   /**
@@ -811,7 +811,7 @@
         this.writestate.flushing = true;
       } else {
         if(LOG.isDebugEnabled()) {
-          LOG.debug("NOT flushing memcache for region " + this +
+          LOG.debug("NOT flushing memstore for region " + this +
             ", flushing=" +
               writestate.flushing + ", writesEnabled=" +
               writestate.writesEnabled);
@@ -838,23 +838,23 @@
 
   /**
    * Flushing the cache is a little tricky. We have a lot of updates in the
-   * HMemcache, all of which have also been written to the log. We need to
-   * write those updates in the HMemcache out to disk, while being able to
+   * memstore, all of which have also been written to the log. We need to
+   * write those updates in the memstore out to disk, while being able to
    * process reads/writes as much as possible during the flush operation. Also,
-   * the log has to state clearly the point in time at which the HMemcache was
+   * the log has to state clearly the point in time at which the memstore was
    * flushed. (That way, during recovery, we know when we can rely on the
-   * on-disk flushed structures and when we have to recover the HMemcache from
+   * on-disk flushed structures and when we have to recover the memstore from
    * the log.)
    * 
    * <p>So, we have a three-step process:
    * 
-   * <ul><li>A. Flush the memcache to the on-disk stores, noting the current
+   * <ul><li>A. Flush the memstore to the on-disk stores, noting the current
    * sequence ID for the log.<li>
    * 
    * <li>B. Write a FLUSHCACHE-COMPLETE message to the log, using the sequence
-   * ID that was current at the time of memcache-flush.</li>
+   * ID that was current at the time of memstore-flush.</li>
    * 
-   * <li>C. Get rid of the memcache structures that are now redundant, as
+   * <li>C. Get rid of the memstore structures that are now redundant, as
    * they've been flushed to the on-disk HStores.</li>
    * </ul>
    * <p>This method is protected, but can be accessed via several public
@@ -874,27 +874,27 @@
     // Record latest flush time
     this.lastFlushTime = startTime;
     // If nothing to flush, return and avoid logging start/stop flush.
-    if (this.memcacheSize.get() <= 0) {
+    if (this.memstoreSize.get() <= 0) {
       return false;
     }
     if (LOG.isDebugEnabled()) {
-      LOG.debug("Started memcache flush for region " + this +
-        ". Current region memcache size " +
-          StringUtils.humanReadableInt(this.memcacheSize.get()));
+      LOG.debug("Started memstore flush for region " + this +
+        ". Current region memstore size " +
+          StringUtils.humanReadableInt(this.memstoreSize.get()));
     }
 
-    // Stop updates while we snapshot the memcache of all stores. We only have
+    // Stop updates while we snapshot the memstore of all stores. We only have
     // to do this for a moment.  Its quick.  The subsequent sequence id that
     // goes into the HLog after we've flushed all these snapshots also goes
     // into the info file that sits beside the flushed files.
-    // We also set the memcache size to zero here before we allow updates
+    // We also set the memstore size to zero here before we allow updates
     // again so its value will represent the size of the updates received
     // during the flush
     long sequenceId = -1L;
     long completeSequenceId = -1L;
     this.updatesLock.writeLock().lock();
-    // Get current size of memcaches.
-    final long currentMemcacheSize = this.memcacheSize.get();
+    // Get current size of memstores.
+    final long currentMemStoreSize = this.memstoreSize.get();
     try {
       for (Store s: stores.values()) {
         s.snapshot();
@@ -906,12 +906,12 @@
     }
 
     // Any failure from here on out will be catastrophic requiring server
-    // restart so hlog content can be replayed and put back into the memcache.
+    // restart so hlog content can be replayed and put back into the memstore.
     // Otherwise, the snapshot content while backed up in the hlog, it will not
     // be part of the current running servers state.
     boolean compactionRequested = false;
     try {
-      // A.  Flush memcache to all the HStores.
+      // A.  Flush memstore to all the HStores.
       // Keep running vector of all store files that includes both old and the
       // just-made new flush store file.
       for (Store hstore: stores.values()) {
@@ -920,11 +920,11 @@
           compactionRequested = true;
         }
       }
-      // Set down the memcache size by amount of flush.
-      this.memcacheSize.addAndGet(-currentMemcacheSize);
+      // Set down the memstore size by amount of flush.
+      this.memstoreSize.addAndGet(-currentMemStoreSize);
     } catch (Throwable t) {
       // An exception here means that the snapshot was not persisted.
-      // The hlog needs to be replayed so its content is restored to memcache.
+      // The hlog needs to be replayed so its content is restored to memstore.
       // Currently, only a server restart will do this.
       // We used to only catch IOEs but its possible that we'd get other
       // exceptions -- e.g. HBASE-659 was about an NPE -- so now we catch
@@ -946,7 +946,7 @@
     this.log.completeCacheFlush(getRegionName(),
         regionInfo.getTableDesc().getName(), completeSequenceId);
 
-    // C. Finally notify anyone waiting on memcache to clear:
+    // C. Finally notify anyone waiting on memstore to clear:
     // e.g. checkResources().
     synchronized (this) {
       notifyAll();
@@ -955,8 +955,8 @@
     if (LOG.isDebugEnabled()) {
       long now = System.currentTimeMillis();
       String timeTaken = StringUtils.formatTimeDiff(now, startTime);
-      LOG.debug("Finished memcache flush of ~" +
-        StringUtils.humanReadableInt(currentMemcacheSize) + " for region " +
+      LOG.debug("Finished memstore flush of ~" +
+        StringUtils.humanReadableInt(currentMemStoreSize) + " for region " +
         this + " in " + (now - startTime) + "ms, sequence id=" + sequenceId +
         ", compaction requested=" + compactionRequested);
       if (!regionInfo.isMetaRegion()) {
@@ -1161,7 +1161,7 @@
           kv.updateLatestStamp(byteNow);
         }
 
-        size = this.memcacheSize.addAndGet(store.delete(kv));
+        size = this.memstoreSize.addAndGet(store.delete(kv));
       }
       flush = isFlushSize(size);
     } finally {
@@ -1369,15 +1369,15 @@
    */
   private void checkResources() {
     boolean blocked = false;
-    while (this.memcacheSize.get() > this.blockingMemcacheSize) {
+    while (this.memstoreSize.get() > this.blockingMemStoreSize) {
       requestFlush();
       if (!blocked) {
         LOG.info("Blocking updates for '" + Thread.currentThread().getName() +
           "' on region " + Bytes.toString(getRegionName()) +
-          ": Memcache size " +
-          StringUtils.humanReadableInt(this.memcacheSize.get()) +
+          ": memstore size " +
+          StringUtils.humanReadableInt(this.memstoreSize.get()) +
           " is >= than blocking " +
-          StringUtils.humanReadableInt(this.blockingMemcacheSize) + " size");
+          StringUtils.humanReadableInt(this.blockingMemStoreSize) + " size");
       }
       blocked = true;
       synchronized(this) {
@@ -1404,7 +1404,7 @@
   }
 
   /** 
-   * Add updates first to the hlog and then add values to memcache.
+   * Add updates first to the hlog and then add values to memstore.
    * Warning: Assumption is caller has lock on passed in row.
    * @param edits Cell updates by column
    * @praram now
@@ -1416,7 +1416,7 @@
   }
 
   /** 
-   * Add updates first to the hlog (if writeToWal) and then add values to memcache.
+   * Add updates first to the hlog (if writeToWal) and then add values to memstore.
    * Warning: Assumption is caller has lock on passed in row.
    * @param family
    * @param edits
@@ -1440,7 +1440,7 @@
       long size = 0;
       Store store = getStore(family);
       for (KeyValue kv: edits) {
-        size = this.memcacheSize.addAndGet(store.add(kv));
+        size = this.memstoreSize.addAndGet(store.add(kv));
       }
       flush = isFlushSize(size);
     } finally {
@@ -1474,7 +1474,7 @@
    * @return True if size is over the flush threshold
    */
   private boolean isFlushSize(final long size) {
-    return size > this.memcacheFlushSize;
+    return size > this.memstoreFlushSize;
   }
 
   protected Store instantiateHStore(Path baseDir, 
@@ -2271,7 +2271,7 @@
           store.incrementColumnValue(row, family, qualifier, amount);
 
       result = vas.value;
-      long size = this.memcacheSize.addAndGet(vas.sizeAdded);
+      long size = this.memstoreSize.addAndGet(vas.sizeAdded);
       flush = isFlushSize(size);
     } finally {
       releaseRowLock(lid);
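
The three-step flush described in the javadoc above can be condensed into a
deliberately tiny, self-contained model (a sketch only: TinyStore and its
method names are hypothetical, and WAL bookkeeping is reduced to a single
sequence id):

    import java.util.concurrent.ConcurrentSkipListMap;

    class TinyStore {
      volatile ConcurrentSkipListMap<String, String> memstore =
        new ConcurrentSkipListMap<String, String>();
      volatile ConcurrentSkipListMap<String, String> snapshot =
        new ConcurrentSkipListMap<String, String>();

      // A: move current edits aside under brief exclusion, noting the WAL
      // sequence id current at snapshot time.
      synchronized long snapshotForFlush(long walSequenceId) {
        snapshot = memstore;
        memstore = new ConcurrentSkipListMap<String, String>();
        return walSequenceId;
      }

      // B and C: once the snapshot is durably on disk and the log records a
      // flush-complete marker at that sequence id, the now-redundant snapshot
      // can be dropped.
      synchronized void flushSucceeded() {
        snapshot = new ConcurrentSkipListMap<String, String>();
      }
    }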

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Wed Jun 24 19:56:51 2009
@@ -196,7 +196,7 @@
   CompactSplitThread compactSplitThread;
 
   // Cache flushing  
-  MemcacheFlusher cacheFlusher;
+  MemStoreFlusher cacheFlusher;
   
   /* Check for major compactions.
    */
@@ -322,7 +322,7 @@
     this.workerThread = new Thread(worker);
 
     // Cache flushing thread.
-    this.cacheFlusher = new MemcacheFlusher(conf, this);
+    this.cacheFlusher = new MemStoreFlusher(conf, this);
     
     // Compaction thread
     this.compactSplitThread = new CompactSplitThread(this);
@@ -756,7 +756,7 @@
     byte[] name = r.getRegionName();
     int stores = 0;
     int storefiles = 0;
-    int memcacheSizeMB = (int)(r.memcacheSize.get()/1024/1024);
+    int memstoreSizeMB = (int)(r.memstoreSize.get()/1024/1024);
     int storefileIndexSizeMB = 0;
     synchronized (r.stores) {
       stores += r.stores.size();
@@ -766,7 +766,7 @@
           (int)(store.getStorefilesIndexSize()/1024/1024);
       }
     }
-    return new HServerLoad.RegionLoad(name, stores, storefiles, memcacheSizeMB,
+    return new HServerLoad.RegionLoad(name, stores, storefiles, memstoreSizeMB,
       storefileIndexSizeMB);
   }
  
@@ -1061,12 +1061,12 @@
     // the synchronizations?
     int stores = 0;
     int storefiles = 0;
-    long memcacheSize = 0;
+    long memstoreSize = 0;
     long storefileIndexSize = 0;
     synchronized (this.onlineRegions) {
       for (Map.Entry<Integer, HRegion> e: this.onlineRegions.entrySet()) {
         HRegion r = e.getValue();
-        memcacheSize += r.memcacheSize.get();
+        memstoreSize += r.memstoreSize.get();
         synchronized (r.stores) {
           stores += r.stores.size();
           for(Map.Entry<byte [], Store> ee: r.stores.entrySet()) {
@@ -1079,7 +1079,7 @@
     }
     this.metrics.stores.set(stores);
     this.metrics.storefiles.set(storefiles);
-    this.metrics.memcacheSizeMB.set((int)(memcacheSize/(1024*1024)));
+    this.metrics.memstoreSizeMB.set((int)(memstoreSize/(1024*1024)));
     this.metrics.storefileIndexSizeMB.set((int)(storefileIndexSize/(1024*1024)));
 
     LruBlockCache lruBlockCache = (LruBlockCache)StoreFile.getBlockCache(conf);
@@ -1741,7 +1741,7 @@
     this.requestCount.incrementAndGet();
     HRegion region = getRegion(regionName);
     try {
-      cacheFlusher.reclaimMemcacheMemory();
+      cacheFlusher.reclaimMemStoreMemory();
       region.put(put, getLockFromId(put.getLockId()));
     } catch (Throwable t) {
       throw convertThrowableToIOE(cleanup(t));
@@ -1754,7 +1754,7 @@
     checkOpen();
     try {
       HRegion region = getRegion(regionName);
-      this.cacheFlusher.reclaimMemcacheMemory();
+      this.cacheFlusher.reclaimMemStoreMemory();
       Integer[] locks = new Integer[puts.length];
       for (i = 0; i < puts.length; i++) {
         this.requestCount.incrementAndGet();
@@ -1794,7 +1794,7 @@
     this.requestCount.incrementAndGet();
     HRegion region = getRegion(regionName);
     try {
-      cacheFlusher.reclaimMemcacheMemory();
+      cacheFlusher.reclaimMemStoreMemory();
       return region.checkAndPut(row, family, qualifier, value, put,
           getLockFromId(put.getLockId()), true);
     } catch (Throwable t) {
@@ -1932,7 +1932,7 @@
     checkOpen();
     try {
       boolean writeToWAL = true;
-      this.cacheFlusher.reclaimMemcacheMemory();
+      this.cacheFlusher.reclaimMemStoreMemory();
       this.requestCount.incrementAndGet();
       Integer lock = getLockFromId(delete.getLockId());
       HRegion region = getRegion(regionName);
@@ -2164,7 +2164,7 @@
     // Copy over all regions. Regions are sorted by size with biggest first.
     synchronized (this.onlineRegions) {
       for (HRegion region : this.onlineRegions.values()) {
-        sortedRegions.put(Long.valueOf(region.memcacheSize.get()), region);
+        sortedRegions.put(Long.valueOf(region.memstoreSize.get()), region);
       }
     }
     return sortedRegions;
@@ -2288,14 +2288,14 @@
   }
 
   /**
-   * Return the total size of all memcaches in every region.
-   * @return memcache size in bytes
+   * Return the total size of all memstores in every region.
+   * @return memstore size in bytes
    */
-  public long getGlobalMemcacheSize() {
+  public long getGlobalMemStoreSize() {
     long total = 0;
     synchronized (onlineRegions) {
       for (HRegion region : onlineRegions.values()) {
-        total += region.memcacheSize.get();
+        total += region.memstoreSize.get();
       }
     }
     return total;

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java Wed Jun 24 19:56:51 2009
@@ -34,7 +34,7 @@
  * Implements KeyValueScanner itself.
  * <p>
  * This class is used at the Region level to merge across Stores
- * and at the Store level to merge across the Memcache and StoreFiles.
+ * and at the Store level to merge across the memstore and StoreFiles.
  * <p>
  * In the Region case, we also need InternalScanner.next(List), so this class
  * also implements InternalScanner.  WARNING: As is, if you try to use this
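
The merge KeyValueHeap performs is a standard k-way merge over sorted inputs;
a generic, self-contained Java sketch of the idea (KWayMerge and PeekingCursor
are hypothetical names, not the HBase API):

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.Iterator;
    import java.util.List;
    import java.util.PriorityQueue;

    class KWayMerge {
      /** Merge already-sorted iterators into one sorted list. */
      static <T> List<T> merge(List<Iterator<T>> sources, final Comparator<T> cmp) {
        // One cursor per source, heap-ordered by each cursor's current head.
        PriorityQueue<PeekingCursor<T>> heap = new PriorityQueue<PeekingCursor<T>>(
          Math.max(1, sources.size()), new Comparator<PeekingCursor<T>>() {
            public int compare(PeekingCursor<T> a, PeekingCursor<T> b) {
              return cmp.compare(a.head, b.head);
            }
          });
        for (Iterator<T> it : sources) {
          if (it.hasNext()) heap.add(new PeekingCursor<T>(it));
        }
        List<T> out = new ArrayList<T>();
        while (!heap.isEmpty()) {
          PeekingCursor<T> c = heap.poll(); // smallest head across all sources
          out.add(c.head);
          if (c.advance()) heap.add(c);     // re-insert with its next head
        }
        return out;
      }

      static class PeekingCursor<T> {
        final Iterator<T> it;
        T head;
        PeekingCursor(Iterator<T> it) { this.it = it; this.head = it.next(); }
        boolean advance() {
          if (!it.hasNext()) return false;
          head = it.next();
          return true;
        }
      }
    }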

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStore.java?rev=788160&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStore.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStore.java Wed Jun 24 19:56:51 2009
@@ -0,0 +1,754 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.RuntimeMXBean;
+import java.rmi.UnexpectedException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableMap;
+import java.util.NavigableSet;
+import java.util.SortedMap;
+import java.util.TreeSet;
+import java.util.concurrent.ConcurrentSkipListMap;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.regionserver.DeleteCompare.DeleteCode;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * The MemStore holds in-memory modifications to the Store.  Modifications
+ * are {@link KeyValue}s.  When asked to flush, current memstore is moved
+ * to snapshot and is cleared.  We continue to serve edits out of new memstore
+ * and backing snapshot until flusher reports in that the flush succeeded. At
+ * this point we let the snapshot go.
+ * TODO: Adjust size of the memstore when we remove items because they have
+ * been deleted.
+ */
+class MemStore {
+  private static final Log LOG = LogFactory.getLog(MemStore.class);
+
+  private final long ttl;
+
+  // MemStore.  Use a SkipListMap rather than SkipListSet because of the
+  // better semantics.  The Map will overwrite if passed a key it already had
+  // whereas the Set will not add new KV if key is same though value might be
+  // different.  Value is not important -- just make sure always same
+  // reference passed.
+  volatile ConcurrentSkipListMap<KeyValue, Object> memstore;
+
+  // Snapshot of memstore.  Made for flusher.
+  volatile ConcurrentSkipListMap<KeyValue, Object> snapshot;
+
+  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+
+  final KeyValue.KVComparator comparator;
+
+  // Used comparing versions -- same r/c and ts but different type.
+  final KeyValue.KVComparator comparatorIgnoreType;
+
+  // Used comparing versions -- same r/c and type but different timestamp.
+  final KeyValue.KVComparator comparatorIgnoreTimestamp;
+
+  // TODO: Fix this guess by studying jprofiler
+  private final static int ESTIMATED_KV_HEAP_TAX = 60;
+
+  /* Value we add memstore 'value'.  Memstore backing is a Map
+   * but we are only interested in its keys.
+   */
+  private static final Object NULL = new Object();
+
+  /**
+   * Default constructor. Used for tests.
+   */
+  public MemStore() {
+    this(HConstants.FOREVER, KeyValue.COMPARATOR);
+  }
+
+  /**
+   * Constructor.
+   * @param ttl The TTL for cache entries, in milliseconds.
+   * @param c
+   */
+  public MemStore(final long ttl, final KeyValue.KVComparator c) {
+    this.ttl = ttl;
+    this.comparator = c;
+    this.comparatorIgnoreTimestamp =
+      this.comparator.getComparatorIgnoringTimestamps();
+    this.comparatorIgnoreType = this.comparator.getComparatorIgnoringType();
+    this.memstore = createMap(c);
+    this.snapshot = createMap(c);
+  }
+
+  static ConcurrentSkipListMap<KeyValue, Object> createMap(final KeyValue.KVComparator c) {
+    return new ConcurrentSkipListMap<KeyValue, Object>(c);
+  }
+
+  void dump() {
+    for (Map.Entry<KeyValue, ?> entry: this.memstore.entrySet()) {
+      LOG.info(entry.getKey());
+    }
+    for (Map.Entry<KeyValue, ?> entry: this.snapshot.entrySet()) {
+      LOG.info(entry.getKey());
+    }
+  }
+
+  /**
+   * Creates a snapshot of the current memstore.
+   * Snapshot must be cleared by call to {@link #clearSnapshot(java.util.Map)}
+   * To get the snapshot made by this method, use {@link #getSnapshot()}
+   */
+  void snapshot() {
+    this.lock.writeLock().lock();
+    try {
+      // If snapshot currently has entries, then flusher failed or didn't call
+      // cleanup.  Log a warning.
+      if (!this.snapshot.isEmpty()) {
+        LOG.warn("Snapshot called again without clearing previous. " +
+          "Doing nothing. Another ongoing flush or did we fail last attempt?");
+      } else {
+        // We used to synchronize on the memstore here but we're inside a
+        // write lock so removed it. Comment is left in case removal was a
+        // mistake. St.Ack
+        if (!this.memstore.isEmpty()) {
+          this.snapshot = this.memstore;
+          this.memstore = createMap(this.comparator);
+        }
+      }
+    } finally {
+      this.lock.writeLock().unlock();
+    }
+  }
+
+  /**
+   * Return the current snapshot.
+   * Called by flusher to get current snapshot made by a previous
+   * call to {@link #snapshot()}
+   * @return Return snapshot.
+   * @see {@link #snapshot()}
+   * @see {@link #clearSnapshot(java.util.Map)}
+   */
+  ConcurrentSkipListMap<KeyValue, ?> getSnapshot() {
+    return this.snapshot;
+  }
+
+  /**
+   * The passed snapshot was successfully persisted; it can be let go.
+   * @param ss The snapshot to clean out.
+   * @throws UnexpectedException
+   * @see {@link #snapshot()}
+   */
+  void clearSnapshot(final Map<KeyValue, ?> ss)
+  throws UnexpectedException {
+    this.lock.writeLock().lock();
+    try {
+      if (this.snapshot != ss) {
+        throw new UnexpectedException("Current snapshot is " +
+          this.snapshot + ", was passed " + ss);
+      }
+      // OK. Passed in snapshot is same as current snapshot.  If not-empty,
+      // create a new snapshot and let the old one go.
+      if (!ss.isEmpty()) {
+        this.snapshot = createMap(this.comparator);
+      }
+    } finally {
+      this.lock.writeLock().unlock();
+    }
+  }
+
+  /**
+   * Write an update
+   * @param kv
+   * @return approximate size of the passed key and value.
+   */
+  long add(final KeyValue kv) {
+    long size = -1;
+    this.lock.readLock().lock();
+    try {
+      // Add anything as value as long as same instance each time.
+      size = heapSize(kv,
+        this.memstore.put(kv, NULL) == null);
+    } finally {
+      this.lock.readLock().unlock();
+    }
+    return size;
+  }
+  
+  /** 
+   * Write a delete
+   * @param delete
+   * @return approximate size of the passed key and value.
+   */
+  long delete(final KeyValue delete) {
+    long size = 0;
+    this.lock.readLock().lock();
+    //Have to find out what we want to do here, to find the fastest way of
+    //removing things that are under a delete.
+    //Actions that will take place here are:
+    //1. Insert a delete and remove all the affected entries already in memstore
+    //2. In the case of a Delete and the matching put is found then don't insert
+    //   the delete
+    //TODO Would be nice with if we had an iterator for this, so we could remove
+    //things that needs to be removed while iterating and don't have to go
+    //back and do it afterwards
+    
+    try {
+      boolean notpresent = false;
+      List<KeyValue> deletes = new ArrayList<KeyValue>();
+      SortedMap<KeyValue, Object> tail = this.memstore.tailMap(delete);
+
+      //Parse the delete, so that it is only done once
+      byte [] deleteBuffer = delete.getBuffer();
+      int deleteOffset = delete.getOffset();
+  
+      int deleteKeyLen = Bytes.toInt(deleteBuffer, deleteOffset);
+      deleteOffset += Bytes.SIZEOF_INT + Bytes.SIZEOF_INT;
+  
+      short deleteRowLen = Bytes.toShort(deleteBuffer, deleteOffset);
+      deleteOffset += Bytes.SIZEOF_SHORT;
+      int deleteRowOffset = deleteOffset;
+  
+      deleteOffset += deleteRowLen;
+  
+      byte deleteFamLen = deleteBuffer[deleteOffset];
+      deleteOffset += Bytes.SIZEOF_BYTE + deleteFamLen;
+  
+      int deleteQualifierOffset = deleteOffset;
+      int deleteQualifierLen = deleteKeyLen - deleteRowLen - deleteFamLen -
+        Bytes.SIZEOF_SHORT - Bytes.SIZEOF_BYTE - Bytes.SIZEOF_LONG - 
+        Bytes.SIZEOF_BYTE;
+      
+      deleteOffset += deleteQualifierLen;
+  
+      int deleteTimestampOffset = deleteOffset;
+      deleteOffset += Bytes.SIZEOF_LONG;
+      byte deleteType = deleteBuffer[deleteOffset];
+      
+      //Comparing with tail from memstore
+      for (Map.Entry<KeyValue, ?> entry : tail.entrySet()) {
+        DeleteCode res = DeleteCompare.deleteCompare(entry.getKey(),
+            deleteBuffer, 
+            deleteRowOffset, deleteRowLen, deleteQualifierOffset, 
+            deleteQualifierLen, deleteTimestampOffset, deleteType,
+            comparator.getRawComparator());
+        if (res == DeleteCode.DONE) {
+          break;
+        } else if (res == DeleteCode.DELETE) {
+          deletes.add(entry.getKey());
+        } // SKIP
+      }
+
+      //Delete all the entries effected by the last added delete
+      for(KeyValue del : deletes) {
+        notpresent = this.memstore.remove(del) == null;
+        size -= heapSize(del, notpresent);
+      }
+      
+      // Adding the delete to memstore. Add any value, as long as
+      // same instance each time.
+      size += heapSize(delete, this.memstore.put(delete, NULL) == null);
+    } finally {
+      this.lock.readLock().unlock();
+    }
+    return size;
+  }
+  
+  /*
+   * Calculate how the memstore size has changed, approximately.  Be careful.
+   * If class changes, be sure to change the size calculation.
+   * Add in tax of Map.Entry.
+   * @param kv
+   * @param notpresent True if the kv was NOT present in the set.
+   * @return Size
+   */
+  long heapSize(final KeyValue kv, final boolean notpresent) {
+    return notpresent?
+      // Add overhead for value byte array and for Map.Entry -- 57 bytes
+      // on x64 according to jprofiler.
+      ESTIMATED_KV_HEAP_TAX + 57 + kv.getLength(): 0; // Guess no change in size.
+  }
+
+  /**
+   * @param kv Find the row that comes after this one.  If null, we return the
+   * first.
+   * @return Next row or null if none found.
+   */
+  KeyValue getNextRow(final KeyValue kv) {
+    this.lock.readLock().lock();
+    try {
+      return getLowest(getNextRow(kv, this.memstore),
+        getNextRow(kv, this.snapshot));
+    } finally {
+      this.lock.readLock().unlock();
+    }
+  }
+
+  /*
+   * @param a
+   * @param b
+   * @return Return lowest of a or b or null if both a and b are null
+   */
+  private KeyValue getLowest(final KeyValue a, final KeyValue b) {
+    if (a == null) {
+      return b;
+    }
+    if (b == null) {
+      return a;
+    }
+    return comparator.compareRows(a, b) <= 0? a: b;
+  }
+
+  /*
+   * @param kv Find row that follows this one.  If null, return first.
+   * @param map Set to look in for a row beyond <code>row</code>.
+   * @return Next row or null if none found.  If one found, will be a new
+   * KeyValue -- can be destroyed by subsequent calls to this method.
+   */
+  private KeyValue getNextRow(final KeyValue kv,
+      final NavigableMap<KeyValue, ?> map) {
+    KeyValue result = null;
+    SortedMap<KeyValue, ?> tail = kv == null? map: map.tailMap(kv);
+    // Iterate until we fall into the next row; i.e. move off current row
+    for (Map.Entry<KeyValue, ?> i : tail.entrySet()) {
+      if (comparator.compareRows(i.getKey(), kv) <= 0)
+        continue;
+      // Note: Not suppressing deletes or expired cells.  Needs to be handled
+      // by higher up functions.
+      result = i.getKey();
+      break;
+    }
+    return result;
+  }
+
+
+  /**
+   * @param row Row to look for.
+   * @param candidateKeys Map of candidate keys (Accumulation over lots of
+   * lookup over stores and memstores)
+   */
+  void getRowKeyAtOrBefore(final KeyValue row,
+      final NavigableSet<KeyValue> candidateKeys) {
+    getRowKeyAtOrBefore(row, candidateKeys,
+      new TreeSet<KeyValue>(this.comparator), System.currentTimeMillis());
+  }
+
+  /**
+   * @param kv Row to look for.
+   * @param candidates Map of candidate keys (Accumulation over lots of
+   * lookup over stores and memstores).  Pass a Set with a Comparator that
+   * ignores key Type so we can do Set.remove using a delete, i.e. a KeyValue
+   * with a different Type to the candidate key.
+   * @param deletes Pass a Set that has a Comparator that ignores key type.
+   * @param now
+   */
+  void getRowKeyAtOrBefore(final KeyValue kv,
+      final NavigableSet<KeyValue> candidates, 
+      final NavigableSet<KeyValue> deletes, final long now) {
+    this.lock.readLock().lock();
+    try {
+      getRowKeyAtOrBefore(memstore, kv, candidates, deletes, now);
+      getRowKeyAtOrBefore(snapshot, kv, candidates, deletes, now);
+    } finally {
+      this.lock.readLock().unlock();
+    }
+  }
+
+  private void getRowKeyAtOrBefore(final ConcurrentSkipListMap<KeyValue, Object> map,
+      final KeyValue kv, final NavigableSet<KeyValue> candidates,
+      final NavigableSet<KeyValue> deletes, final long now) {
+    if (map.isEmpty()) {
+      return;
+    }
+    // We want the earliest possible key to start searching from.  Start before
+    // the candidate key in case it turns out a delete came in later.
+    KeyValue search = candidates.isEmpty()? kv: candidates.first();
+
+    // Get all the entries that come equal or after our search key
+    SortedMap<KeyValue, Object> tail = map.tailMap(search);
+
+    // if there are items in the tail map, there's either a direct match to
+    // the search key, or a range of values between the first candidate key
+    // and the ultimate search key (or the end of the memstore)
+    if (!tail.isEmpty() &&
+        this.comparator.compareRows(tail.firstKey(), search) <= 0) {
+      // Keep looking at cells as long as they are no greater than the 
+      // ultimate search key and there are still records left in the map.
+      KeyValue deleted = null;
+      KeyValue found = null;
+      for (Iterator<Map.Entry<KeyValue, Object>> iterator =
+          tail.entrySet().iterator();
+        iterator.hasNext() && (found == null ||
+          this.comparator.compareRows(found, kv) <= 0);) {
+        found = iterator.next().getKey();
+        if (this.comparator.compareRows(found, kv) <= 0) {
+          if (found.isDeleteType()) {
+            Store.handleDeletes(found, candidates, deletes);
+            if (deleted == null) {
+              deleted = found;
+            }
+          } else {
+            if (Store.notExpiredAndNotInDeletes(this.ttl, found, now, deletes)) {
+              candidates.add(found);
+            } else {
+              if (deleted == null) {
+                deleted = found;
+              }
+              // TODO: Check this removes the right key.
+              // It's expired.  Remove it.
+              iterator.remove();
+            }
+          }
+        }
+      }
+      if (candidates.isEmpty() && deleted != null) {
+        getRowKeyBefore(map, deleted, candidates, deletes, now);
+      }
+    } else {
+      // The tail didn't contain any keys that matched our criteria, or was 
+      // empty. Examine all the keys that precede our splitting point.
+      getRowKeyBefore(map, search, candidates, deletes, now);
+    }
+  }
+
+  /*
+   * Get the row key that comes before the passed <code>search</code> key.
+   * Use when we know the search key is not in the map and we need to search
+   * earlier in the memstore.
+   * @param map
+   * @param search
+   * @param candidates
+   * @param deletes Pass a Set that has a Comparator that ignores key type.
+   * @param now
+   */
+  private void getRowKeyBefore(ConcurrentSkipListMap<KeyValue, Object> map,
+      KeyValue search, NavigableSet<KeyValue> candidates,
+      final NavigableSet<KeyValue> deletes, final long now) {
+    NavigableMap<KeyValue, Object> headMap = map.headMap(search);
+    // If we tried to create a headMap and got an empty map, then there are
+    // no keys at or before the search key, so we're done.
+    if (headMap.isEmpty()) {
+      return;
+    }
+
+    // If there aren't any candidate keys at this point, we need to search
+    // backwards until we find at least one candidate or run out of headMap.
+    if (candidates.isEmpty()) {
+      KeyValue lastFound = null;
+      // TODO: Confirm we're iterating in the right order
+      for (Iterator<KeyValue> i = headMap.descendingKeySet().iterator();
+          i.hasNext();) {
+        KeyValue found = i.next();
+        // if the last row we found a candidate key for is different than
+        // the row of the current candidate, we can stop looking -- if it's
+        // not a delete record.
+        boolean deleted = found.isDeleteType();
+        if (lastFound != null &&
+            this.comparator.matchingRows(lastFound, found) && !deleted) {
+          break;
+        }
+        // If this isn't a delete, record it as a candidate key. Also 
+        // take note of this candidate so that we'll know when
+        // we cross the row boundary into the previous row.
+        if (!deleted) {
+          if (Store.notExpiredAndNotInDeletes(this.ttl, found, now, deletes)) {
+            lastFound = found;
+            candidates.add(found);
+          } else {
+            // It's expired.
+            Store.expiredOrDeleted(map, found);
+          }
+        } else {
+          // We are encountering items in reverse.  We may have just added
+          // an item to candidates that this later item deletes.  Check.  If we
+          // found something in candidates, remove it from the set.
+          if (Store.handleDeletes(found, candidates, deletes)) {
+            remove(map, found);
+          }
+        }
+      }
+    } else {
+      // If there are already some candidate keys, we only need to consider
+      // the very last row's worth of keys in the headMap, because any 
+      // smaller acceptable candidate keys would have caused us to start
+      // our search earlier in the list, and we wouldn't be searching here.
+      SortedMap<KeyValue, Object> rowTailMap = 
+        headMap.tailMap(headMap.lastKey().cloneRow(HConstants.LATEST_TIMESTAMP));
+      Iterator<Map.Entry<KeyValue, Object>> i = rowTailMap.entrySet().iterator();
+      do {
+        KeyValue found = i.next().getKey();
+        if (found.isDeleteType()) {
+          Store.handleDeletes(found, candidates, deletes);
+        } else {
+          // Same check as the non-delete branch above: the cell must be both
+          // unexpired and not covered by a delete to remain a candidate.
+          if (Store.notExpiredAndNotInDeletes(this.ttl, found, now, deletes)) {
+            candidates.add(found);
+          } else {
+            Store.expiredOrDeleted(map, found);
+          }
+        }
+      } while (i.hasNext());
+    }
+  }
+
+
+  /*
+   * @param map
+   * @param kv This is a delete record.  Remove anything behind this of the
+   * same row/column/timestamp.
+   * @return True if we removed anything.
+   */
+  private boolean remove(final NavigableMap<KeyValue, Object> map,
+      final KeyValue kv) {
+    SortedMap<KeyValue, Object> m = map.tailMap(kv);
+    if (m.isEmpty()) {
+      return false;
+    }
+    boolean removed = false;
+    for (Map.Entry<KeyValue, Object> entry: m.entrySet()) {
+      if (this.comparatorIgnoreType.compare(entry.getKey(), kv) == 0) {
+        // Same r/c/ts.  Remove it.
+        m.remove(entry.getKey());
+        removed = true;
+        continue;
+      }
+      break;
+    }
+    return removed;
+  }
+
+  /**
+   * @return scanner on memstore and snapshot in this order.
+   */
+  KeyValueScanner [] getScanners() {
+    this.lock.readLock().lock();
+    try {
+      KeyValueScanner [] scanners = new KeyValueScanner[2];
+      scanners[0] = new MemStoreScanner(this.memstore);
+      scanners[1] = new MemStoreScanner(this.snapshot);
+      return scanners;
+    } finally {
+      this.lock.readLock().unlock();
+    }
+  }
+
+  //
+  // HBASE-880/1249/1304
+  //
+
+  /**
+   * Perform a single-row Get on the memstore and snapshot, placing results
+   * into the specified KV list.
+   * <p>
+   * This will return true if it is determined that the query is complete
+   * and it is not necessary to check any storefiles after this.
+   * <p>
+   * Otherwise, it will return false and you should continue on.
+   * @param matcher Column matcher
+   * @param result List to add results to
+   * @return true if done with store (early-out), false if not
+   * @throws IOException
+   */
+  public boolean get(QueryMatcher matcher, List<KeyValue> result)
+  throws IOException {
+    this.lock.readLock().lock();
+    try {
+      if(internalGet(this.memstore, matcher, result) || matcher.isDone()) {
+        return true;
+      }
+      matcher.update();
+      return internalGet(this.snapshot, matcher, result) || matcher.isDone();
+    } finally {
+      this.lock.readLock().unlock();
+    }
+  }
+  
+  /**
+   *
+   * @param map memstore or snapshot
+   * @param matcher query matcher
+   * @param result list to add results to
+   * @return true if done with store (early-out), false if not
+   * @throws IOException
+   */
+  private boolean internalGet(SortedMap<KeyValue, Object> map, QueryMatcher matcher,
+      List<KeyValue> result)
+  throws IOException {
+    if(map.isEmpty()) return false;
+    // Seek to startKey
+    SortedMap<KeyValue, Object> tail = map.tailMap(matcher.getStartKey());
+    for (Map.Entry<KeyValue, Object> entry : tail.entrySet()) {
+      QueryMatcher.MatchCode res = matcher.match(entry.getKey());
+      switch(res) {
+        case INCLUDE:
+          result.add(entry.getKey());
+          break;
+        case SKIP:
+          break;
+        case NEXT:
+          return false;
+        case DONE:
+          return true;
+        default:
+          throw new RuntimeException("Unexpected " + res);
+      }
+    }
+    return false;
+  }
+  
+
+  /*
+   * MemStoreScanner implements the KeyValueScanner interface.
+   * It lets the caller scan the contents of a memstore.
+   * This behaves as if it were a real scanner but does not maintain position
+   * in the passed memstore tree.
+   */
+  protected class MemStoreScanner implements KeyValueScanner {
+    private final NavigableMap<KeyValue, Object> mc;
+    private KeyValue current = null;
+    private List<KeyValue> result = new ArrayList<KeyValue>();
+    private int idx = 0;
+
+    MemStoreScanner(final NavigableMap<KeyValue, Object> mc) {
+      this.mc = mc;
+    }
+
+    public boolean seek(KeyValue key) {
+      try {
+        if (key == null) {
+          close();
+          return false;
+        }
+        this.current = key;
+        return cacheNextRow();
+      } catch(Exception e) {
+        close();
+        return false;
+      }
+    }
+
+    public KeyValue peek() {
+      if (idx >= this.result.size()) {
+        if (!cacheNextRow()) {
+          return null;
+        }
+        return peek();
+      }
+      return result.get(idx);
+    }
+
+    public KeyValue next() {
+      if (idx >= result.size()) {
+        if (!cacheNextRow()) {
+          return null;
+        }
+        return next();
+      }
+      return this.result.get(idx++);
+    }
+
+    /**
+     * @return True if we successfully cached the KeyValues that make up the
+     * next row.
+     */
+    boolean cacheNextRow() {
+      SortedMap<KeyValue, Object> keys;
+      try {
+        keys = this.mc.tailMap(this.current);
+      } catch (Exception e) {
+        close();
+        return false;
+      }
+      if (keys == null || keys.isEmpty()) {
+        close();
+        return false;
+      }
+      this.current = null;
+      byte [] row = keys.firstKey().getRow();
+      for (Map.Entry<KeyValue, Object> key: keys.entrySet()) {
+        KeyValue kv = key.getKey();
+        if (comparator.compareRows(kv, row) != 0) {
+          this.current = kv;
+          break;
+        }
+        result.add(kv);
+      }
+      return true;
+    }
+
+    public void close() {
+      current = null;
+      idx = 0;
+      if (!result.isEmpty()) {
+        result.clear();
+      }
+    }
+  }
+
+  /**
+   * Code to help figure whether our approximation of object heap sizes is
+   * close enough.  See HBASE-900.  Fills memstores then waits so the user can
+   * heap dump and bring up the resultant hprof in something like jprofiler,
+   * which allows you to get 'deep size' on objects.
+   * @param args
+   */
+  public static void main(String [] args) {
+    RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
+    LOG.info("vmName=" + runtime.getVmName() + ", vmVendor=" +
+      runtime.getVmVendor() + ", vmVersion=" + runtime.getVmVersion());
+    LOG.info("vmInputArguments=" + runtime.getInputArguments());
+    MemStore memstore1 = new MemStore();
+    // TODO: x32 vs x64
+    long size = 0;
+    final int count = 10000;
+    byte [] column = Bytes.toBytes("col:umn");
+    for (int i = 0; i < count; i++) {
+      // Give each its own ts
+      size += memstore1.add(new KeyValue(Bytes.toBytes(i), column, i));
+    }
+    LOG.info("memstore1 estimated size=" + size);
+    for (int i = 0; i < count; i++) {
+      size += memstore1.add(new KeyValue(Bytes.toBytes(i), column, i));
+    }
+    LOG.info("memstore1 estimated size (2nd loading of same data)=" + size);
+    // Make a variably sized memstore.
+    MemStore memstore2 = new MemStore();
+    for (int i = 0; i < count; i++) {
+      size += memstore2.add(new KeyValue(Bytes.toBytes(i), column, i,
+        new byte[i]));
+    }
+    LOG.info("memstore2 estimated size=" + size);
+    final int seconds = 30;
+    LOG.info("Waiting " + seconds + " seconds while heap dump is taken");
+    for (int i = 0; i < seconds; i++) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+        break;
+      }
+    }
+    LOG.info("Exiting.");
+  }
+}
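
For illustration only, the read path above (consult the live memstore, then the
immutable snapshot, preferring the lowest key when merging) can be reduced to
the following self-contained sketch. It uses String keys as a stand-in for
KeyValue, and the names TwoGenerationSketch and lowest are invented here; none
of this is part of the patch.

    import java.util.concurrent.ConcurrentSkipListMap;

    public class TwoGenerationSketch {
      // Live generation: takes new writes.
      final ConcurrentSkipListMap<String, String> memstore =
          new ConcurrentSkipListMap<String, String>();
      // Frozen generation: read-only while it is flushed to disk.
      final ConcurrentSkipListMap<String, String> snapshot =
          new ConcurrentSkipListMap<String, String>();

      // Reads check the live map first, then fall back to the snapshot.
      String get(String key) {
        String v = memstore.get(key);
        return v != null ? v : snapshot.get(key);
      }

      // Mirrors MemStore.getLowest(): null-safe minimum of two keys.
      static String lowest(String a, String b) {
        if (a == null) return b;
        if (b == null) return a;
        return a.compareTo(b) <= 0 ? a : b;
      }

      public static void main(String[] args) {
        TwoGenerationSketch s = new TwoGenerationSketch();
        s.snapshot.put("row1", "old");
        s.memstore.put("row1", "new");
        System.out.println(s.get("row1"));      // "new": live map wins
        System.out.println(lowest("a", null));  // "a"
      }
    }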

Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java?rev=788160&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MemStoreFlusher.java Wed Jun 24 19:56:51 2009
@@ -0,0 +1,343 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.util.ArrayList;
+import java.util.ConcurrentModificationException;
+import java.util.HashSet;
+import java.util.SortedMap;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.DroppedSnapshotException;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.RemoteExceptionHandler;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Thread that flushes cache on request
+ *
+ * NOTE: This class extends Thread rather than Chore because its sleep time
+ * can be interrupted when there is something to do, unlike Chore, whose
+ * sleep time is invariant.
+ * 
+ * @see FlushRequester
+ */
+class MemStoreFlusher extends Thread implements FlushRequester {
+  static final Log LOG = LogFactory.getLog(MemStoreFlusher.class);
+  private final BlockingQueue<HRegion> flushQueue =
+    new LinkedBlockingQueue<HRegion>();
+  
+  private final HashSet<HRegion> regionsInQueue = new HashSet<HRegion>();
+
+  private final long threadWakeFrequency;
+  private final HRegionServer server;
+  private final ReentrantLock lock = new ReentrantLock();
+
+  protected final long globalMemStoreLimit;
+  protected final long globalMemStoreLimitLowMark;
+  
+  private static final float DEFAULT_UPPER = 0.4f;
+  private static final float DEFAULT_LOWER = 0.25f;
+  private static final String UPPER_KEY =
+    "hbase.regionserver.global.memstore.upperLimit";
+  private static final String LOWER_KEY =
+    "hbase.regionserver.global.memstore.lowerLimit";
+  private long blockingStoreFilesNumber;
+  private long blockingWaitTime;
+
+  /**
+   * @param conf
+   * @param server
+   */
+  public MemStoreFlusher(final HBaseConfiguration conf,
+      final HRegionServer server) {
+    super();
+    this.server = server;
+    this.threadWakeFrequency =
+      conf.getLong(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000);
+    long max = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
+    this.globalMemStoreLimit = globalMemStoreLimit(max, DEFAULT_UPPER,
+      UPPER_KEY, conf);
+    long lower = globalMemStoreLimit(max, DEFAULT_LOWER, LOWER_KEY, conf);
+    if (lower > this.globalMemStoreLimit) {
+      lower = this.globalMemStoreLimit;
+      LOG.info("Setting globalMemStoreLimitLowMark == globalMemStoreLimit " +
+        "because supplied " + LOWER_KEY + " was > " + UPPER_KEY);
+    }
+    this.globalMemStoreLimitLowMark = lower;
+    this.blockingStoreFilesNumber = 
+      conf.getInt("hbase.hstore.blockingStoreFiles", -1);
+    if (this.blockingStoreFilesNumber == -1) {
+      this.blockingStoreFilesNumber = 1 +
+        conf.getInt("hbase.hstore.compactionThreshold", 3);
+    }
+    this.blockingWaitTime = conf.getInt("hbase.hstore.blockingWaitTime",
+      90000); // default of 90 seconds
+    LOG.info("globalMemStoreLimit=" +
+      StringUtils.humanReadableInt(this.globalMemStoreLimit) +
+      ", globalMemStoreLimitLowMark=" +
+      StringUtils.humanReadableInt(this.globalMemStoreLimitLowMark) +
+      ", maxHeap=" + StringUtils.humanReadableInt(max));
+  }
+
+  /**
+   * Calculate size using passed <code>key</code> for configured
+   * percentage of <code>max</code>.
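+   * For example, with a 1 GB heap and the default upper limit of 0.4f, the
+   * global memstore limit comes out near 400 MB (an illustrative figure).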
+   * @param max
+   * @param defaultLimit
+   * @param key
+   * @param c
+   * @return Limit.
+   */
+  static long globalMemStoreLimit(final long max,
+     final float defaultLimit, final String key, final HBaseConfiguration c) {
+    float limit = c.getFloat(key, defaultLimit);
+    return getMemStoreLimit(max, limit, defaultLimit);
+  }
+  
+  static long getMemStoreLimit(final long max, final float limit,
+      final float defaultLimit) {
+    float effectiveLimit = limit;
+    if (limit >= 0.9f || limit < 0.1f) {
+      LOG.warn("Setting global memstore limit to default of " + defaultLimit +
+        " because supplied value outside allowed range of 0.1 -> 0.9");
+      // Actually fall back to the default, as the warning promises.
+      effectiveLimit = defaultLimit;
+    }
+    return (long)(max * effectiveLimit);
+  }
+  
+  @Override
+  public void run() {
+    while (!this.server.isStopRequested() && this.server.isInSafeMode()) {
+      try {
+        Thread.sleep(threadWakeFrequency);
+      } catch (InterruptedException ex) {
+        continue;
+      }
+    }
+    while (!server.isStopRequested()) {
+      HRegion r = null;
+      try {
+        r = flushQueue.poll(threadWakeFrequency, TimeUnit.MILLISECONDS);
+        if (r == null) {
+          continue;
+        }
+        if (!flushRegion(r, false)) {
+          break;
+        }
+      } catch (InterruptedException ex) {
+        continue;
+      } catch (ConcurrentModificationException ex) {
+        continue;
+      } catch (Exception ex) {
+        LOG.error("Cache flush failed" +
+          (r != null ? (" for region " + Bytes.toString(r.getRegionName())) : ""),
+          ex);
+        if (!server.checkFileSystem()) {
+          break;
+        }
+      }
+    }
+    regionsInQueue.clear();
+    flushQueue.clear();
+    LOG.info(getName() + " exiting");
+  }
+  
+  public void request(HRegion r) {
+    synchronized (regionsInQueue) {
+      if (!regionsInQueue.contains(r)) {
+        regionsInQueue.add(r);
+        flushQueue.add(r);
+      }
+    }
+  }
+  
+  /**
+   * Only interrupt once it's done with a run through the work loop.
+   */ 
+  void interruptIfNecessary() {
+    lock.lock();
+    try {
+      this.interrupt();
+    } finally {
+      lock.unlock();
+    }
+  }
+  
+  /*
+   * Flush a region.
+   * 
+   * @param region the region to be flushed
+   * @param removeFromQueue True if the region needs to be removed from the
+   * flush queue. False if called from the main flusher run loop and true if
+   * called from flushSomeRegions to relieve memory pressure from the region
+   * server.  If <code>true</code>, we are in a state of emergency; we are not
+   * taking on updates regionserver-wide, not until memory is flushed.  In this
+   * case, do not let a compaction run inline with blocked updates. Compactions
+   * can take a long time. By stopping compactions, there is a danger that the
+   * number of flushes will overwhelm compaction on a busy server; we'll have to see.
+   * That compactions do not run when called out of flushSomeRegions means that
+   * compactions can be reported by the historian without danger of deadlock
+   * (HBASE-670).
+   * 
+   * <p>In the main run loop, regions have already been removed from the flush
+   * queue, and if this method is called for the relief of memory pressure,
+   * this may not necessarily be true. We want to avoid trying to remove the
+   * region from the queue because if it has already been removed, it requires a
+   * sequential scan of the queue to determine that it is not in the queue.
+   * 
+   * <p>If called from flushSomeRegions, the region may be in the queue but
+   * it may have been determined that the region had a significant amount of 
+   * memory in use and needed to be flushed to relieve memory pressure. In this
+   * case, its flush may preempt the pending request in the queue, and if so,
+   * it needs to be removed from the queue to avoid flushing the region
+   * multiple times.
+   * 
+   * @return true if the region was successfully flushed, false otherwise. If 
+   * false, there will be accompanying log messages explaining why the region
+   * was not flushed.
+   */
+  private boolean flushRegion(HRegion region, boolean removeFromQueue) {
+    // Wait until it is safe to flush
+    int count = 0;
+    boolean triggered = false;
+    WAIT:
+    while (count++ < (blockingWaitTime / 500)) {
+      for (Store hstore: region.stores.values()) {
+        if (hstore.getStorefilesCount() > this.blockingStoreFilesNumber) {
+          // always request a compaction
+          server.compactSplitThread.compactionRequested(region, getName());
+          // only log once
+          if (!triggered) {
+            LOG.info("Too many store files for region " + region + ": " +
+              hstore.getStorefilesCount() + ", waiting");
+            triggered = true;
+          }
+          try {
+            Thread.sleep(500);
+          } catch (InterruptedException e) {
+            // ignore
+          }
+          // Re-check every store after the pause; a bare continue here would
+          // only advance to the next store and then fall through to the break.
+          continue WAIT;
+        }
+      }
+      if (triggered) {
+        LOG.info("Compaction completed on region " + region +
+          ", proceeding");
+      }
+      break;
+    }
+    synchronized (regionsInQueue) {
+      // See comment above for removeFromQueue on why we do not
+      // take the region out of the set. If removeFromQueue is true, remove it
+      // from the queue too if it is there. This didn't used to be a
+      // constraint, but now that HBASE-512 is in play, we need to try and
+      // limit double-flushing of regions.
+      if (regionsInQueue.remove(region) && removeFromQueue) {
+        flushQueue.remove(region);
+      }
+      lock.lock();
+    }
+    try {
+      // See comment above for removeFromQueue on why we do not
+      // compact if removeFromQueue is true. Note that region.flushcache()
+      // only returns true if a flush was done and a compaction is needed.
+      if (region.flushcache() && !removeFromQueue) {
+        server.compactSplitThread.compactionRequested(region, getName());
+      }
+    } catch (DroppedSnapshotException ex) {
+      // Cache flush can fail in a few places. If it fails in a critical
+      // section, we get a DroppedSnapshotException and a replay of hlog
+      // is required. Currently the only way to do this is a restart of
+      // the server. Abort because hdfs is probably bad (HBASE-644 is a case
+      // where hdfs was bad but passed the hdfs check).
+      LOG.fatal("Replay of hlog required. Forcing server shutdown", ex);
+      server.abort();
+      return false;
+    } catch (IOException ex) {
+      LOG.error("Cache flush failed"
+          + (region != null ? (" for region " + Bytes.toString(region.getRegionName())) : ""),
+          RemoteExceptionHandler.checkIOException(ex));
+      if (!server.checkFileSystem()) {
+        return false;
+      }
+    } finally {
+      lock.unlock();
+    }
+
+    return true;
+  }
+  
+  /**
+   * Check if the regionserver's memstore memory usage is greater than the 
+   * limit. If so, flush regions with the biggest memstores until we're down
+   * to the lower limit. This method blocks callers until we're down to a safe
+   * amount of memstore consumption.
+   */
+  public synchronized void reclaimMemStoreMemory() {
+    if (server.getGlobalMemStoreSize() >= globalMemStoreLimit) {
+      flushSomeRegions();
+    }
+  }
+
+  /*
+   * Emergency!  Need to flush memory.
+   */
+  private synchronized void flushSomeRegions() {
+    // keep flushing until we hit the low water mark
+    long globalMemStoreSize = -1;
+    ArrayList<HRegion> regionsToCompact = new ArrayList<HRegion>();
+    for (SortedMap<Long, HRegion> m =
+        this.server.getCopyOfOnlineRegionsSortedBySize();
+      (globalMemStoreSize = server.getGlobalMemStoreSize()) >=
+        this.globalMemStoreLimitLowMark;) {
+      // flush the region with the biggest memstore
+      if (m.size() <= 0) {
+        LOG.info("No online regions to flush though we've been asked flush " +
+          "some; globalMemStoreSize=" +
+          StringUtils.humanReadableInt(globalMemStoreSize) +
+          ", globalMemStoreLimitLowMark=" +
+          StringUtils.humanReadableInt(this.globalMemStoreLimitLowMark));
+        break;
+      }
+      HRegion biggestMemStoreRegion = m.remove(m.firstKey());
+      LOG.info("Forced flushing of " +  biggestMemStoreRegion.toString() +
+        " because global memstore limit of " +
+        StringUtils.humanReadableInt(this.globalMemStoreLimit) +
+        " exceeded; currently " +
+        StringUtils.humanReadableInt(globalMemStoreSize) + " and flushing till " +
+        StringUtils.humanReadableInt(this.globalMemStoreLimitLowMark));
+      if (!flushRegion(biggestMemStoreRegion, true)) {
+        LOG.warn("Flush failed");
+        break;
+      }
+      regionsToCompact.add(biggestMemStoreRegion);
+    }
+    for (HRegion region : regionsToCompact) {
+      server.compactSplitThread.compactionRequested(region, getName());
+    }
+  }
+}
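
Roughly speaking, the flusher enforces a high/low watermark policy: writers are
held up once total memstore usage crosses the upper limit, and the regions with
the biggest memstores are flushed until usage drops under the lower limit. A
minimal sketch of that policy follows; plain longs stand in for regions, and
the class and variable names here are invented for illustration.

    import java.util.Collections;
    import java.util.TreeMap;

    public class WatermarkSketch {
      public static void main(String[] args) {
        long maxHeap = 1000;                   // pretend heap, in bytes
        long upper = (long)(maxHeap * 0.4f);   // cf. DEFAULT_UPPER
        long lower = (long)(maxHeap * 0.25f);  // cf. DEFAULT_LOWER

        // Regions keyed by memstore size, biggest first, the shape
        // getCopyOfOnlineRegionsSortedBySize() is assumed to return.
        TreeMap<Long, String> regions =
            new TreeMap<Long, String>(Collections.reverseOrder());
        regions.put(300L, "regionA");
        regions.put(200L, "regionB");
        regions.put(50L, "regionC");

        long global = 550;  // sum of all memstore sizes
        if (global >= upper) {
          // Flush biggest-first until we drop under the low watermark.
          while (global >= lower && !regions.isEmpty()) {
            long biggest = regions.firstKey();
            System.out.println("flushing " + regions.remove(biggest) +
                " (" + biggest + " bytes)");
            global -= biggest;
          }
        }
        System.out.println("global=" + global + ", low mark=" + lower);
      }
    }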

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/MinorCompactingStoreScanner.java Wed Jun 24 19:56:51 2009
@@ -20,19 +20,17 @@
 
 package org.apache.hadoop.hbase.regionserver;
 
-import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.KeyValue;
+import java.io.IOException;
+import java.util.List;
+
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.hfile.HFile;
-import org.apache.hadoop.hbase.util.Bytes;
-
-import java.util.List;
-import java.io.IOException;
 
 /**
  * A scanner that does a minor compaction at the same time.  Doesn't need to
- * implement ChangedReadersObserver, since it doesn't scan memcache, only store files
- * and optionally the memcache-snapshot.
+ * implement ChangedReadersObserver, since it doesn't scan memstore, only store files
+ * and optionally the memstore-snapshot.
  */
 public class MinorCompactingStoreScanner implements KeyValueScanner, InternalScanner {
 

Modified: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/QueryMatcher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/QueryMatcher.java?rev=788160&r1=788159&r2=788160&view=diff
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/QueryMatcher.java (original)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/QueryMatcher.java Wed Jun 24 19:56:51 2009
@@ -49,10 +49,10 @@
 public class QueryMatcher {
   /**
    * {@link #match} return codes.  These instruct the scanner moving through
-   * Memcaches and StoreFiles what to do with the current KeyValue.
+   * memstores and StoreFiles what to do with the current KeyValue.
    * <p>
    * Additionally, this contains "early-out" language to tell the scanner to
-   * move on to the next File (Memcache or Storefile), or to return immediately.
+   * move on to the next File (memstore or Storefile), or to return immediately.
    */
   static enum MatchCode {
     /**
@@ -66,7 +66,7 @@
     SKIP,
     
     /**
-     * Do not include, jump to next StoreFile or Memcache (in time order)
+     * Do not include, jump to next StoreFile or memstore (in time order)
      */
     NEXT,
     
@@ -100,7 +100,7 @@
   /** Keeps track of columns and versions */
   protected ColumnTracker columns;
   
-  /** Key to seek to in Memcache and StoreFiles */
+  /** Key to seek to in memstore and StoreFiles */
   protected KeyValue startKey;
   
   /** Row comparator for the region this query is for */
@@ -309,7 +309,7 @@
   }
   
   /**
-   * Called after reading each section (memcache, snapshot, storefiles).
+   * Called after reading each section (memstore, snapshot, storefiles).
    * <p>
    * This method will update the internal structures to be accurate for
    * the next section. 
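
The MatchCode values are what drive the early-out behavior seen in
MemStore.internalGet() above: INCLUDE collects the current cell, SKIP moves
past it, NEXT abandons the current file, and DONE ends the query outright. A
toy rendering of that control flow, with an invented matcher standing in for
the real QueryMatcher API:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class MatchCodeSketch {
      enum MatchCode { INCLUDE, SKIP, NEXT, DONE }

      // Toy matcher: include cells starting with "b", finish at "z".
      static MatchCode match(String cell) {
        if (cell.equals("z")) return MatchCode.DONE;
        return cell.startsWith("b") ? MatchCode.INCLUDE : MatchCode.SKIP;
      }

      // Mirrors the switch in internalGet(); true means early-out.
      static boolean scan(List<String> cells, List<String> result) {
        for (String cell : cells) {
          switch (match(cell)) {
            case INCLUDE: result.add(cell); break;
            case SKIP:    break;
            case NEXT:    return false;  // move on to the next file
            case DONE:    return true;   // query satisfied, stop entirely
          }
        }
        return false;
      }

      public static void main(String[] args) {
        List<String> result = new ArrayList<String>();
        boolean done = scan(Arrays.asList("a", "b1", "b2", "z", "b3"), result);
        System.out.println(result + " done=" + done);  // [b1, b2] done=true
      }
    }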


