hbase-commits mailing list archives

From st...@apache.org
Subject svn commit: r690422 - in /hadoop/hbase/branches/0.2: CHANGES.txt bin/HBase.rb conf/hbase-default.xml src/java/org/apache/hadoop/hbase/regionserver/HRegion.java src/java/org/apache/hadoop/hbase/regionserver/HStore.java
Date Fri, 29 Aug 2008 23:35:40 GMT
Author: stack
Date: Fri Aug 29 16:35:39 2008
New Revision: 690422

URL: http://svn.apache.org/viewvc?rev=690422&view=rev
Log:
HBASE-834 'Major' compactions and upper bound on files we compact at any one time

Modified:
    hadoop/hbase/branches/0.2/CHANGES.txt
    hadoop/hbase/branches/0.2/bin/HBase.rb
    hadoop/hbase/branches/0.2/conf/hbase-default.xml
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java

Modified: hadoop/hbase/branches/0.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/CHANGES.txt?rev=690422&r1=690421&r2=690422&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.2/CHANGES.txt Fri Aug 29 16:35:39 2008
@@ -43,6 +43,8 @@
                its migration first' is useless (Jean-Daniel Cryans via Jim
                Kellerman)
    HBASE-826   delete table followed by recreation results in honked table
+   HBASE-834   'Major' compactions and upper bound on files we compact at any
+               one time (Billy Pearson via Stack)
 
   IMPROVEMENTS
    HBASE-801  When a table haven't disable, shell could response in a "user

Modified: hadoop/hbase/branches/0.2/bin/HBase.rb
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/bin/HBase.rb?rev=690422&r1=690421&r2=690422&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/bin/HBase.rb (original)
+++ hadoop/hbase/branches/0.2/bin/HBase.rb Fri Aug 29 16:35:39 2008
@@ -196,7 +196,7 @@
     end
 
     def getAllColumns
-       htd = @table.getMetadata()
+       htd = @table.getTableDescriptor()
        result = []
        for f in htd.getFamilies()
          n = f.getNameAsString()
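
For reference, a rough Java sketch of the same lookup the shell performs after this
change. This is illustrative only: it assumes an already-opened HTable from the 0.2
client API (HBase imports omitted), and the helper name is made up.

  // Illustrative sketch, not part of this commit: read family names through
  // the table descriptor instead of the old getMetadata() call.
  static List<String> getAllColumnFamilies(HTable table) throws IOException {
    HTableDescriptor htd = table.getTableDescriptor();
    List<String> names = new ArrayList<String>();
    for (HColumnDescriptor family : htd.getFamilies()) {
      names.add(family.getNameAsString());
    }
    return names;
  }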

Modified: hadoop/hbase/branches/0.2/conf/hbase-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/conf/hbase-default.xml?rev=690422&r1=690421&r2=690422&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/conf/hbase-default.xml (original)
+++ hadoop/hbase/branches/0.2/conf/hbase-default.xml Fri Aug 29 16:35:39 2008
@@ -251,6 +251,19 @@
     </description>
   </property>
   <property>
+    <name>hbase.hstore.compaction.max</name>
+    <value>10</value>
+    <description>Max number of HStoreFiles to compact per 'minor' compaction.
+    </description>
+  </property>
+  <property>
+    <name>hbase.hregion.majorcompaction</name>
+    <value>86400000</value>
+    <description>The time (in milliseconds) between 'major' compactions of all
+    HStoreFiles in a region.  Default: 1 day.
+    </description>
+  </property>
+  <property>
     <name>hbase.regionserver.nbreservationblocks</name>
     <value>4</value>
     <description>The number of reservation blocks which are used to prevent
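
The two new settings above are read by HStore at construction time (see the
HStore.java hunk further down). As a minimal sketch, assuming a stock
HBaseConfiguration that layers hbase-site.xml over hbase-default.xml, the
defaults are picked up like this:

  // Sketch only: values fall back to the defaults above when they are not
  // overridden in hbase-site.xml.
  HBaseConfiguration conf = new HBaseConfiguration();
  long majorCompactionTime = conf.getLong("hbase.hregion.majorcompaction", 86400000); // 1 day in ms
  int maxFilesToCompact = conf.getInt("hbase.hstore.compaction.max", 10);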

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=690422&r1=690421&r2=690422&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Fri Aug 29 16:35:39 2008
@@ -881,12 +881,11 @@
    * conflicts with a region split, and that cannot happen because the region
    * server does them sequentially and not in parallel.
    * 
-   * @param force True to force a compaction regardless of thresholds (Needed
-   * by merge).
+   * @param majorCompaction True to force a major compaction regardless of thresholds
    * @return mid key if split is needed
    * @throws IOException
    */
-  private byte [] compactStores(final boolean force) throws IOException {
+  private byte [] compactStores(final boolean majorCompaction) throws IOException {
     splitsAndClosesLock.readLock().lock();
     try {
       byte [] midKey = null;
@@ -909,7 +908,7 @@
         doRegionCompactionPrep();
         long maxSize = -1;
         for (HStore store: stores.values()) {
-          final HStore.StoreSize size = store.compact(force);
+          final HStore.StoreSize size = store.compact(majorCompaction);
           if (size != null && size.getSize() > maxSize) {
             maxSize = size.getSize();
             midKey = size.getKey();
@@ -1509,6 +1508,7 @@
         List<HStoreKey> keys = store.getKeys(new HStoreKey(row, ts),
           ALL_VERSIONS, now);
         TreeMap<HStoreKey, byte []> edits = new TreeMap<HStoreKey, byte []>();
+        LOG.info("GETKEYS REMOVE " + keys);
         for (HStoreKey key: keys) {
           edits.put(key, HLogEdit.deleteBytes.get());
         }

Modified: hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=690422&r1=690421&r2=690422&view=diff
==============================================================================
--- hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/branches/0.2/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Fri Aug 29 16:35:39 2008
@@ -86,7 +86,8 @@
   final FileSystem fs;
   private final HBaseConfiguration conf;
   protected long ttl;
-
+  private long majorCompactionTime;
+  private int maxFilesToCompact;
   private final long desiredMaxFileSize;
   private volatile long storeSize;
 
@@ -187,6 +188,8 @@
     }
     this.desiredMaxFileSize = maxFileSize;
 
+    this.majorCompactionTime = conf.getLong("hbase.hregion.majorcompaction", 86400000);
+    this.maxFilesToCompact = conf.getInt("hbase.hstore.compaction.max", 10);
     this.storeSize = 0L;
 
     if (family.getCompression() == HColumnDescriptor.CompressionType.BLOCK) {
@@ -708,7 +711,29 @@
     }
     return false;
   }
-  
+
+  /*
+   * Gets lowest timestamp from files in a dir
+   *
+   * @param fs
+   * @param dir
+   * @throws IOException
+   */
+  private static long getLowestTimestamp(FileSystem fs, Path dir) throws IOException {
+    FileStatus[] stats = fs.listStatus(dir);
+    if (stats == null || stats.length == 0) {
+      return 0l;
+    }
+    long lowTimestamp = Long.MAX_VALUE;
+    for (int i = 0; i < stats.length; i++) {
+      long timestamp = stats[i].getModificationTime();
+      if (timestamp < lowTimestamp) {
+        lowTimestamp = timestamp;
+      }
+    }
+    return lowTimestamp;
+  }
+
   /**
    * Compact the back-HStores.  This method may take some time, so the calling 
    * thread must be able to block for long periods.
@@ -725,12 +750,12 @@
    * We don't want to hold the structureLock for the whole time, as a compact() 
    * can be lengthy and we want to allow cache-flushes during this period.
    * 
-   * @param force True to force a compaction regardless of thresholds (Needed
-   * by merge).
+   * @param majorCompaction True to force a major compaction regardless of
+   * thresholds
    * @return mid key if a split is needed, null otherwise
    * @throws IOException
    */
-  StoreSize compact(final boolean force) throws IOException {
+  StoreSize compact(boolean majorCompaction) throws IOException {
     synchronized (compactLock) {
       long maxId = -1;
       int nrows = -1;
@@ -741,12 +766,34 @@
         }
         // filesToCompact are sorted oldest to newest.
         filesToCompact = new ArrayList<HStoreFile>(this.storefiles.values());
-
+        
         // The max-sequenceID in any of the to-be-compacted TreeMaps is the 
         // last key of storefiles.
         maxId = this.storefiles.lastKey().longValue();
       }
-      if (!force && !hasReferences(filesToCompact) &&
+      // Check to see if we need to do a major compaction on this region.
+      // If so, change majorCompaction to true to skip the incremental compacting below.
+      // Only check if majorCompaction is not true.
+      long lastMajorCompaction = 0L;
+      if (!majorCompaction) {
+        Path mapdir = HStoreFile.getMapDir(basedir, info.getEncodedName(), family.getName());
+        long lowTimestamp = getLowestTimestamp(fs, mapdir);
+        if (LOG.isDebugEnabled() && lowTimestamp > 0l) {
+          LOG.debug("Time since last major compaction on store " + this.storeNameStr +
+            ": " + ((System.currentTimeMillis() - lowTimestamp)/1000) + " seconds");
+        }
+        lastMajorCompaction = System.currentTimeMillis() - lowTimestamp;
+        if (lowTimestamp < (System.currentTimeMillis() - majorCompactionTime) &&
+            lowTimestamp > 0l) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Major compaction triggered on store: " + this.storeNameStr +
+              ". Time since last major compaction: " +
+              ((System.currentTimeMillis() - lowTimestamp)/1000) + " seconds");
+          }
+          majorCompaction = true;
+        }
+      }
+      if (!majorCompaction && !hasReferences(filesToCompact) &&
           filesToCompact.size() < compactionThreshold) {
         return checkSplit();
       }
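
The block above boils down to: a store becomes due for a major compaction once its
oldest map file is older than hbase.hregion.majorcompaction (86400000 ms, i.e. one
day, by default). A minimal sketch of that decision, with a made-up helper name:

  // Illustrative only, not the committed code.
  static boolean isMajorCompactionDue(long lowestFileTimestamp, long majorCompactionTime) {
    return lowestFileTimestamp > 0L &&
        lowestFileTimestamp < System.currentTimeMillis() - majorCompactionTime;
  }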
@@ -772,13 +819,13 @@
         fileSizes[i] = len;
         totalSize += len;
       }
-      if (!force && !hasReferences(filesToCompact)) {
+      if (!majorCompaction && !hasReferences(filesToCompact)) {
         // Here we select files for incremental compaction.  
         // The rule is: if the largest(oldest) one is more than twice the 
         // size of the second, skip the largest, and continue to next...,
         // until we meet the compactionThreshold limit.
         for (point = 0; point < compactionThreshold - 1; point++) {
-          if (fileSizes[point] < fileSizes[point + 1] * 2) {
+          if (fileSizes[point] < fileSizes[point + 1] * 2 && maxFilesToCompact < (countOfFiles - point)) {
             break;
           }
           skipped += fileSizes[point];
@@ -839,7 +886,7 @@
         this.compression, this.family.isBloomfilter(), nrows);
       writer.setIndexInterval(family.getMapFileIndexInterval());
       try {
-        compact(writer, rdrs);
+        compact(writer, rdrs, majorCompaction);
       } finally {
         writer.close();
       }
@@ -851,7 +898,9 @@
       completeCompaction(filesToCompact, compactedOutputFile);
       if (LOG.isDebugEnabled()) {
         LOG.debug("Completed compaction of " + this.storeNameStr +
-          " store size is " + StringUtils.humanReadableInt(storeSize));
+          " store size is " + StringUtils.humanReadableInt(storeSize) +
+          (majorCompaction? "": "; time since last major compaction: " +
+          (lastMajorCompaction/1000) + " seconds"));
       }
     }
     return checkSplit();
@@ -865,10 +914,12 @@
    * by timestamp.
    * @param compactedOut Where to write compaction.
    * @param pReaders List of readers sorted oldest to newest.
+   * @param majorCompaction True to force a major compaction regardless of
+   * thresholds
    * @throws IOException
    */
   private void compact(final MapFile.Writer compactedOut,
-      final List<MapFile.Reader> pReaders)
+      final List<MapFile.Reader> pReaders, final boolean majorCompaction)
   throws IOException {
     // Reverse order so we newest is first.
     List<MapFile.Reader> copy = new ArrayList<MapFile.Reader>(pReaders);
@@ -926,7 +977,10 @@
           timesSeen = 0;
         }
 
-        if (timesSeen <= family.getMaxVersions()) {
+        // Added majorCompaction here to make sure all versions make it to
+        // the major compaction, so we do not remove the wrong last versions;
+        // this affected HBASE-826.
+        if (timesSeen <= family.getMaxVersions() || !majorCompaction) {
           // Keep old versions until we have maxVersions worth.
           // Then just skip them.
           if (sk.getRow().length != 0 && sk.getColumn().length != 0) {
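
In other words, a minor compaction now carries every version forward; only a major
compaction, which rewrites all of a store's files, trims versions beyond the
family's maximum. A minimal sketch of the retention rule, with a made-up helper
name:

  // Illustrative only, not the committed code.
  static boolean keepVersion(int timesSeen, int maxVersions, boolean majorCompaction) {
    return !majorCompaction || timesSeen <= maxVersions;
  }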


