hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From li...@apache.org
Subject svn commit: r1525273 - in /hbase/branches/0.89-fb/src: main/java/org/apache/hadoop/hbase/master/ main/java/org/apache/hadoop/hbase/regionserver/wal/ test/java/org/apache/hadoop/hbase/master/
Date Sat, 21 Sep 2013 18:18:17 GMT
Author: liyin
Date: Sat Sep 21 18:18:16 2013
New Revision: 1525273

URL: http://svn.apache.org/r1525273
Log:
[master] Archive old log files into hourly sub-directory

Author: fan

Summary: Archive old log files into hourly sub-directories so that expired logs can be bulk-deleted one directory at a time.

Test Plan: Unit test TestOldLogsCleaner updated to cover the new hourly-directory archiving and cleanup scenario.

Reviewers: liyintang, mycnyc, jiqingt, aaiyer

Reviewed By: liyintang

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D964068

Task ID: 2851371

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java?rev=1525273&r1=1525272&r2=1525273&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/OldLogsCleaner.java
Sat Sep 21 18:18:16 2013
@@ -20,6 +20,10 @@
 package org.apache.hadoop.hbase.master;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -42,6 +46,8 @@ public class OldLogsCleaner extends Chor
 
   static final Log LOG = LogFactory.getLog(OldLogsCleaner.class.getName());
 
+  private final static Pattern datePattern = Pattern.compile("\\d{4}-\\d{2}-\\d{2}-\\d{2}");
+
   // Max number we can delete on every chore, this is to make sure we don't
   // issue thousands of delete commands around the same time
   private final int maxDeletedLogs;
@@ -92,6 +98,30 @@ public class OldLogsCleaner extends Chor
   }
 
   /**
+   * Delete old hourly log directories directly.
+   * @throws IOException
+   */
+  private void cleanHourlyDirectories(List<FileStatus> hourly) throws IOException {
+    FileStatus[] files = this.fs.listStatus(this.oldLogDir);
+    if (files == null || files.length == 0) {
+      LOG.debug("Old log folder is empty");
+      return;
+    }
+    Arrays.sort(files);
+    // Only delete one hourly sub-directory in one iteration. So we won't delete
+    // too many directories/files in a short period of time.
+    // When the system generates 10000-12000 log files per hour,
+    // around 4GB data is deleted.
+    Path path = files[0].getPath();
+    if (logCleaner.isLogDeletable(path)) {
+      LOG.info("Removing old logs in " + path.toString());
+      this.fs.delete(path, true);
+    } else {
+      LOG.debug("Current hourly directories are not old enough. Oldest directory: " + path.toString());
+    }
+  }
+
+  /**
    * Delete log files directories recursively.
    * @param files The list of files/directories to traverse.
    * @param deleteCountLeft Max number of files to delete
@@ -109,8 +139,15 @@ public class OldLogsCleaner extends Chor
     for (FileStatus file : files) {
       if (deleteCountLeft <= 0) return 0; // we don't have anymore to delete
       if (file.isDir()) {
-        deleteCountLeft = cleanFiles(this.fs.listStatus(file.getPath()),
-                                     deleteCountLeft, maxDepth - 1);
+        FileStatus[] content = this.fs.listStatus(file.getPath());
+        if (content.length == 0) {
+          this.fs.delete(file.getPath(), true);
+          deleteCountLeft++;
+          LOG.debug("Remove empty folder " + file.getPath());
+        } else {
+          deleteCountLeft = cleanFiles(this.fs.listStatus(file.getPath()),
+              deleteCountLeft, maxDepth - 1);
+        }
         continue;
       }
       Path filePath = file.getPath();
@@ -132,10 +169,41 @@ public class OldLogsCleaner extends Chor
   @Override
   protected void chore() {
     try {
-      cleanFiles(this.fs.listStatus(this.oldLogDir), maxDeletedLogs, 2);
+      if (HLog.shouldArchiveToHourlyDir()) {
+        FileStatus[] subdirs = this.fs.listStatus(this.oldLogDir);
+        List<FileStatus> hourly = new ArrayList<FileStatus>();
+        List<FileStatus> legacy = new ArrayList<FileStatus>();
+        for (FileStatus f : subdirs) {
+          if (isMatchDatePattern(f.getPath())) {
+            hourly.add(f);
+          } else {
+            legacy.add(f);
+          }
+        }
+        if (!hourly.isEmpty()) {
+          cleanHourlyDirectories(hourly);
+        }
+        if (!legacy.isEmpty()) {
+          cleanFiles(legacy.toArray(new FileStatus[legacy.size()]), maxDeletedLogs, 2);
+        }
+      } else {
+        cleanFiles(this.fs.listStatus(this.oldLogDir), maxDeletedLogs, 2);
+      }
     } catch (IOException e) {
       e = RemoteExceptionHandler.checkIOException(e);
       LOG.warn("Error while cleaning the logs", e);
     }
   }
+
+  /**
+   * Update TTL setup for log cleaner delegate.
+   * @param c
+   */
+  public void updateLogCleanerConf(Configuration c) {
+    this.logCleaner.setConf(c);
+  }
+
+  public static boolean isMatchDatePattern(Path file) {
+    return datePattern.matcher(file.getName()).matches();
+  }
 }

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java?rev=1525273&r1=1525272&r2=1525273&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/TimeToLiveLogCleaner.java
Sat Sep 21 18:18:16 2013
@@ -20,12 +20,14 @@
 package org.apache.hadoop.hbase.master;
 
 import java.io.IOException;
+import java.text.ParseException;
 
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
 
 /**
  * Log cleaner that uses the timestamp of the hlog to determine if it should
@@ -35,21 +37,31 @@ public class TimeToLiveLogCleaner implem
 
   static final Log LOG =
       LogFactory.getLog(TimeToLiveLogCleaner.class.getName());
+
   private Configuration conf;
   // Configured time a log can be kept after it was closed
   private long ttl;
+  private boolean parseTimeFromPathName;
 
   @Override
   public boolean isLogDeletable(Path filePath) {
     long time = 0;
     long currentTime = System.currentTimeMillis();
     try {
-      FileStatus fStat = filePath.getFileSystem(conf).getFileStatus(filePath);
-      time = fStat.getModificationTime();
+      // If the path name is in hourly format, skip getting modification time
+      if (HLog.shouldArchiveToHourlyDir() && OldLogsCleaner.isMatchDatePattern(filePath))
{
+        time = HLog.DATE_FORMAT.parse(filePath.getName()).getTime();
+      } else {
+        FileStatus fStat = filePath.getFileSystem(conf).getFileStatus(filePath);
+        time = fStat.getModificationTime();
+      }
     } catch (IOException e) {
       LOG.error("Unable to get modification time of file " + filePath.getName() +
       ", not deleting it.", e);
       return false;
+    } catch (ParseException pe) {
+      LOG.error("Unable to parse the date out of given file path " + filePath.getName(),
pe);
+      return false;
     }
     long life = currentTime - time;
     if (life < 0) {

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1525273&r1=1525272&r2=1525273&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
(original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
Sat Sep 21 18:18:16 2013
@@ -31,20 +31,20 @@ import java.io.OutputStream;
 import java.lang.reflect.Method;
 import java.net.URLEncoder;
 import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Calendar;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableSet;
-import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.concurrent.Callable;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.ConcurrentNavigableMap;
 import java.util.concurrent.ConcurrentSkipListMap;
 import java.util.concurrent.CopyOnWriteArrayList;
@@ -142,6 +142,7 @@ public class HLog implements Syncable {
   public static final byte [] METAFAMILY = Bytes.toBytes("METAFAMILY");
   public static final byte [] METAROW = Bytes.toBytes("METAROW");
   public static final boolean SPLIT_SKIP_ERRORS_DEFAULT = false;
+  public static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd-HH");
 
   /*
    * Name of directory that holds recovered edits written by the wal log
@@ -154,6 +155,10 @@ public class HLog implements Syncable {
   /** We include all timestamps by default */
   public static final long DEFAULT_LATEST_TS_TO_INCLUDE = Long.MAX_VALUE;
 
+  // If enabled, old logs will be archived into hourly sub-directories instead of
+  // server address sub-directories.
+  private static boolean ARCHIVE_TO_HOURLY_DIR = false;
+
   private final FileSystem fs;
   private final Path dir;
   private final Configuration conf;
@@ -454,6 +459,7 @@ public class HLog implements Syncable {
       fs.mkdirs(oldLogDir);
     }
     this.oldLogDir = oldLogDir;
+    ARCHIVE_TO_HOURLY_DIR = conf.getBoolean("hbase.hlog.archive.hourlydir", false);
 
     if (!fs.exists(dir)) {
       fs.mkdirs(dir);
@@ -1863,15 +1869,29 @@ public class HLog implements Syncable {
       return new Path(oldLogDir, filename);
     }
 
-    // since the filename is a valid name, we know there
-    // is a last '.' (won't return -1)
-    String subDirectoryName = filename.substring(0, filename.lastIndexOf('.'));
+    String subDirectoryName;
+    if (ARCHIVE_TO_HOURLY_DIR) {
+      // Group into hourly sub-directory
+      subDirectoryName = DATE_FORMAT.format(Calendar.getInstance().getTime());
+    } else {
+      // since the filename is a valid name, we know there
+      // is a last '.' (won't return -1)
+      subDirectoryName = filename.substring(0, filename.lastIndexOf('.'));
+    }
     Path oldLogsSubDir = new Path(oldLogDir, subDirectoryName);
     fs.mkdirs(oldLogsSubDir);
 
     return new Path(oldLogsSubDir, filename);
   }
 
+  public static boolean shouldArchiveToHourlyDir() {
+    return ARCHIVE_TO_HOURLY_DIR;
+  }
+
+  public static void setArchiveToHourlyDir(boolean b) {
+    ARCHIVE_TO_HOURLY_DIR = b;
+  }
+
   /**
    * Takes splitLogsMap and concurrently writes them to region directories using a thread
pool
    *

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java?rev=1525273&r1=1525272&r2=1525273&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java
(original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/master/TestOldLogsCleaner.java
Sat Sep 21 18:18:16 2013
@@ -22,11 +22,11 @@ package org.apache.hadoop.hbase.master;
 import static org.junit.Assert.assertEquals;
 
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Test;
 
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -37,7 +37,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.conf.Configuration;
 
 import java.net.URLEncoder;
-import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.Calendar;
 
 public class TestOldLogsCleaner {
 
@@ -102,8 +102,8 @@ public class TestOldLogsCleaner {
     fs.createNewFile(new Path(oldLogDir, fakeMachineName + "." + (now + 10000) ));
 
     for (FileStatus stat : fs.listStatus(oldLogDir)) {
-        System.out.println(stat.getPath().toString());
-      }
+      System.out.println(stat.getPath().toString());
+    }
 
     assertEquals(34, fs.listStatus(oldLogDir).length);
 
@@ -119,4 +119,73 @@ public class TestOldLogsCleaner {
     assertEquals(2, fs.listStatus(oldLogDir).length);
   }
 
+  @Test
+  public void testLogCleaningWithArchivingToHourlyDir() throws Exception{
+    Configuration c = TEST_UTIL.getConfiguration();
+    // set TTL to delete 5 hours
+    c.setLong("hbase.master.logcleaner.ttl", 4 * 3600 * 1000);
+    c.setBoolean("hbase.hlog.archive.hourlydir", true);
+    HLog.setArchiveToHourlyDir(true);
+    Path oldLogDir = new Path(TEST_UTIL.getTestDir(),
+        HConstants.HREGION_OLDLOGDIR_NAME);
+    String fakeMachineName = URLEncoder.encode("regionserver:60020", "UTF8");
+
+    FileSystem fs = FileSystem.get(c);
+    StoppableImpl stop = new StoppableImpl();
+    OldLogsCleaner cleaner = new OldLogsCleaner(1000, stop,c, fs, oldLogDir);
+
+    // Create 1 invalid directory (considering legacy logs), 10 directories representing
+    // recent 10 hours respectively
+    fs.delete(oldLogDir, true);
+    fs.mkdirs(oldLogDir);
+    Path legacyDir = new Path(oldLogDir, "abc");
+    fs.mkdirs(legacyDir);
+    fs.createNewFile(new Path(legacyDir, "123.456"));
+    Calendar cal = Calendar.getInstance();
+    System.out.println("Now is: " + HLog.DATE_FORMAT.format(cal.getTime()));
+    for (int i = 0; i < 10; i++) {
+      cal.add(Calendar.HOUR, -1);
+      Path hourDir = new Path(oldLogDir, HLog.DATE_FORMAT.format(cal.getTime()));
+      fs.mkdirs(hourDir);
+      fs.createNewFile(new Path(hourDir, new Path(fakeMachineName + "." + i)));
+    }
+
+    for (FileStatus stat : fs.listStatus(oldLogDir)) {
+      System.out.println(stat.getPath().toString());
+    }
+
+    assertEquals(11, fs.listStatus(oldLogDir).length);
+
+    // This will delete oldest sub-directory
+    cleaner.chore();
+
+    assertEquals(10, fs.listStatus(oldLogDir).length);
+
+    // We will delete all log dir older than 4 hours
+    for (int i = 0; i < 10; i++) {
+      cleaner.chore();
+    }
+
+    // We should still see 3 newer dirs and an legacy one
+    assertEquals(4, fs.listStatus(oldLogDir).length);
+
+    Thread.sleep(1000);
+
+    // Update TTL configuration to delete all logs
+    c.setLong("hbase.master.logcleaner.ttl", 800);
+    cleaner.updateLogCleanerConf(c);
+
+    // Delete an hourly dir. File "123.456" should also be deleted this time.
+    cleaner.chore();
+    assertEquals(3, fs.listStatus(oldLogDir).length);
+    assertEquals(0, fs.listStatus(legacyDir).length);
+
+    // Delete an hourly dir. Dir "abc" should also be deleted this time.
+    cleaner.chore();
+    assertEquals(1, fs.listStatus(oldLogDir).length);
+
+    // Delete the last
+    cleaner.chore();
+    assertEquals(0, fs.listStatus(oldLogDir).length);
+  }
 }



Mime
View raw message