hadoop-mapreduce-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sa...@apache.org
Subject svn commit: r1536562 - in /hadoop/common/branches/branch-2.2/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/ hadoop-mapreduce-client/hadoop-mapreduce-client-co...
Date Mon, 28 Oct 2013 23:55:41 GMT
Author: sandy
Date: Mon Oct 28 23:55:41 2013
New Revision: 1536562

URL: http://svn.apache.org/r1536562
Log:
MAPREDUCE-4680. Job history cleaner should only check timestamps of files in old enough directories
(Robert Kanter via Sandy Ryza)

Added:
    hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestJobHistoryUtils.java
      - copied unchanged from r1536558, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestJobHistoryUtils.java
Modified:
    hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java
    hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java

Modified: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt?rev=1536562&r1=1536561&r2=1536562&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt Mon Oct 28 23:55:41 2013
@@ -20,6 +20,9 @@ Release 2.2.1 - UNRELEASED
 
   OPTIMIZATIONS
 
+    MAPREDUCE-4680. Job history cleaner should only check timestamps of files in
+    old enough directories (Robert Kanter via Sandy Ryza)
+
   BUG FIXES
 
     MAPREDUCE-5569. FloatSplitter is not generating correct splits (Nathan

Modified: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java?rev=1536562&r1=1536561&r2=1536562&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java (original)
+++ hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java Mon Oct 28 23:55:41 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.j
 import java.io.File;
 import java.io.IOException;
 import java.util.Calendar;
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -499,4 +500,72 @@ public class JobHistoryUtils {
     return fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile(
         histDirPath,jobId, (applicationAttemptId.getAttemptId() - 1)));
   }
+
+  /**
+   * Looks for the dirs to clean.  The folder structure is YYYY/MM/DD/Serial so
+   * we can use that to more efficiently find the directories to clean by
+   * comparing the cutoff timestamp with the timestamp from the folder
+   * structure.
+   *
+   * @param fc done dir FileContext
+   * @param root folder for completed jobs
+   * @param cutoff The cutoff for the max history age
+   * @return The list of directories for cleaning
+   * @throws IOException
+   */
+  public static List<FileStatus> getHistoryDirsForCleaning(FileContext fc,
+      Path root, long cutoff) throws IOException {
+    List<FileStatus> fsList = new ArrayList<FileStatus>();
+    Calendar cCal = Calendar.getInstance();
+    cCal.setTimeInMillis(cutoff);
+    int cYear = cCal.get(Calendar.YEAR);
+    int cMonth = cCal.get(Calendar.MONTH) + 1;
+    int cDate = cCal.get(Calendar.DATE);
+
+    RemoteIterator<FileStatus> yearDirIt = fc.listStatus(root);
+    while (yearDirIt.hasNext()) {
+      FileStatus yearDir = yearDirIt.next();
+      try {
+        int year = Integer.parseInt(yearDir.getPath().getName());
+        if (year <= cYear) {
+          RemoteIterator<FileStatus> monthDirIt =
+              fc.listStatus(yearDir.getPath());
+          while (monthDirIt.hasNext()) {
+            FileStatus monthDir = monthDirIt.next();
+            try {
+              int month = Integer.parseInt(monthDir.getPath().getName());
+              // If we only checked the month here, then something like 07/2013
+              // would incorrectly not pass when the cutoff is 06/2014
+              if (year < cYear || month <= cMonth) {
+                RemoteIterator<FileStatus> dateDirIt =
+                    fc.listStatus(monthDir.getPath());
+                while (dateDirIt.hasNext()) {
+                  FileStatus dateDir = dateDirIt.next();
+                  try {
+                    int date = Integer.parseInt(dateDir.getPath().getName());
+                    // If we only checked the date here, then something like
+                    // 07/21/2013 would incorrectly not pass when the cutoff is
+                    // 08/20/2013 or 07/20/2012
+                    if (year < cYear || month < cMonth || date <= cDate) {
+                      fsList.addAll(remoteIterToList(
+                          fc.listStatus(dateDir.getPath())));
+                    }
+                  } catch (NumberFormatException nfe) {
+                    // the directory didn't fit the format we're looking for so
+                    // skip the dir
+                  }
+                }
+              }
+            } catch (NumberFormatException nfe) {
+              // the directory didn't fit the format we're looking for so skip
+              // the dir
+            }
+          }
+        }
+      } catch (NumberFormatException nfe) {
+        // the directory didn't fit the format we're looking for so skip the dir
+      }
+    }
+    return fsList;
+  }
 }

Modified: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java?rev=1536562&r1=1536561&r2=1536562&view=diff
==============================================================================
--- hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java (original)
+++ hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java Mon Oct 28 23:55:41 2013
@@ -919,6 +919,11 @@ public class HistoryFileManager extends 
     fileInfo.delete();
   }
 
+  List<FileStatus> getHistoryDirsForCleaning(long cutoff) throws IOException {
+      return JobHistoryUtils.
+        getHistoryDirsForCleaning(doneDirFc, doneDirPrefixPath, cutoff);
+  }
+
   /**
    * Clean up older history files.
    * 
@@ -927,12 +932,9 @@ public class HistoryFileManager extends 
    */
   @SuppressWarnings("unchecked")
   void clean() throws IOException {
-    // TODO this should be replaced by something that knows about the directory
-    // structure and will put less of a load on HDFS.
     long cutoff = System.currentTimeMillis() - maxHistoryAge;
     boolean halted = false;
-    // TODO Delete YYYY/MM/DD directories.
-    List<FileStatus> serialDirList = findTimestampedDirectories();
+    List<FileStatus> serialDirList = getHistoryDirsForCleaning(cutoff);
     // Sort in ascending order. Relies on YYYY/MM/DD/Serial
     Collections.sort(serialDirList);
     for (FileStatus serialDir : serialDirList) {



Mime
View raw message