Return-Path: X-Original-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Delivered-To: apmail-hadoop-mapreduce-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id A79E710CAB for ; Mon, 28 Oct 2013 23:56:06 +0000 (UTC) Received: (qmail 3290 invoked by uid 500); 28 Oct 2013 23:56:06 -0000 Delivered-To: apmail-hadoop-mapreduce-commits-archive@hadoop.apache.org Received: (qmail 3164 invoked by uid 500); 28 Oct 2013 23:56:06 -0000 Mailing-List: contact mapreduce-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: mapreduce-dev@hadoop.apache.org Delivered-To: mailing list mapreduce-commits@hadoop.apache.org Received: (qmail 3156 invoked by uid 99); 28 Oct 2013 23:56:06 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 28 Oct 2013 23:56:06 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 28 Oct 2013 23:56:03 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id CCEB6238899C; Mon, 28 Oct 2013 23:55:41 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1536562 - in /hadoop/common/branches/branch-2.2/hadoop-mapreduce-project: ./ hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/ hadoop-mapreduce-client/hadoop-mapreduce-client-co... 
Date: Mon, 28 Oct 2013 23:55:41 -0000 To: mapreduce-commits@hadoop.apache.org From: sandy@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20131028235541.CCEB6238899C@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: sandy Date: Mon Oct 28 23:55:41 2013 New Revision: 1536562 URL: http://svn.apache.org/r1536562 Log: MAPREDUCE-4680. Job history cleaner should only check timestamps of files in old enough directories (Robert Kanter via Sandy Ryza) Added: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestJobHistoryUtils.java - copied unchanged from r1536558, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestJobHistoryUtils.java Modified: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java Modified: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt?rev=1536562&r1=1536561&r2=1536562&view=diff ============================================================================== --- hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/CHANGES.txt Mon Oct 28 23:55:41 2013 @@ -20,6 +20,9 @@ Release 2.2.1 - UNRELEASED OPTIMIZATIONS + MAPREDUCE-4680. 
Job history cleaner should only check timestamps of files in + old enough directories (Robert Kanter via Sandy Ryza) + BUG FIXES MAPREDUCE-5569. FloatSplitter is not generating correct splits (Nathan Modified: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java?rev=1536562&r1=1536561&r2=1536562&view=diff ============================================================================== --- hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java (original) +++ hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java Mon Oct 28 23:55:41 2013 @@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.j import java.io.File; import java.io.IOException; import java.util.Calendar; +import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; @@ -499,4 +500,72 @@ public class JobHistoryUtils { return fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile( histDirPath,jobId, (applicationAttemptId.getAttemptId() - 1))); } + + /** + * Looks for the dirs to clean. The folder structure is YYYY/MM/DD/Serial so + * we can use that to more efficiently find the directories to clean by + * comparing the cutoff timestamp with the timestamp from the folder + * structure. 
+ * + * @param fc done dir FileContext + * @param root folder for completed jobs + * @param cutoff The cutoff for the max history age + * @return The list of directories for cleaning + * @throws IOException + */ + public static List<FileStatus> getHistoryDirsForCleaning(FileContext fc, + Path root, long cutoff) throws IOException { + List<FileStatus> fsList = new ArrayList<FileStatus>(); + Calendar cCal = Calendar.getInstance(); + cCal.setTimeInMillis(cutoff); + int cYear = cCal.get(Calendar.YEAR); + int cMonth = cCal.get(Calendar.MONTH) + 1; + int cDate = cCal.get(Calendar.DATE); + + RemoteIterator<FileStatus> yearDirIt = fc.listStatus(root); + while (yearDirIt.hasNext()) { + FileStatus yearDir = yearDirIt.next(); + try { + int year = Integer.parseInt(yearDir.getPath().getName()); + if (year <= cYear) { + RemoteIterator<FileStatus> monthDirIt = + fc.listStatus(yearDir.getPath()); + while (monthDirIt.hasNext()) { + FileStatus monthDir = monthDirIt.next(); + try { + int month = Integer.parseInt(monthDir.getPath().getName()); + // If we only checked the month here, then something like 07/2013 + // would incorrectly not pass when the cutoff is 06/2014 + if (year < cYear || month <= cMonth) { + RemoteIterator<FileStatus> dateDirIt = + fc.listStatus(monthDir.getPath()); + while (dateDirIt.hasNext()) { + FileStatus dateDir = dateDirIt.next(); + try { + int date = Integer.parseInt(dateDir.getPath().getName()); + // If we only checked the date here, then something like + // 07/21/2013 would incorrectly not pass when the cutoff is + // 08/20/2013 or 07/20/2012 + if (year < cYear || month < cMonth || date <= cDate) { + fsList.addAll(remoteIterToList( + fc.listStatus(dateDir.getPath()))); + } + } catch (NumberFormatException nfe) { + // the directory didn't fit the format we're looking for so + // skip the dir + } + } + } + } catch (NumberFormatException nfe) { + // the directory didn't fit the format we're looking for so skip + // the dir + } + } + } + } catch (NumberFormatException nfe) { + // the directory didn't fit the format we're looking 
for so skip the dir + } + } + return fsList; + } } Modified: hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java?rev=1536562&r1=1536561&r2=1536562&view=diff ============================================================================== --- hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java (original) +++ hadoop/common/branches/branch-2.2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java Mon Oct 28 23:55:41 2013 @@ -919,6 +919,11 @@ public class HistoryFileManager extends fileInfo.delete(); } + List<FileStatus> getHistoryDirsForCleaning(long cutoff) throws IOException { + return JobHistoryUtils. + getHistoryDirsForCleaning(doneDirFc, doneDirPrefixPath, cutoff); + } + /** * Clean up older history files. * @@ -927,12 +932,9 @@ public class HistoryFileManager extends */ @SuppressWarnings("unchecked") void clean() throws IOException { - // TODO this should be replaced by something that knows about the directory - // structure and will put less of a load on HDFS. long cutoff = System.currentTimeMillis() - maxHistoryAge; boolean halted = false; - // TODO Delete YYYY/MM/DD directories. - List<FileStatus> serialDirList = findTimestampedDirectories(); + List<FileStatus> serialDirList = getHistoryDirsForCleaning(cutoff); // Sort in ascending order. Relies on YYYY/MM/DD/Serial Collections.sort(serialDirList); for (FileStatus serialDir : serialDirList) {