Subject: svn commit: r1491546 - /hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
Date: Mon, 10 Jun 2013 18:04:28 -0000
From: eclark@apache.org
To: commits@hbase.apache.org
Reply-To: dev@hbase.apache.org
Message-Id: <20130610180428.CE8C023888FE@eris.apache.org>

Author: eclark
Date: Mon Jun 10 18:04:28 2013
New Revision: 1491546

URL: http://svn.apache.org/r1491546
Log:
HBASE-8283 Backport HBASE-7842 Add compaction policy that explores more storefile groups to 0.94

Modified:
    hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java

Modified: hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java?rev=1491546&r1=1491545&r2=1491546&view=diff
==============================================================================
--- hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java (original)
+++ hbase/branches/0.94/src/main/java/org/apache/hadoop/hbase/regionserver/Store.java Mon Jun 10 18:04:28 2013
@@ -1553,9 +1553,6 @@ public class Store extends SchemaConfigu
     if (!majorcompaction &&
         !hasReferences(compactSelection.getFilesToCompact())) {
-      // we're doing a minor compaction, let's see what files are applicable
-      int start = 0;
-      double r = compactSelection.getCompactSelectionRatio();
 
       // remove bulk import files that request to be excluded from minors
       compactSelection.getFilesToCompact().removeAll(Collections2.filter(
@@ -1576,25 +1573,48 @@ public class Store extends SchemaConfigu
         compactSelection.emptyFileList();
         return compactSelection;
       }
-
-      /* TODO: add sorting + unit test back in when HBASE-2856 is fixed
-      // Sort files by size to correct when normal skew is altered by bulk load.
-      Collections.sort(filesToCompact, StoreFile.Comparators.FILE_SIZE);
-      */
-
-      // get store file sizes for incremental compacting selection.
-      int countOfFiles = compactSelection.getFilesToCompact().size();
-      long [] fileSizes = new long[countOfFiles];
-      long [] sumSize = new long[countOfFiles];
-      for (int i = countOfFiles-1; i >= 0; --i) {
-        StoreFile file = compactSelection.getFilesToCompact().get(i);
-        fileSizes[i] = file.getReader().length();
-        // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo
-        int tooFar = i + this.maxFilesToCompact - 1;
-        sumSize[i] = fileSizes[i]
-          + ((i+1 < countOfFiles) ? sumSize[i+1] : 0)
-          - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
+      if (conf.getBoolean("hbase.hstore.useExploringCompation", false)) {
+        compactSelection = exploringCompactionSelection(compactSelection);
+      } else {
+        compactSelection = defaultCompactionSelection(compactSelection);
       }
+    } else {
+      if (majorcompaction) {
+        if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
+          LOG.debug("Warning, compacting more than " + this.maxFilesToCompact +
+            " files, probably because of a user-requested major compaction");
+          if (priority != PRIORITY_USER) {
+            LOG.error("Compacting more than max files on a non user-requested compaction");
+          }
+        }
+      } else if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
+        // all files included in this compaction, up to max
+        int pastMax = compactSelection.getFilesToCompact().size() - this.maxFilesToCompact;
+        compactSelection.getFilesToCompact().subList(0, pastMax).clear();
+      }
+    }
+    return compactSelection;
+  }
+
+  private CompactSelection defaultCompactionSelection(CompactSelection compactSelection) {
+    // we're doing a minor compaction, let's see what files are applicable
+    int start = 0;
+
+    double r = compactSelection.getCompactSelectionRatio();
+
+    // get store file sizes for incremental compacting selection.
+    int countOfFiles = compactSelection.getFilesToCompact().size();
+    long [] fileSizes = new long[countOfFiles];
+    long [] sumSize = new long[countOfFiles];
+    for (int i = countOfFiles-1; i >= 0; --i) {
+      StoreFile file = compactSelection.getFilesToCompact().get(i);
+      fileSizes[i] = file.getReader().length();
+      // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for the algorithm
+      int tooFar = i + this.maxFilesToCompact - 1;
+      sumSize[i] = fileSizes[i]
+        + ((i+1 < countOfFiles) ? sumSize[i+1] : 0)
+        - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0);
+    }
 
     /* Start at the oldest file and stop when you find the first file that
      * meets compaction criteria:
@@ -1609,43 +1629,143 @@ public class Store extends SchemaConfigu
      * situation where we always compact [end-threshold,end). Then, the
      * last file becomes an aggregate of the previous compactions.
      */
-      while(countOfFiles - start >= this.minFilesToCompact &&
-          fileSizes[start] >
-            Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
-        ++start;
-      }
-      int end = Math.min(countOfFiles, start + this.maxFilesToCompact);
-      long totalSize = fileSizes[start]
-        + ((start+1 < countOfFiles) ? sumSize[start+1] : 0);
-      compactSelection = compactSelection.getSubList(start, end);
-
-      // if we don't have enough files to compact, just wait
-      if (compactSelection.getFilesToCompact().size() < this.minFilesToCompact) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipped compaction of " + this
+    while (countOfFiles - start >= this.minFilesToCompact &&
+        fileSizes[start] >
+          Math.max(minCompactSize, (long)(sumSize[start+1] * r))) {
+      ++start;
+    }
+    int end = Math.min(countOfFiles, start + this.maxFilesToCompact);
+    long totalSize = fileSizes[start]
+      + ((start+1 < countOfFiles) ? sumSize[start+1] : 0);
+    compactSelection = compactSelection.getSubList(start, end);
+
+    // if we don't have enough files to compact, just wait
+    if (compactSelection.getFilesToCompact().size() < this.minFilesToCompact) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Skipped compaction of " + this
           + ". Only " + (end - start) + " file(s) of size "
          + StringUtils.humanReadableInt(totalSize)
          + " have met compaction criteria.");
-        }
-        compactSelection.emptyFileList();
-        return compactSelection;
       }
-    } else {
-      if(majorcompaction) {
-        if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
-          LOG.debug("Warning, compacting more than " + this.maxFilesToCompact +
-            " files, probably because of a user-requested major compaction");
-          if(priority != PRIORITY_USER) {
-            LOG.error("Compacting more than max files on a non user-requested compaction");
-          }
+      compactSelection.emptyFileList();
+      return compactSelection;
+    }
+    return compactSelection;
+  }
+
+  private CompactSelection exploringCompactionSelection(CompactSelection compactSelection) {
+
+    List<StoreFile> candidates = compactSelection.getFilesToCompact();
+    int futureFiles = filesCompacting.isEmpty() ? 0 : 1;
+    boolean mayBeStuck = (candidates.size() - filesCompacting.size() + futureFiles)
+        >= blockingStoreFileCount;
+    // Start off choosing nothing.
+    List<StoreFile> bestSelection = new ArrayList<StoreFile>(0);
+    List<StoreFile> smallest = new ArrayList<StoreFile>(0);
+    long bestSize = 0;
+    long smallestSize = Long.MAX_VALUE;
+    double r = compactSelection.getCompactSelectionRatio();
+
+    // Consider every starting place.
+    for (int startIndex = 0; startIndex < candidates.size(); startIndex++) {
+      // Consider every different sub list permutation in between start and end with min files.
+      for (int currentEnd = startIndex + minFilesToCompact - 1;
+          currentEnd < candidates.size(); currentEnd++) {
+        List<StoreFile> potentialMatchFiles = candidates.subList(startIndex, currentEnd + 1);
+
+        // Sanity checks
+        if (potentialMatchFiles.size() < minFilesToCompact) {
+          continue;
+        }
+        if (potentialMatchFiles.size() > maxFilesToCompact) {
+          continue;
+        }
+
+        // Compute the total size of files that will
+        // have to be read if this set of files is compacted.
+        long size = getCompactionSize(potentialMatchFiles);
+
+        // Store the smallest set of files. This stored set of files will be used
+        // if it looks like the algorithm is stuck.
+        if (size < smallestSize) {
+          smallest = potentialMatchFiles;
+          smallestSize = size;
+        }
+
+        if (size >= minCompactSize
+            && !filesInRatio(potentialMatchFiles, r)) {
+          continue;
+        }
+
+        if (size > maxCompactSize) {
+          continue;
+        }
+
+        // Keep if this gets rid of more files, or the same number of files for less I/O.
+        if (potentialMatchFiles.size() > bestSelection.size()
+            || (potentialMatchFiles.size() == bestSelection.size() && size < bestSize)) {
+          bestSelection = potentialMatchFiles;
+          bestSize = size;
+        }
       }
-    } else if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) {
-      // all files included in this compaction, up to max
-      int pastMax = compactSelection.getFilesToCompact().size() - this.maxFilesToCompact;
-      compactSelection.getFilesToCompact().subList(0, pastMax).clear();
-    }
     }
+
+    if (bestSelection.size() == 0 && mayBeStuck) {
+      smallest = new ArrayList<StoreFile>(smallest);
+      compactSelection.getFilesToCompact().clear();
+      compactSelection.getFilesToCompact().addAll(smallest);
+    } else {
+      bestSelection = new ArrayList<StoreFile>(bestSelection);
+      compactSelection.getFilesToCompact().clear();
+      compactSelection.getFilesToCompact().addAll(bestSelection);
+    }
+    return compactSelection;
+
+  }
+
+  /**
+   * Check that all files satisfy the ratio.
+   *
+   * @param files set of files to examine.
+   * @param currentRatio The ratio.
+   * @return if all files are within the ratio.
+   */
+  private boolean filesInRatio(final List<StoreFile> files, final double currentRatio) {
+    if (files.size() < 2) {
+      return true;
+    }
+    long totalFileSize = 0;
+    for (int i = 0; i < files.size(); i++) {
+      totalFileSize += files.get(i).getReader().length();
+    }
+    for (int i = 0; i < files.size(); i++) {
+      long singleFileSize = files.get(i).getReader().length();
+      long sumAllOtherFileSize = totalFileSize - singleFileSize;
+
+      if ((singleFileSize > sumAllOtherFileSize * currentRatio)
+          && (sumAllOtherFileSize >= this.minCompactSize)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Get the number of bytes a proposed compaction would have to read.
+   *
+   * @param files Set of files in a proposed compaction.
+   * @return size in bytes.
+   */
+  private long getCompactionSize(final List<StoreFile> files) {
+    long size = 0;
+    if (files == null) {
+      return size;
+    }
+    for (StoreFile f : files) {
+      size += f.getReader().length();
+    }
+    return size;
+  }
 
   /**
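
For readers who want to experiment with the selection heuristic outside of
HBase, below is a minimal standalone sketch of the exploring loop this patch
adds. It is an illustration, not the committed Store.java code: store files
are reduced to plain long sizes, the class name and the threshold values in
main() are made up for the example, and the mayBeStuck fallback to the
smallest window is omitted for brevity.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ExploringSelectionSketch {

  // Bytes a compaction of these files would have to read.
  static long totalSize(List<Long> files) {
    long size = 0;
    for (long f : files) {
      size += f;
    }
    return size;
  }

  // True if every file is at most ratio times the sum of all the others.
  static boolean filesInRatio(List<Long> files, double ratio, long minCompactSize) {
    if (files.size() < 2) {
      return true;
    }
    long total = totalSize(files);
    for (long single : files) {
      long others = total - single;
      if (single > others * ratio && others >= minCompactSize) {
        return false;
      }
    }
    return true;
  }

  // Examine every contiguous window of candidates and keep the window that
  // compacts the most files, breaking ties by the smaller read size.
  static List<Long> select(List<Long> candidates, int minFiles, int maxFiles,
      long minCompactSize, long maxCompactSize, double ratio) {
    List<Long> best = new ArrayList<Long>();
    long bestSize = 0;
    for (int start = 0; start < candidates.size(); start++) {
      for (int end = start + minFiles - 1; end < candidates.size(); end++) {
        List<Long> window = candidates.subList(start, end + 1);
        if (window.size() > maxFiles) {
          continue;
        }
        long size = totalSize(window);
        // Reject windows that are too big or whose size skew violates the ratio.
        if (size > maxCompactSize) {
          continue;
        }
        if (size >= minCompactSize && !filesInRatio(window, ratio, minCompactSize)) {
          continue;
        }
        // More files beats fewer; equal counts prefer less I/O.
        if (window.size() > best.size()
            || (window.size() == best.size() && size < bestSize)) {
          best = new ArrayList<Long>(window);
          bestSize = size;
        }
      }
    }
    return best;
  }

  public static void main(String[] args) {
    // Five store files; the 1 GB file skews the size ratio badly.
    List<Long> sizes = Arrays.asList(
        1024L << 20, 10L << 20, 9L << 20, 8L << 20, 7L << 20);
    // minFiles=3, maxFiles=10, minCompactSize=8 MB, maxCompactSize=2 GB, ratio=1.2
    List<Long> picked = select(sizes, 3, 10, 8L << 20, 2048L << 20, 1.2);
    // Prints the four small files: every window containing the 1 GB outlier
    // fails the ratio check and is skipped.
    System.out.println(picked);
  }
}

Compared with the old selection, which scans from the oldest file and can
wedge behind a single oversized file, evaluating every window lets the
selector step over the 1 GB outlier and still compact the small files behind
it.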