Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 813C4200CB6 for ; Thu, 29 Jun 2017 17:01:53 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 7F9C0160BED; Thu, 29 Jun 2017 15:01:53 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 379E2160C04 for ; Thu, 29 Jun 2017 17:01:51 +0200 (CEST) Received: (qmail 3205 invoked by uid 500); 29 Jun 2017 15:01:48 -0000 Mailing-List: contact commits-help@hbase.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@hbase.apache.org Delivered-To: mailing list commits@hbase.apache.org Received: (qmail 2190 invoked by uid 99); 29 Jun 2017 15:01:48 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 29 Jun 2017 15:01:48 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 76B34F170C; Thu, 29 Jun 2017 15:01:47 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: git-site-role@apache.org To: commits@hbase.apache.org Date: Thu, 29 Jun 2017 15:01:58 -0000 Message-Id: <6800dc3b9ea04954898fb25c40d1b347@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [12/16] hbase-site git commit: Published site at 82d554e3783372cc6b05489452c815b57c06f6cd. archived-at: Thu, 29 Jun 2017 15:01:53 -0000 http://git-wip-us.apache.org/repos/asf/hbase-site/blob/8346f194/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/StoreFileWriter.Builder.html ---------------------------------------------------------------------- diff --git a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/StoreFileWriter.Builder.html b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/StoreFileWriter.Builder.html index 1e05c3f..4420c74 100644 --- a/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/StoreFileWriter.Builder.html +++ b/devapidocs/src-html/org/apache/hadoop/hbase/regionserver/StoreFileWriter.Builder.html @@ -31,505 +31,506 @@ 023import java.io.IOException; 024import java.net.InetSocketAddress; 025import java.util.UUID; -026 -027import org.apache.commons.logging.Log; -028import org.apache.commons.logging.LogFactory; -029import org.apache.hadoop.conf.Configuration; -030import org.apache.hadoop.fs.FileSystem; -031import org.apache.hadoop.fs.Path; -032import org.apache.hadoop.hbase.Cell; -033import org.apache.hadoop.hbase.CellComparator; -034import org.apache.hadoop.hbase.CellUtil; -035import org.apache.hadoop.hbase.HColumnDescriptor; -036import org.apache.hadoop.hbase.HConstants; -037import org.apache.hadoop.hbase.KeyValue; -038import org.apache.hadoop.hbase.classification.InterfaceAudience; -039import org.apache.hadoop.hbase.io.hfile.CacheConfig; -040import org.apache.hadoop.hbase.io.hfile.HFile; -041import org.apache.hadoop.hbase.io.hfile.HFileContext; -042import org.apache.hadoop.hbase.util.BloomContext; -043import org.apache.hadoop.hbase.util.BloomFilterFactory; -044import org.apache.hadoop.hbase.util.BloomFilterWriter; -045import org.apache.hadoop.hbase.util.Bytes; -046import org.apache.hadoop.hbase.util.FSUtils; -047import org.apache.hadoop.hbase.util.RowBloomContext; -048import org.apache.hadoop.hbase.util.RowColBloomContext; -049import org.apache.hadoop.io.WritableUtils; -050 -051/** -052 * A StoreFile writer. Use this to read/write HBase Store Files. It is package -053 * local because it is an implementation detail of the HBase regionserver. -054 */ -055@InterfaceAudience.Private -056public class StoreFileWriter implements CellSink, ShipperListener { -057 private static final Log LOG = LogFactory.getLog(StoreFileWriter.class.getName()); -058 -059 private final BloomFilterWriter generalBloomFilterWriter; -060 private final BloomFilterWriter deleteFamilyBloomFilterWriter; -061 private final BloomType bloomType; -062 private long earliestPutTs = HConstants.LATEST_TIMESTAMP; -063 private long deleteFamilyCnt = 0; -064 private BloomContext bloomContext = null; -065 private BloomContext deleteFamilyBloomContext = null; -066 -067 /** -068 * timeRangeTrackerSet is used to figure if we were passed a filled-out TimeRangeTracker or not. -069 * When flushing a memstore, we set the TimeRangeTracker that it accumulated during updates to -070 * memstore in here into this Writer and use this variable to indicate that we do not need to -071 * recalculate the timeRangeTracker bounds; it was done already as part of add-to-memstore. -072 * A completed TimeRangeTracker is not set in cases of compactions when it is recalculated. -073 */ -074 private final boolean timeRangeTrackerSet; -075 final TimeRangeTracker timeRangeTracker; -076 -077 protected HFile.Writer writer; -078 -079 /** -080 * Creates an HFile.Writer that also write helpful meta data. -081 * @param fs file system to write to -082 * @param path file name to create -083 * @param conf user configuration -084 * @param comparator key comparator -085 * @param bloomType bloom filter setting -086 * @param maxKeys the expected maximum number of keys to be added. Was used -087 * for Bloom filter size in {@link HFile} format version 1. -088 * @param fileContext - The HFile context -089 * @param shouldDropCacheBehind Drop pages written to page cache after writing the store file. -090 * @throws IOException problem writing to FS -091 */ -092 StoreFileWriter(FileSystem fs, Path path, final Configuration conf, CacheConfig cacheConf, -093 final CellComparator comparator, BloomType bloomType, long maxKeys, -094 InetSocketAddress[] favoredNodes, HFileContext fileContext, boolean shouldDropCacheBehind) -095 throws IOException { -096 this(fs, path, conf, cacheConf, comparator, bloomType, maxKeys, favoredNodes, fileContext, -097 shouldDropCacheBehind, null); -098 } -099 -100 /** -101 * Creates an HFile.Writer that also write helpful meta data. -102 * @param fs file system to write to -103 * @param path file name to create -104 * @param conf user configuration -105 * @param comparator key comparator -106 * @param bloomType bloom filter setting -107 * @param maxKeys the expected maximum number of keys to be added. Was used -108 * for Bloom filter size in {@link HFile} format version 1. -109 * @param favoredNodes -110 * @param fileContext - The HFile context -111 * @param shouldDropCacheBehind Drop pages written to page cache after writing the store file. -112 * @param trt Ready-made timetracker to use. -113 * @throws IOException problem writing to FS -114 */ -115 private StoreFileWriter(FileSystem fs, Path path, -116 final Configuration conf, -117 CacheConfig cacheConf, -118 final CellComparator comparator, BloomType bloomType, long maxKeys, -119 InetSocketAddress[] favoredNodes, HFileContext fileContext, -120 boolean shouldDropCacheBehind, final TimeRangeTracker trt) -121 throws IOException { -122 // If passed a TimeRangeTracker, use it. Set timeRangeTrackerSet so we don't destroy it. -123 // TODO: put the state of the TRT on the TRT; i.e. make a read-only version (TimeRange) when -124 // it no longer writable. -125 this.timeRangeTrackerSet = trt != null; -126 this.timeRangeTracker = this.timeRangeTrackerSet? trt: new TimeRangeTracker(); -127 // TODO : Change all writers to be specifically created for compaction context -128 writer = HFile.getWriterFactory(conf, cacheConf) -129 .withPath(fs, path) -130 .withComparator(comparator) -131 .withFavoredNodes(favoredNodes) -132 .withFileContext(fileContext) -133 .withShouldDropCacheBehind(shouldDropCacheBehind) -134 .create(); -135 -136 generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite( -137 conf, cacheConf, bloomType, -138 (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); -139 -140 if (generalBloomFilterWriter != null) { -141 this.bloomType = bloomType; -142 if (LOG.isTraceEnabled()) { -143 LOG.trace("Bloom filter type for " + path + ": " + this.bloomType + ", " + -144 generalBloomFilterWriter.getClass().getSimpleName()); -145 } -146 // init bloom context -147 switch (bloomType) { -148 case ROW: -149 bloomContext = new RowBloomContext(generalBloomFilterWriter, comparator); -150 break; -151 case ROWCOL: -152 bloomContext = new RowColBloomContext(generalBloomFilterWriter, comparator); -153 break; -154 default: -155 throw new IOException( -156 "Invalid Bloom filter type: " + bloomType + " (ROW or ROWCOL expected)"); -157 } -158 } else { -159 // Not using Bloom filters. -160 this.bloomType = BloomType.NONE; -161 } -162 -163 // initialize delete family Bloom filter when there is NO RowCol Bloom -164 // filter -165 if (this.bloomType != BloomType.ROWCOL) { -166 this.deleteFamilyBloomFilterWriter = BloomFilterFactory -167 .createDeleteBloomAtWrite(conf, cacheConf, -168 (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); -169 deleteFamilyBloomContext = new RowBloomContext(deleteFamilyBloomFilterWriter, comparator); -170 } else { -171 deleteFamilyBloomFilterWriter = null; -172 } -173 if (deleteFamilyBloomFilterWriter != null && LOG.isTraceEnabled()) { -174 LOG.trace("Delete Family Bloom filter type for " + path + ": " + -175 deleteFamilyBloomFilterWriter.getClass().getSimpleName()); -176 } -177 } -178 -179 /** -180 * Writes meta data. -181 * Call before {@link #close()} since its written as meta data to this file. -182 * @param maxSequenceId Maximum sequence id. -183 * @param majorCompaction True if this file is product of a major compaction -184 * @throws IOException problem writing to FS -185 */ -186 public void appendMetadata(final long maxSequenceId, final boolean majorCompaction) -187 throws IOException { -188 writer.appendFileInfo(StoreFile.MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); -189 writer.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, -190 Bytes.toBytes(majorCompaction)); -191 appendTrackedTimestampsToMetadata(); -192 } -193 -194 /** -195 * Writes meta data. -196 * Call before {@link #close()} since its written as meta data to this file. -197 * @param maxSequenceId Maximum sequence id. -198 * @param majorCompaction True if this file is product of a major compaction -199 * @param mobCellsCount The number of mob cells. -200 * @throws IOException problem writing to FS -201 */ -202 public void appendMetadata(final long maxSequenceId, final boolean majorCompaction, -203 final long mobCellsCount) throws IOException { -204 writer.appendFileInfo(StoreFile.MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); -205 writer.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction)); -206 writer.appendFileInfo(StoreFile.MOB_CELLS_COUNT, Bytes.toBytes(mobCellsCount)); -207 appendTrackedTimestampsToMetadata(); -208 } -209 -210 /** -211 * Add TimestampRange and earliest put timestamp to Metadata -212 */ -213 public void appendTrackedTimestampsToMetadata() throws IOException { -214 appendFileInfo(StoreFile.TIMERANGE_KEY, WritableUtils.toByteArray(timeRangeTracker)); -215 appendFileInfo(StoreFile.EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs)); -216 } -217 -218 /** -219 * Record the earlest Put timestamp. -220 * -221 * If the timeRangeTracker is not set, -222 * update TimeRangeTracker to include the timestamp of this key -223 */ -224 public void trackTimestamps(final Cell cell) { -225 if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) { -226 earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp()); -227 } -228 if (!timeRangeTrackerSet) { -229 timeRangeTracker.includeTimestamp(cell); -230 } -231 } -232 -233 private void appendGeneralBloomfilter(final Cell cell) throws IOException { -234 if (this.generalBloomFilterWriter != null) { -235 /* -236 * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png -237 * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + TimeStamp -238 * -239 * 2 Types of Filtering: -240 * 1. Row = Row -241 * 2. RowCol = Row + Qualifier -242 */ -243 bloomContext.writeBloom(cell); -244 } -245 } -246 -247 private void appendDeleteFamilyBloomFilter(final Cell cell) -248 throws IOException { -249 if (!CellUtil.isDeleteFamily(cell) && !CellUtil.isDeleteFamilyVersion(cell)) { -250 return; -251 } -252 -253 // increase the number of delete family in the store file -254 deleteFamilyCnt++; -255 if (this.deleteFamilyBloomFilterWriter != null) { -256 deleteFamilyBloomContext.writeBloom(cell); -257 } -258 } -259 -260 @Override -261 public void append(final Cell cell) throws IOException { -262 appendGeneralBloomfilter(cell); -263 appendDeleteFamilyBloomFilter(cell); -264 writer.append(cell); -265 trackTimestamps(cell); -266 } -267 -268 @Override -269 public void beforeShipped() throws IOException { -270 // For now these writer will always be of type ShipperListener true. -271 // TODO : Change all writers to be specifically created for compaction context -272 writer.beforeShipped(); -273 if (generalBloomFilterWriter != null) { -274 generalBloomFilterWriter.beforeShipped(); -275 } -276 if (deleteFamilyBloomFilterWriter != null) { -277 deleteFamilyBloomFilterWriter.beforeShipped(); -278 } -279 } -280 -281 public Path getPath() { -282 return this.writer.getPath(); -283 } -284 -285 public boolean hasGeneralBloom() { -286 return this.generalBloomFilterWriter != null; -287 } -288 -289 /** -290 * For unit testing only. -291 * -292 * @return the Bloom filter used by this writer. -293 */ -294 BloomFilterWriter getGeneralBloomWriter() { -295 return generalBloomFilterWriter; -296 } -297 -298 private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException { -299 boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0); -300 if (haveBloom) { -301 bfw.compactBloom(); -302 } -303 return haveBloom; -304 } -305 -306 private boolean closeGeneralBloomFilter() throws IOException { -307 boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter); -308 -309 // add the general Bloom filter writer and append file info -310 if (hasGeneralBloom) { -311 writer.addGeneralBloomFilter(generalBloomFilterWriter); -312 writer.appendFileInfo(StoreFile.BLOOM_FILTER_TYPE_KEY, -313 Bytes.toBytes(bloomType.toString())); -314 bloomContext.addLastBloomKey(writer); -315 } -316 return hasGeneralBloom; -317 } -318 -319 private boolean closeDeleteFamilyBloomFilter() throws IOException { -320 boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter); -321 -322 // add the delete family Bloom filter writer -323 if (hasDeleteFamilyBloom) { -324 writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter); -325 } -326 -327 // append file info about the number of delete family kvs -328 // even if there is no delete family Bloom. -329 writer.appendFileInfo(StoreFile.DELETE_FAMILY_COUNT, -330 Bytes.toBytes(this.deleteFamilyCnt)); -331 -332 return hasDeleteFamilyBloom; -333 } -334 -335 public void close() throws IOException { -336 boolean hasGeneralBloom = this.closeGeneralBloomFilter(); -337 boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter(); -338 -339 writer.close(); -340 -341 // Log final Bloom filter statistics. This needs to be done after close() -342 // because compound Bloom filters might be finalized as part of closing. -343 if (LOG.isTraceEnabled()) { -344 LOG.trace((hasGeneralBloom ? "" : "NO ") + "General Bloom and " + -345 (hasDeleteFamilyBloom ? "" : "NO ") + "DeleteFamily" + " was added to HFile " + -346 getPath()); -347 } -348 -349 } -350 -351 public void appendFileInfo(byte[] key, byte[] value) throws IOException { -352 writer.appendFileInfo(key, value); -353 } -354 -355 /** For use in testing. -356 */ -357 HFile.Writer getHFileWriter() { -358 return writer; -359 } -360 -361 /** -362 * @param fs -363 * @param dir Directory to create file in. -364 * @return random filename inside passed <code>dir</code> -365 */ -366 static Path getUniqueFile(final FileSystem fs, final Path dir) throws IOException { -367 if (!fs.getFileStatus(dir).isDirectory()) { -368 throw new IOException("Expecting " + dir.toString() + " to be a directory"); -369 } -370 return new Path(dir, UUID.randomUUID().toString().replaceAll("-", "")); -371 } -372 -373 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="ICAST_INTEGER_MULTIPLY_CAST_TO_LONG", -374 justification="Will not overflow") -375 public static class Builder { -376 private final Configuration conf; -377 private final CacheConfig cacheConf; -378 private final FileSystem fs; -379 -380 private CellComparator comparator = CellComparator.COMPARATOR; -381 private BloomType bloomType = BloomType.NONE; -382 private long maxKeyCount = 0; -383 private Path dir; -384 private Path filePath; -385 private InetSocketAddress[] favoredNodes; -386 private HFileContext fileContext; -387 private TimeRangeTracker trt; -388 private boolean shouldDropCacheBehind; -389 -390 public Builder(Configuration conf, CacheConfig cacheConf, -391 FileSystem fs) { -392 this.conf = conf; -393 this.cacheConf = cacheConf; -394 this.fs = fs; -395 } -396 -397 /** -398 * Creates Builder with cache configuration disabled -399 */ -400 public Builder(Configuration conf, FileSystem fs) { -401 this.conf = conf; -402 this.cacheConf = CacheConfig.DISABLED; -403 this.fs = fs; -404 } -405 -406 /** -407 * @param trt A premade TimeRangeTracker to use rather than build one per append (building one -408 * of these is expensive so good to pass one in if you have one). -409 * @return this (for chained invocation) -410 */ -411 public Builder withTimeRangeTracker(final TimeRangeTracker trt) { -412 Preconditions.checkNotNull(trt); -413 this.trt = trt; -414 return this; -415 } -416 -417 /** -418 * Use either this method or {@link #withFilePath}, but not both. -419 * @param dir Path to column family directory. The directory is created if -420 * does not exist. The file is given a unique name within this -421 * directory. -422 * @return this (for chained invocation) -423 */ -424 public Builder withOutputDir(Path dir) { -425 Preconditions.checkNotNull(dir); -426 this.dir = dir; -427 return this; -428 } -429 -430 /** -431 * Use either this method or {@link #withOutputDir}, but not both. -432 * @param filePath the StoreFile path to write -433 * @return this (for chained invocation) -434 */ -435 public Builder withFilePath(Path filePath) { -436 Preconditions.checkNotNull(filePath); -437 this.filePath = filePath; -438 return this; -439 } -440 -441 /** -442 * @param favoredNodes an array of favored nodes or possibly null -443 * @return this (for chained invocation) -444 */ -445 public Builder withFavoredNodes(InetSocketAddress[] favoredNodes) { -446 this.favoredNodes = favoredNodes; -447 return this; -448 } -449 -450 public Builder withComparator(CellComparator comparator) { -451 Preconditions.checkNotNull(comparator); -452 this.comparator = comparator; -453 return this; -454 } -455 -456 public Builder withBloomType(BloomType bloomType) { -457 Preconditions.checkNotNull(bloomType); -458 this.bloomType = bloomType; -459 return this; -460 } -461 -462 /** -463 * @param maxKeyCount estimated maximum number of keys we expect to add -464 * @return this (for chained invocation) -465 */ -466 public Builder withMaxKeyCount(long maxKeyCount) { -467 this.maxKeyCount = maxKeyCount; -468 return this; -469 } -470 -471 public Builder withFileContext(HFileContext fileContext) { -472 this.fileContext = fileContext; -473 return this; -474 } -475 -476 public Builder withShouldDropCacheBehind(boolean shouldDropCacheBehind) { -477 this.shouldDropCacheBehind = shouldDropCacheBehind; -478 return this; -479 } -480 -481 /** -482 * Create a store file writer. Client is responsible for closing file when -483 * done. If metadata, add BEFORE closing using -484 * {@link StoreFileWriter#appendMetadata}. -485 */ -486 public StoreFileWriter build() throws IOException { -487 if ((dir == null ? 0 : 1) + (filePath == null ? 0 : 1) != 1) { -488 throw new IllegalArgumentException("Either specify parent directory " + -489 "or file path"); -490 } -491 -492 if (dir == null) { -493 dir = filePath.getParent(); -494 } -495 -496 if (!fs.exists(dir)) { -497 // Handle permission for non-HDFS filesystem properly -498 // See HBASE-17710 -499 HRegionFileSystem.mkdirs(fs, conf, dir); -500 } -501 -502 // set block storage policy for temp path -503 String policyName = this.conf.get(HColumnDescriptor.STORAGE_POLICY); -504 if (null == policyName) { -505 policyName = this.conf.get(HStore.BLOCK_STORAGE_POLICY_KEY); -506 } -507 FSUtils.setStoragePolicy(this.fs, dir, policyName); -508 -509 if (filePath == null) { -510 filePath = getUniqueFile(fs, dir); -511 if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) { -512 bloomType = BloomType.NONE; -513 } -514 } -515 -516 if (comparator == null) { -517 comparator = CellComparator.COMPARATOR; -518 } -519 return new StoreFileWriter(fs, filePath, -520 conf, cacheConf, comparator, bloomType, maxKeyCount, favoredNodes, fileContext, -521 shouldDropCacheBehind, trt); -522 } -523 } -524} +026import java.util.regex.Pattern; +027 +028import org.apache.commons.logging.Log; +029import org.apache.commons.logging.LogFactory; +030import org.apache.hadoop.conf.Configuration; +031import org.apache.hadoop.fs.FileSystem; +032import org.apache.hadoop.fs.Path; +033import org.apache.hadoop.hbase.Cell; +034import org.apache.hadoop.hbase.CellComparator; +035import org.apache.hadoop.hbase.CellUtil; +036import org.apache.hadoop.hbase.HColumnDescriptor; +037import org.apache.hadoop.hbase.HConstants; +038import org.apache.hadoop.hbase.KeyValue; +039import org.apache.hadoop.hbase.classification.InterfaceAudience; +040import org.apache.hadoop.hbase.io.hfile.CacheConfig; +041import org.apache.hadoop.hbase.io.hfile.HFile; +042import org.apache.hadoop.hbase.io.hfile.HFileContext; +043import org.apache.hadoop.hbase.util.BloomContext; +044import org.apache.hadoop.hbase.util.BloomFilterFactory; +045import org.apache.hadoop.hbase.util.BloomFilterWriter; +046import org.apache.hadoop.hbase.util.Bytes; +047import org.apache.hadoop.hbase.util.FSUtils; +048import org.apache.hadoop.hbase.util.RowBloomContext; +049import org.apache.hadoop.hbase.util.RowColBloomContext; +050import org.apache.hadoop.io.WritableUtils; +051 +052/** +053 * A StoreFile writer. Use this to read/write HBase Store Files. It is package +054 * local because it is an implementation detail of the HBase regionserver. +055 */ +056@InterfaceAudience.Private +057public class StoreFileWriter implements CellSink, ShipperListener { +058 private static final Log LOG = LogFactory.getLog(StoreFileWriter.class.getName()); +059 private static final Pattern dash = Pattern.compile("-"); +060 private final BloomFilterWriter generalBloomFilterWriter; +061 private final BloomFilterWriter deleteFamilyBloomFilterWriter; +062 private final BloomType bloomType; +063 private long earliestPutTs = HConstants.LATEST_TIMESTAMP; +064 private long deleteFamilyCnt = 0; +065 private BloomContext bloomContext = null; +066 private BloomContext deleteFamilyBloomContext = null; +067 +068 /** +069 * timeRangeTrackerSet is used to figure if we were passed a filled-out TimeRangeTracker or not. +070 * When flushing a memstore, we set the TimeRangeTracker that it accumulated during updates to +071 * memstore in here into this Writer and use this variable to indicate that we do not need to +072 * recalculate the timeRangeTracker bounds; it was done already as part of add-to-memstore. +073 * A completed TimeRangeTracker is not set in cases of compactions when it is recalculated. +074 */ +075 private final boolean timeRangeTrackerSet; +076 final TimeRangeTracker timeRangeTracker; +077 +078 protected HFile.Writer writer; +079 +080 /** +081 * Creates an HFile.Writer that also write helpful meta data. +082 * @param fs file system to write to +083 * @param path file name to create +084 * @param conf user configuration +085 * @param comparator key comparator +086 * @param bloomType bloom filter setting +087 * @param maxKeys the expected maximum number of keys to be added. Was used +088 * for Bloom filter size in {@link HFile} format version 1. +089 * @param fileContext - The HFile context +090 * @param shouldDropCacheBehind Drop pages written to page cache after writing the store file. +091 * @throws IOException problem writing to FS +092 */ +093 StoreFileWriter(FileSystem fs, Path path, final Configuration conf, CacheConfig cacheConf, +094 final CellComparator comparator, BloomType bloomType, long maxKeys, +095 InetSocketAddress[] favoredNodes, HFileContext fileContext, boolean shouldDropCacheBehind) +096 throws IOException { +097 this(fs, path, conf, cacheConf, comparator, bloomType, maxKeys, favoredNodes, fileContext, +098 shouldDropCacheBehind, null); +099 } +100 +101 /** +102 * Creates an HFile.Writer that also write helpful meta data. +103 * @param fs file system to write to +104 * @param path file name to create +105 * @param conf user configuration +106 * @param comparator key comparator +107 * @param bloomType bloom filter setting +108 * @param maxKeys the expected maximum number of keys to be added. Was used +109 * for Bloom filter size in {@link HFile} format version 1. +110 * @param favoredNodes +111 * @param fileContext - The HFile context +112 * @param shouldDropCacheBehind Drop pages written to page cache after writing the store file. +113 * @param trt Ready-made timetracker to use. +114 * @throws IOException problem writing to FS +115 */ +116 private StoreFileWriter(FileSystem fs, Path path, +117 final Configuration conf, +118 CacheConfig cacheConf, +119 final CellComparator comparator, BloomType bloomType, long maxKeys, +120 InetSocketAddress[] favoredNodes, HFileContext fileContext, +121 boolean shouldDropCacheBehind, final TimeRangeTracker trt) +122 throws IOException { +123 // If passed a TimeRangeTracker, use it. Set timeRangeTrackerSet so we don't destroy it. +124 // TODO: put the state of the TRT on the TRT; i.e. make a read-only version (TimeRange) when +125 // it no longer writable. +126 this.timeRangeTrackerSet = trt != null; +127 this.timeRangeTracker = this.timeRangeTrackerSet? trt: new TimeRangeTracker(); +128 // TODO : Change all writers to be specifically created for compaction context +129 writer = HFile.getWriterFactory(conf, cacheConf) +130 .withPath(fs, path) +131 .withComparator(comparator) +132 .withFavoredNodes(favoredNodes) +133 .withFileContext(fileContext) +134 .withShouldDropCacheBehind(shouldDropCacheBehind) +135 .create(); +136 +137 generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite( +138 conf, cacheConf, bloomType, +139 (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); +140 +141 if (generalBloomFilterWriter != null) { +142 this.bloomType = bloomType; +143 if (LOG.isTraceEnabled()) { +144 LOG.trace("Bloom filter type for " + path + ": " + this.bloomType + ", " + +145 generalBloomFilterWriter.getClass().getSimpleName()); +146 } +147 // init bloom context +148 switch (bloomType) { +149 case ROW: +150 bloomContext = new RowBloomContext(generalBloomFilterWriter, comparator); +151 break; +152 case ROWCOL: +153 bloomContext = new RowColBloomContext(generalBloomFilterWriter, comparator); +154 break; +155 default: +156 throw new IOException( +157 "Invalid Bloom filter type: " + bloomType + " (ROW or ROWCOL expected)"); +158 } +159 } else { +160 // Not using Bloom filters. +161 this.bloomType = BloomType.NONE; +162 } +163 +164 // initialize delete family Bloom filter when there is NO RowCol Bloom +165 // filter +166 if (this.bloomType != BloomType.ROWCOL) { +167 this.deleteFamilyBloomFilterWriter = BloomFilterFactory +168 .createDeleteBloomAtWrite(conf, cacheConf, +169 (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); +170 deleteFamilyBloomContext = new RowBloomContext(deleteFamilyBloomFilterWriter, comparator); +171 } else { +172 deleteFamilyBloomFilterWriter = null; +173 } +174 if (deleteFamilyBloomFilterWriter != null && LOG.isTraceEnabled()) { +175 LOG.trace("Delete Family Bloom filter type for " + path + ": " + +176 deleteFamilyBloomFilterWriter.getClass().getSimpleName()); +177 } +178 } +179 +180 /** +181 * Writes meta data. +182 * Call before {@link #close()} since its written as meta data to this file. +183 * @param maxSequenceId Maximum sequence id. +184 * @param majorCompaction True if this file is product of a major compaction +185 * @throws IOException problem writing to FS +186 */ +187 public void appendMetadata(final long maxSequenceId, final boolean majorCompaction) +188 throws IOException { +189 writer.appendFileInfo(StoreFile.MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); +190 writer.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, +191 Bytes.toBytes(majorCompaction)); +192 appendTrackedTimestampsToMetadata(); +193 } +194 +195 /** +196 * Writes meta data. +197 * Call before {@link #close()} since its written as meta data to this file. +198 * @param maxSequenceId Maximum sequence id. +199 * @param majorCompaction True if this file is product of a major compaction +200 * @param mobCellsCount The number of mob cells. +201 * @throws IOException problem writing to FS +202 */ +203 public void appendMetadata(final long maxSequenceId, final boolean majorCompaction, +204 final long mobCellsCount) throws IOException { +205 writer.appendFileInfo(StoreFile.MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId)); +206 writer.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction)); +207 writer.appendFileInfo(StoreFile.MOB_CELLS_COUNT, Bytes.toBytes(mobCellsCount)); +208 appendTrackedTimestampsToMetadata(); +209 } +210 +211 /** +212 * Add TimestampRange and earliest put timestamp to Metadata +213 */ +214 public void appendTrackedTimestampsToMetadata() throws IOException { +215 appendFileInfo(StoreFile.TIMERANGE_KEY, WritableUtils.toByteArray(timeRangeTracker)); +216 appendFileInfo(StoreFile.EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs)); +217 } +218 +219 /** +220 * Record the earlest Put timestamp. +221 * +222 * If the timeRangeTracker is not set, +223 * update TimeRangeTracker to include the timestamp of this key +224 */ +225 public void trackTimestamps(final Cell cell) { +226 if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) { +227 earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp()); +228 } +229 if (!timeRangeTrackerSet) { +230 timeRangeTracker.includeTimestamp(cell); +231 } +232 } +233 +234 private void appendGeneralBloomfilter(final Cell cell) throws IOException { +235 if (this.generalBloomFilterWriter != null) { +236 /* +237 * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png +238 * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + TimeStamp +239 * +240 * 2 Types of Filtering: +241 * 1. Row = Row +242 * 2. RowCol = Row + Qualifier +243 */ +244 bloomContext.writeBloom(cell); +245 } +246 } +247 +248 private void appendDeleteFamilyBloomFilter(final Cell cell) +249 throws IOException { +250 if (!CellUtil.isDeleteFamily(cell) && !CellUtil.isDeleteFamilyVersion(cell)) { +251 return; +252 } +253 +254 // increase the number of delete family in the store file +255 deleteFamilyCnt++; +256 if (this.deleteFamilyBloomFilterWriter != null) { +257 deleteFamilyBloomContext.writeBloom(cell); +258 } +259 } +260 +261 @Override +262 public void append(final Cell cell) throws IOException { +263 appendGeneralBloomfilter(cell); +264 appendDeleteFamilyBloomFilter(cell); +265 writer.append(cell); +266 trackTimestamps(cell); +267 } +268 +269 @Override +270 public void beforeShipped() throws IOException { +271 // For now these writer will always be of type ShipperListener true. +272 // TODO : Change all writers to be specifically created for compaction context +273 writer.beforeShipped(); +274 if (generalBloomFilterWriter != null) { +275 generalBloomFilterWriter.beforeShipped(); +276 } +277 if (deleteFamilyBloomFilterWriter != null) { +278 deleteFamilyBloomFilterWriter.beforeShipped(); +279 } +280 } +281 +282 public Path getPath() { +283 return this.writer.getPath(); +284 } +285 +286 public boolean hasGeneralBloom() { +287 return this.generalBloomFilterWriter != null; +288 } +289 +290 /** +291 * For unit testing only. +292 * +293 * @return the Bloom filter used by this writer. +294 */ +295 BloomFilterWriter getGeneralBloomWriter() { +296 return generalBloomFilterWriter; +297 } +298 +299 private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException { +300 boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0); +301 if (haveBloom) { +302 bfw.compactBloom(); +303 } +304 return haveBloom; +305 } +306 +307 private boolean closeGeneralBloomFilter() throws IOException { +308 boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter); +309 +310 // add the general Bloom filter writer and append file info +311 if (hasGeneralBloom) { +312 writer.addGeneralBloomFilter(generalBloomFilterWriter); +313 writer.appendFileInfo(StoreFile.BLOOM_FILTER_TYPE_KEY, +314 Bytes.toBytes(bloomType.toString())); +315 bloomContext.addLastBloomKey(writer); +316 } +317 return hasGeneralBloom; +318 } +319 +320 private boolean closeDeleteFamilyBloomFilter() throws IOException { +321 boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter); +322 +323 // add the delete family Bloom filter writer +324 if (hasDeleteFamilyBloom) { +325 writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter); +326 } +327 +328 // append file info about the number of delete family kvs +329 // even if there is no delete family Bloom. +330 writer.appendFileInfo(StoreFile.DELETE_FAMILY_COUNT, +331 Bytes.toBytes(this.deleteFamilyCnt)); +332 +333 return hasDeleteFamilyBloom; +334 } +335 +336 public void close() throws IOException { +337 boolean hasGeneralBloom = this.closeGeneralBloomFilter(); +338 boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter(); +339 +340 writer.close(); +341 +342 // Log final Bloom filter statistics. This needs to be done after close() +343 // because compound Bloom filters might be finalized as part of closing. +344 if (LOG.isTraceEnabled()) { +345 LOG.trace((hasGeneralBloom ? "" : "NO ") + "General Bloom and " + +346 (hasDeleteFamilyBloom ? "" : "NO ") + "DeleteFamily" + " was added to HFile " + +347 getPath()); +348 } +349 +350 } +351 +352 public void appendFileInfo(byte[] key, byte[] value) throws IOException { +353 writer.appendFileInfo(key, value); +354 } +355 +356 /** For use in testing. +357 */ +358 HFile.Writer getHFileWriter() { +359 return writer; +360 } +361 +362 /** +363 * @param fs +364 * @param dir Directory to create file in. +365 * @return random filename inside passed <code>dir</code> +366 */ +367 static Path getUniqueFile(final FileSystem fs, final Path dir) throws IOException { +368 if (!fs.getFileStatus(dir).isDirectory()) { +369 throw new IOException("Expecting " + dir.toString() + " to be a directory"); +370 } +371 return new Path(dir, dash.matcher(UUID.randomUUID().toString()).replaceAll("")); +372 } +373 +374 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="ICAST_INTEGER_MULTIPLY_CAST_TO_LONG", +375 justification="Will not overflow") +376 public static class Builder { +377 private final Configuration conf; +378 private final CacheConfig cacheConf; +379 private final FileSystem fs; +380 +381 private CellComparator comparator = CellComparator.COMPARATOR; +382 private BloomType bloomType = BloomType.NONE; +383 private long maxKeyCount = 0; +384 private Path dir; +385 private Path filePath; +386 private InetSocketAddress[] favoredNodes; +387 private HFileContext fileContext; +388 private TimeRangeTracker trt; +389 private boolean shouldDropCacheBehind; +390 +391 public Builder(Configuration conf, CacheConfig cacheConf, +392 FileSystem fs) { +393 this.conf = conf; +394 this.cacheConf = cacheConf; +395 this.fs = fs; +396 } +397 +398 /** +399 * Creates Builder with cache configuration disabled +400 */ +401 public Builder(Configuration conf, FileSystem fs) { +402 this.conf = conf; +403 this.cacheConf = CacheConfig.DISABLED; +404 this.fs = fs; +405 } +406 +407 /** +408 * @param trt A premade TimeRangeTracker to use rather than build one per append (building one +409 * of these is expensive so good to pass one in if you have one). +410 * @return this (for chained invocation) +411 */ +412 public Builder withTimeRangeTracker(final TimeRangeTracker trt) { +413 Preconditions.checkNotNull(trt); +414 this.trt = trt; +415 return this; +416 } +417 +418 /** +419 * Use either this method or {@link #withFilePath}, but not both. +420 * @param dir Path to column family directory. The directory is created if +421 * does not exist. The file is given a unique name within this +422 * directory. +423 * @return this (for chained invocation) +424 */ +425 public Builder withOutputDir(Path dir) { +426 Preconditions.checkNotNull(dir); +427 this.dir = dir; +428 return this; +