From commits-return-7601-archive-asf-public=cust-asf.ponee.io@pulsar.incubator.apache.org Wed May 2 15:17:27 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 08C3A18065D for ; Wed, 2 May 2018 15:17:26 +0200 (CEST) Received: (qmail 36063 invoked by uid 500); 2 May 2018 13:17:26 -0000 Mailing-List: contact commits-help@pulsar.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pulsar.incubator.apache.org Delivered-To: mailing list commits@pulsar.incubator.apache.org Received: (qmail 36052 invoked by uid 99); 2 May 2018 13:17:26 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 02 May 2018 13:17:26 +0000 From: GitBox To: commits@pulsar.apache.org Subject: [GitHub] zhaijack commented on a change in pull request #1678: PIP-17: provide BlockAwareSegmentInputStream implementation and test Message-ID: <152526704561.27114.1714428660201228078.gitbox@gitbox.apache.org> Date: Wed, 02 May 2018 13:17:25 -0000 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit zhaijack commented on a change in pull request #1678: PIP-17: provide BlockAwareSegmentInputStream implementation and test URL: https://github.com/apache/incubator-pulsar/pull/1678#discussion_r185491684 ########## File path: pulsar-broker/src/main/java/org/apache/pulsar/broker/s3offload/impl/BlockAwareSegmentInputStream.java ########## @@ -0,0 +1,219 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pulsar.broker.s3offload.impl; + +import static com.google.common.base.Preconditions.checkState; + +import com.google.common.collect.Lists; +import com.google.common.primitives.Ints; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.PooledByteBufAllocator; +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.ExecutionException; +import org.apache.bookkeeper.client.api.LedgerEntries; +import org.apache.bookkeeper.client.api.LedgerEntry; +import org.apache.bookkeeper.client.api.ReadHandle; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * + * The BlockAwareSegmentInputStream for each cold storage data block. + * It contains a byte buffer, which contains all the content for this data block. 
+ * DataBlockHeader + entries(each with format[[entry_size -- int][entry_id -- long][entry_data]]) + * + */ +public class BlockAwareSegmentInputStream extends InputStream { + private static final Logger log = LoggerFactory.getLogger(BlockAwareSegmentInputStream.class); + + private static final byte[] blockEndPadding = Ints.toByteArray(0xFEDCDEAD); + + private final ReadHandle ledger; + private final long startEntryId; + private final int blockSize; + + // Number of Message entries that read from ledger and been readout from this InputStream. + private int blockEntryCount; + // Number of payload Bytes read from ledger, and has been kept in this InputStream. + private int payloadBytesHave; + // Number of bytes that has been kept in this InputStream. + private int blockBytesHave; + + // tracking read status for both header and entries. + // Bytes that already been read from this InputStream + private int bytesReadOffset = 0; + // Byte from this index is all padding byte + private int dataBlockFullOffset; + private final InputStream dataBlockHeaderStream; + + // how many entries want to read from ReadHandle each time. + private static final int entriesNumberEachRead = 100; + // buf the entry size and entry id. + private static final int entryHeaderSize = 4 /* entry size*/ + 8 /* entry id */; + // Keep a list of all entries ByteBuf, each element contains 2 buf: entry header and entry content. + private List entriesByteBuf = null; + + public BlockAwareSegmentInputStream(ReadHandle ledger, long startEntryId, int blockSize) { + this.ledger = ledger; + this.startEntryId = startEntryId; + this.blockSize = blockSize; + this.dataBlockHeaderStream = DataBlockHeaderImpl.of(blockSize, startEntryId).toStream(); + this.blockBytesHave = DataBlockHeaderImpl.getDataStartOffset(); + this.payloadBytesHave = 0; + this.blockEntryCount = 0; + this.dataBlockFullOffset = blockSize; + this.entriesByteBuf = Lists.newLinkedList(); + } + + // read ledger entries. 
+ private int readEntries() throws IOException { + checkState(bytesReadOffset >= DataBlockHeaderImpl.getDataStartOffset()); + checkState(bytesReadOffset < dataBlockFullOffset); + + try { + // once reach the end of entry buffer, start a new read. + if (entriesByteBuf.isEmpty()) { + readNextEntriesFromLedger(); + log.debug("After readNextEntriesFromLedger: bytesReadOffset: {}, blockBytesHave: {}", + bytesReadOffset, blockBytesHave); + } + + // always read from the first ByteBuf in the list, once read all of its content remove it. + ByteBuf entryByteBuf = entriesByteBuf.get(0); + int ret = entryByteBuf.readByte(); + bytesReadOffset ++; + + if (entryByteBuf.readableBytes() == 0) { + entryByteBuf.release(); + entriesByteBuf.remove(0); + } + + return ret; + } catch (InterruptedException | ExecutionException e) { + log.error("Exception when get CompletableFuture. ", e); + throw new IOException(e); + } + } + + // read entries from ledger, and pre-handle the returned ledgerEntries. + private void readNextEntriesFromLedger() throws InterruptedException, ExecutionException { Review comment: Thanks, changed it. My first thought was to make the readEntries handling easier, since it is called for each byte read; the change does not seem to add much computation. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services