From issues-return-6009-archive-asf-public=cust-asf.ponee.io@phoenix.apache.org Sat Apr 13 00:15:45 2019 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id B9CB31807CC for ; Sat, 13 Apr 2019 02:15:43 +0200 (CEST) Received: (qmail 48924 invoked by uid 500); 13 Apr 2019 00:15:42 -0000 Mailing-List: contact issues-help@phoenix.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@phoenix.apache.org Delivered-To: mailing list issues@phoenix.apache.org Received: (qmail 48595 invoked by uid 99); 13 Apr 2019 00:15:42 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 13 Apr 2019 00:15:42 +0000 From: GitBox To: issues@phoenix.apache.org Subject: [GitHub] [phoenix] dbwong commented on a change in pull request #482: PHOENIX-4925 Use Segment tree to organize Guide Post Info Message-ID: <155511454229.8503.8368775859762436821.gitbox@gitbox.apache.org> Date: Sat, 13 Apr 2019 00:15:42 -0000 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit dbwong commented on a change in pull request #482: PHOENIX-4925 Use Segment tree to organize Guide Post Info URL: https://github.com/apache/phoenix/pull/482#discussion_r275092992 ########## File path: phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java ########## @@ -17,138 +17,771 @@ */ package org.apache.phoenix.schema.stats; -import java.util.Collections; -import java.util.List; +import java.util.*; -import org.apache.hadoop.hbase.io.ImmutableBytesWritable; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.phoenix.util.ByteUtil; -import org.apache.phoenix.util.SizedUtil; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.phoenix.schema.SortOrder; +import org.apache.phoenix.util.ScanUtil; +import org.apache.phoenix.util.ScanUtil.BytesComparator; +import org.apache.phoenix.query.KeyRange; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import com.google.common.primitives.Longs; -/** - * A class that holds the guidePosts of a region and also allows combining the - * guidePosts of different regions when the GuidePostsInfo is formed for a table. - */ -public class GuidePostsInfo { - public final static GuidePostsInfo NO_GUIDEPOST = - new GuidePostsInfo(Collections. emptyList(), - new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY), - Collections. emptyList(), 0, 0, Collections. emptyList()) { - @Override - public int getEstimatedSize() { - return 0; - } - }; - - public final static byte[] EMPTY_GUIDEPOST_KEY = ByteUtil.EMPTY_BYTE_ARRAY; - +class GuidePostTreeNode { /** - * the total number of guidePosts for the table combining all the guidePosts per region per cf. + * The key range of the guide posts that this node covers */ - private final ImmutableBytesWritable guidePosts; + private final KeyRange keyRange; + + /** + * The accumulated estimation info of the guide posts that this node covers + */ + private final GuidePostEstimation totalEstimation; + + public GuidePostTreeNode(KeyRange keyRange, GuidePostEstimation totalEstimation) { + this.keyRange = keyRange; + this.totalEstimation = totalEstimation; + } + + public KeyRange getKeyRange() { + return this.keyRange; + } + + public GuidePostEstimation getTotalEstimation() { + return this.totalEstimation; + } + /** - * Maximum length of a guidePost collected + * Merge the two child tree nodes into this node which contains the merged key range + * and the "sum" of the estimation info of the child nodes. + * @param left + * @param right + * @return the parent node of the given nodes */ - private final int maxLength; + public static GuidePostTreeNode merge(GuidePostTreeNode left, GuidePostTreeNode right) { + KeyRange tempKeyRange = KeyRange.getKeyRange( + left.getKeyRange().getLowerRange(), left.getKeyRange().isLowerInclusive(), + right.getKeyRange().getUpperRange(), right.getKeyRange().isUpperInclusive()); + + GuidePostEstimation tempEstimation = GuidePostEstimation.merge( + left.getTotalEstimation(), right.getTotalEstimation()); + + return new GuidePostTreeNode(tempKeyRange, tempEstimation); + } +} + +final class GuidePostTreeLeaf extends GuidePostTreeNode { /** - * Number of guidePosts + * The index of the guide post chunk in the chunk array. */ - private final int guidePostsCount; + private int guidePostChunkIndex; + + public GuidePostTreeLeaf(int guidePostChunkIndex, KeyRange keyRange, + GuidePostEstimation totalEstimation) { + super(keyRange, totalEstimation); + + this.guidePostChunkIndex = guidePostChunkIndex; + } + + public int getGuidePostChunkIndex() { + return this.guidePostChunkIndex; + } +} + +@VisibleForTesting +final class DecodedGuidePostChunk { /** - * The rowCounts of each guidePost traversed + * The index of the guide post chunk in the chunk array. */ - private final long[] rowCounts; + private final int guidePostChunkIndex; + /** - * The bytecounts of each guidePost traversed + * The guide posts in this chunk. */ - private final long[] byteCounts; + private final List guidePosts; + + public DecodedGuidePostChunk(int guidePostChunkIndex, List guidePosts) { + assert (guidePostChunkIndex != GuidePostChunk.INVALID_GUIDEPOST_CHUNK_INDEX); + assert (guidePosts.size() > 0); + this.guidePostChunkIndex = guidePostChunkIndex; + this.guidePosts = guidePosts; + } + + public int getGuidePostChunkIndex() { + return guidePostChunkIndex; + } + + public List getGuidePosts() { + return guidePosts; + } + /** - * Estimate of byte size of this instance + * The guide post boundaries: + * gp_0, gp_1, ..., gp_i0, ..., gp_i1, ..., gp_i2, ..., gp_in, ..., gp_n + * The guide post boundaries: + * gp_i0, gp_i1, ..., gp_in, ..., gp_n + * The key space split by the guide post chunks: + * (UNBOUND, gp_i0](gp_i0, gp_i1](gp_i1, gp_i2]...(gp_in, gp_n](gp_n, UNBOUND) + * The last guide post chunk is a DUMMY chunk which contains one DUMMY guide post + * KeyRang.UNBOUND with 0 estimated rows/bytes and Long.MAX last update timestamp. + * @param key + * @param isInclusive + * @param isLowerBound + * @return the zero-based local index of the guide post with the given key + */ + public int locateGuidePost(byte[] key, boolean isInclusive, boolean isLowerBound) { + if (guidePosts.get(0) == KeyRange.UNBOUND) { + return 0; + } + + // case # isInclusive isLowerBound Found exact match (index >= 0)? index returned + // 1 Y Y Y index + // 2 Y N Y index + // 3 N Y Y (index + 1) + // comment for case 3 above: + // return the index + 1, as the exclusive key will be contained in the next guide post, + // since we're matching on the end key of the guide post. + // 4 N N Y index + // 5 Y Y N -(index + 1) + // 6 Y N N -(index + 1) + // 7 N Y N -(index + 1) + // 8 N N N -(index + 1) + int index = Collections.binarySearch(guidePosts, key, Bytes.BYTES_COMPARATOR); + if (index < 0) { + index = -(index + 1); + } else if (isLowerBound && !isInclusive) { + index += 1; // case 3 above + } + + return index < guidePosts.size() ? index : GuidePost.INVALID_GUIDEPOST_INDEX; + } +} + +/** + * A class that holds the guide posts of a tenant or a table. It also allows combining + * the guide posts of different tenants when the GuidePostsInfo is formed for a table. + */ +public class GuidePostsInfo { + private static class InnerRangeQueryResult { + /** + * The accumulated estimation info within the search range + */ + private final GuidePostEstimation totalEstimation; + + /** + * The iterator for the guide posts within the search range + */ + private final GuidePostIterator guidePostIterator; + + public InnerRangeQueryResult(GuidePostEstimation totalEstimation, GuidePostIterator guidePostIterator) { + this.totalEstimation = totalEstimation; + this.guidePostIterator = guidePostIterator; + } + + public GuidePostEstimation getTotalEstimation() { + return totalEstimation; + } + + public GuidePostIterator getGuidePostIterator() { + return guidePostIterator; + } + } + + private static class InnerPointLookupResult { + /** + * The node's index in the tree which is in the representation of an array + */ + private final int treeNodeIndex; + + /** + * The index of the guidepost chunk containing the point + */ + private final int guidePostChunkIndex; + + /** + * The local index (Zero-based) of the guidepost containing the point + */ + private final int guidePostLocalIndex; + + public InnerPointLookupResult(int treeNodeIndex, int guidePostChunkIndex, int guidePostLocalIndex) { + this.treeNodeIndex = treeNodeIndex; + this.guidePostChunkIndex = guidePostChunkIndex; + this.guidePostLocalIndex = guidePostLocalIndex; + } + + public int getTreeNodeIndex() { + return treeNodeIndex; + } + + public int getGuidePostChunkIndex() { + return guidePostChunkIndex; + } + + public int getGuidePostLocalIndex() { + return guidePostLocalIndex; + } + } + + private static class DecodedGuidePostChunkCache { + /** + * The guide post chunks. The guide posts held by this tree are split into chunks + */ + private final List guidePostChunks; + + private final LinkedList decodedGuidePostChunks; + + public DecodedGuidePostChunkCache(List guidePostChunks) { + this.guidePostChunks = guidePostChunks; + decodedGuidePostChunks = new LinkedList<>(); + } + + public DecodedGuidePostChunk get(int searchChunkIndex, boolean removePreviousEntries) { + int listIndex = 0; + while (listIndex < decodedGuidePostChunks.size()) { + int currentChunkIndex = decodedGuidePostChunks.get(listIndex).getGuidePostChunkIndex(); + if (currentChunkIndex < searchChunkIndex) { + if (removePreviousEntries) { + decodedGuidePostChunks.remove(listIndex); + } else { + listIndex++; + } + } else if (currentChunkIndex == searchChunkIndex) { + return decodedGuidePostChunks.get(listIndex); + } else { + break; + } + } + + DecodedGuidePostChunk decodedChunk = guidePostChunks.get(searchChunkIndex).decode(); + decodedGuidePostChunks.add(listIndex, decodedChunk); + return decodedChunk; + } + } + + private static class GuidePostIterator implements Iterator { + /** + * The guide post chunks. The guide posts held by this tree are split into chunks + */ + private final List guidePostChunks; + + /** + * The index of the first guide post in the search range, which is the local index + * (Zero-based) in the first returned chunk. + */ + private final int indexOfFirstGuidePost; + + /** + * The index of the last guide post in the search range, which is the local index + * (Zero-based) in the last returned chunk. + */ + private final int indexOfLastGuidePost; + + /** + * The index of the first returned chunks + */ + private final int indexOfFirstChunk; + + /** + * The index of the last returned chunks + */ + private final int indexOfLastChunk; + + /** + * The cache for the decoded guide post chunks + */ + private final DecodedGuidePostChunkCache decodedGuidePostChunkCache; + + private int indexOfCurrentGuidePost; + + private int indexOfCurrentGuidePostChunk; + + private DecodedGuidePostChunk decodedGuidePostChunk; + + public GuidePostIterator(List guidePostChunks, + int indexOfFirstChunk, int indexOfFirstGuidePost, + int indexOfLastChunk, int indexOfLastGuidePost, + DecodedGuidePostChunkCache decodedGuidePostChunkCache) { + this.guidePostChunks = guidePostChunks; + this.indexOfFirstChunk = indexOfFirstChunk; + this.indexOfFirstGuidePost = indexOfFirstGuidePost; + this.indexOfLastChunk = indexOfLastChunk; + this.indexOfLastGuidePost = indexOfLastGuidePost; + this.decodedGuidePostChunkCache = decodedGuidePostChunkCache; + + this.indexOfCurrentGuidePostChunk = this.indexOfFirstChunk; + this.indexOfCurrentGuidePost = this.indexOfFirstGuidePost; + this.decodedGuidePostChunk = decodedGuidePostChunkCache.get(indexOfCurrentGuidePostChunk, true); + } + + private GuidePost getGuidePost() { + byte[] key = decodedGuidePostChunk.getGuidePosts().get(indexOfCurrentGuidePost); + GuidePostEstimation estimation = guidePostChunks.get( + indexOfCurrentGuidePostChunk).getEstimations().get(indexOfCurrentGuidePost); + return new GuidePost(key, estimation); + } + + @Override + public boolean hasNext() { + return indexOfCurrentGuidePostChunk < indexOfLastChunk || indexOfCurrentGuidePost <= indexOfLastGuidePost; + } + + @Override + public GuidePost next() { + GuidePost guidePost = getGuidePost(); + + indexOfCurrentGuidePost++; + if (indexOfCurrentGuidePost == guidePostChunks.get(indexOfCurrentGuidePostChunk).getGuidePostsCount() + && indexOfCurrentGuidePostChunk < indexOfLastChunk) { + indexOfCurrentGuidePostChunk++; + indexOfCurrentGuidePost = 0; + decodedGuidePostChunk = decodedGuidePostChunkCache.get(indexOfCurrentGuidePostChunk, true); + } + + return guidePost; + } + + @Override + public void remove() { + assert false; // Should never be called. + } + + public GuidePost last() { + if (indexOfCurrentGuidePostChunk != indexOfLastChunk) { + indexOfCurrentGuidePostChunk = indexOfLastChunk; + decodedGuidePostChunk = decodedGuidePostChunkCache.get(indexOfLastChunk, true); + } + + indexOfCurrentGuidePost = indexOfLastGuidePost; + + return getGuidePost(); + } + } + + public final static GuidePostsInfo NO_GUIDEPOST = + new GuidePostsInfo(0, Collections. emptyList()); + + private static BytesComparator comparator; + + /** + * The guide post chunks. The guide posts held by this tree are split into chunks + */ + private final List guidePostChunks; + + /** + * The guide posts info is organized in a tree. The tree is in the representation of array + */ + private final GuidePostTreeNode[] nodes; + + /** + * The size of the tree + */ + private final int treeSize; + + /** + * Estimation of byte size of this instance contributes to cache */ private final int estimatedSize; + /** - * The timestamps at which guideposts were created/updated + * The total count of guide posts collected */ - private final long[] gpTimestamps; + private final int guidePostsCount; /** - * Constructor that creates GuidePostsInfo per region - * - * @param byteCounts - * The bytecounts of each guidePost traversed - * @param guidePosts - * Prefix byte encoded guidePosts - * @param rowCounts - * The rowCounts of each guidePost traversed - * @param maxLength - * Maximum length of a guidePost collected - * @param guidePostsCount - * Number of guidePosts - * @param gpTimestamps - * Times at which guidePosts were updated/created + * Construct the Variant Segment Tree (https://salesforce.quip.com/taWiALFmhquO) Review comment: May want to update to an apache available link. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services