phoenix-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [phoenix] dbwong commented on a change in pull request #482: PHOENIX-4925 Use Segment tree to organize Guide Post Info
Date Sat, 13 Apr 2019 00:15:42 GMT
dbwong commented on a change in pull request #482: PHOENIX-4925 Use Segment tree to organize
Guide Post Info
URL: https://github.com/apache/phoenix/pull/482#discussion_r275093828
 
 

 ##########
 File path: phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java
 ##########
 @@ -17,138 +17,771 @@
  */
 package org.apache.phoenix.schema.stats;
 
-import java.util.Collections;
-import java.util.List;
+import java.util.*;
 
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.phoenix.util.ByteUtil;
-import org.apache.phoenix.util.SizedUtil;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.util.ScanUtil;
+import org.apache.phoenix.util.ScanUtil.BytesComparator;
+import org.apache.phoenix.query.KeyRange;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
-import com.google.common.primitives.Longs;
-/**
- *  A class that holds the guidePosts of a region and also allows combining the 
- *  guidePosts of different regions when the GuidePostsInfo is formed for a table.
- */
-public class GuidePostsInfo {
-    public final static GuidePostsInfo NO_GUIDEPOST =
-            new GuidePostsInfo(Collections.<Long> emptyList(),
-                    new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY),
-                    Collections.<Long> emptyList(), 0, 0, Collections.<Long>
emptyList()) {
-                @Override
-                public int getEstimatedSize() {
-                    return 0;
-                }
-            };
-    
-    public final static byte[] EMPTY_GUIDEPOST_KEY = ByteUtil.EMPTY_BYTE_ARRAY;
-    
+class GuidePostTreeNode {
     /**
-     * the total number of guidePosts for the table combining all the guidePosts per region
per cf.
+     * The key range of the guide posts that this node covers
      */
-    private final ImmutableBytesWritable guidePosts;
+    private final KeyRange keyRange;
+
+    /**
+     * The accumulated estimation info of the guide posts that this node covers
+     */
+    private final GuidePostEstimation totalEstimation;
+
+    public GuidePostTreeNode(KeyRange keyRange, GuidePostEstimation totalEstimation) {
+        this.keyRange = keyRange;
+        this.totalEstimation = totalEstimation;
+    }
+
+    public KeyRange getKeyRange() {
+        return this.keyRange;
+    }
+
+    public GuidePostEstimation getTotalEstimation() {
+        return this.totalEstimation;
+    }
+
     /**
-     * Maximum length of a guidePost collected
+     * Merge the two child tree nodes into this node which contains the merged key range
+     * and the "sum" of the estimation info of the child nodes.
+     * @param left
+     * @param right
+     * @return the parent node of the given nodes
      */
-    private final int maxLength;
+    public static GuidePostTreeNode merge(GuidePostTreeNode left, GuidePostTreeNode right)
{
+        KeyRange tempKeyRange = KeyRange.getKeyRange(
+                left.getKeyRange().getLowerRange(), left.getKeyRange().isLowerInclusive(),
+                right.getKeyRange().getUpperRange(), right.getKeyRange().isUpperInclusive());
+
+        GuidePostEstimation tempEstimation = GuidePostEstimation.merge(
+                left.getTotalEstimation(), right.getTotalEstimation());
+
+        return new GuidePostTreeNode(tempKeyRange, tempEstimation);
+    }
+}
+
+final class GuidePostTreeLeaf extends GuidePostTreeNode {
     /**
-     * Number of guidePosts
+     * The index of the guide post chunk in the chunk array.
      */
-    private final int guidePostsCount;
+    private int guidePostChunkIndex;
+
+    public GuidePostTreeLeaf(int guidePostChunkIndex, KeyRange keyRange,
+            GuidePostEstimation totalEstimation) {
+        super(keyRange, totalEstimation);
+
+        this.guidePostChunkIndex = guidePostChunkIndex;
+    }
+
+    public int getGuidePostChunkIndex() {
+        return this.guidePostChunkIndex;
+    }
+}
+
+@VisibleForTesting
+final class DecodedGuidePostChunk {
     /**
-     * The rowCounts of each guidePost traversed
+     * The index of the guide post chunk in the chunk array.
      */
-    private final long[] rowCounts;
+    private final int guidePostChunkIndex;
+
     /**
-     * The bytecounts of each guidePost traversed
+     * The guide posts in this chunk.
      */
-    private final long[] byteCounts;
+    private final List<byte[]> guidePosts;
+
+    public DecodedGuidePostChunk(int guidePostChunkIndex, List<byte[]> guidePosts)
{
+        assert (guidePostChunkIndex != GuidePostChunk.INVALID_GUIDEPOST_CHUNK_INDEX);
+        assert (guidePosts.size() > 0);
+        this.guidePostChunkIndex = guidePostChunkIndex;
+        this.guidePosts = guidePosts;
+    }
+
+    public int getGuidePostChunkIndex() {
+        return guidePostChunkIndex;
+    }
+
+    public List<byte[]> getGuidePosts() {
+        return guidePosts;
+    }
+
     /**
-     * Estimate of byte size of this instance
+     * The guide post boundaries:
+     *     gp_0, gp_1, ..., gp_i0, ..., gp_i1, ..., gp_i2, ..., gp_in, ..., gp_n
+     * The guide post boundaries:
+     *     gp_i0, gp_i1, ..., gp_in, ..., gp_n
+     * The key space split by the guide post chunks:
+     *     (UNBOUND, gp_i0](gp_i0, gp_i1](gp_i1, gp_i2]...(gp_in, gp_n](gp_n, UNBOUND)
+     * The last guide post chunk is a DUMMY chunk which contains one DUMMY guide post
+     * KeyRang.UNBOUND with 0 estimated rows/bytes and Long.MAX last update timestamp.
+     * @param key
+     * @param isInclusive
+     * @param isLowerBound
+     * @return the zero-based local index of the guide post with the given key
+     */
+    public int locateGuidePost(byte[] key, boolean isInclusive, boolean isLowerBound) {
+        if (guidePosts.get(0) == KeyRange.UNBOUND) {
+            return 0;
+        }
+
+        // case #    isInclusive     isLowerBound    Found exact match (index >= 0)? 
  index returned
+        //   1           Y               Y                      Y                       
   index
+        //   2           Y               N                      Y                       
   index
+        //   3           N               Y                      Y                       
(index + 1)
+        //     comment for case 3 above:
+        //         return the index + 1, as the exclusive key will be contained in the next
guide post,
+        //         since we're matching on the end key of the guide post.
+        //   4           N               N                      Y                       
   index
+        //   5           Y               Y                      N                       -(index
+ 1)
+        //   6           Y               N                      N                       -(index
+ 1)
+        //   7           N               Y                      N                       -(index
+ 1)
+        //   8           N               N                      N                       -(index
+ 1)
+        int index = Collections.binarySearch(guidePosts, key, Bytes.BYTES_COMPARATOR);
+        if (index < 0) {
+            index = -(index + 1);
+        } else if (isLowerBound && !isInclusive) {
+            index += 1; // case 3 above
+        }
+
+        return index < guidePosts.size() ? index : GuidePost.INVALID_GUIDEPOST_INDEX;
+    }
+}
+
+/**
+ *  A class that holds the guide posts of a tenant or a table. It also allows combining
+ *  the guide posts of different tenants when the GuidePostsInfo is formed for a table.
+ */
+public class GuidePostsInfo {
+    private static class InnerRangeQueryResult {
+        /**
+         * The accumulated estimation info within the search range
+         */
+        private final GuidePostEstimation totalEstimation;
+
+        /**
+         * The iterator for the guide posts within the search range
+         */
+        private final GuidePostIterator guidePostIterator;
+
+        public InnerRangeQueryResult(GuidePostEstimation totalEstimation, GuidePostIterator
guidePostIterator) {
+            this.totalEstimation = totalEstimation;
+            this.guidePostIterator = guidePostIterator;
+        }
+
+        public GuidePostEstimation getTotalEstimation() {
+            return totalEstimation;
+        }
+
+        public GuidePostIterator getGuidePostIterator() {
+            return guidePostIterator;
+        }
+    }
+
+    private static class InnerPointLookupResult {
+        /**
+         * The node's index in the tree which is in the representation of an array
+         */
+        private final int treeNodeIndex;
+
+        /**
+         * The index of the guidepost chunk containing the point
+         */
+        private final int guidePostChunkIndex;
+
+        /**
+         * The local index (Zero-based) of the guidepost containing the point
+         */
+        private final int guidePostLocalIndex;
+
+        public InnerPointLookupResult(int treeNodeIndex, int guidePostChunkIndex, int guidePostLocalIndex)
{
+            this.treeNodeIndex = treeNodeIndex;
+            this.guidePostChunkIndex = guidePostChunkIndex;
+            this.guidePostLocalIndex = guidePostLocalIndex;
+        }
+
+        public int getTreeNodeIndex() {
+            return treeNodeIndex;
+        }
+
+        public int getGuidePostChunkIndex() {
+            return guidePostChunkIndex;
+        }
+
+        public int getGuidePostLocalIndex() {
+            return guidePostLocalIndex;
+        }
+    }
+
+    private static class DecodedGuidePostChunkCache {
+        /**
+         * The guide post chunks. The guide posts held by this tree are split into chunks
+         */
+        private final List<GuidePostChunk> guidePostChunks;
+
+        private final LinkedList<DecodedGuidePostChunk> decodedGuidePostChunks;
+
+        public DecodedGuidePostChunkCache(List<GuidePostChunk> guidePostChunks) {
+            this.guidePostChunks = guidePostChunks;
+            decodedGuidePostChunks = new LinkedList<>();
+        }
+
+        public DecodedGuidePostChunk get(int searchChunkIndex, boolean removePreviousEntries)
{
+            int listIndex = 0;
+            while (listIndex < decodedGuidePostChunks.size()) {
+                int currentChunkIndex = decodedGuidePostChunks.get(listIndex).getGuidePostChunkIndex();
+                if (currentChunkIndex < searchChunkIndex) {
+                    if (removePreviousEntries) {
+                        decodedGuidePostChunks.remove(listIndex);
+                    } else {
+                        listIndex++;
+                    }
+                } else if (currentChunkIndex == searchChunkIndex) {
+                    return decodedGuidePostChunks.get(listIndex);
+                } else {
+                    break;
+                }
+            }
+
+            DecodedGuidePostChunk decodedChunk = guidePostChunks.get(searchChunkIndex).decode();
+            decodedGuidePostChunks.add(listIndex, decodedChunk);
+            return decodedChunk;
+        }
+    }
+
+    private static class GuidePostIterator implements Iterator<GuidePost> {
+        /**
+         * The guide post chunks. The guide posts held by this tree are split into chunks
+         */
+        private final List<GuidePostChunk> guidePostChunks;
+
+        /**
+         * The index of the first guide post in the search range, which is the local index
+         * (Zero-based) in the first returned chunk.
+         */
+        private final int indexOfFirstGuidePost;
+
+        /**
+         * The index of the last guide post in the search range, which is the local index
+         * (Zero-based) in the last returned chunk.
+         */
+        private final int indexOfLastGuidePost;
+
+        /**
+         * The index of the first returned chunks
+         */
+        private final int indexOfFirstChunk;
+
+        /**
+         * The index of the last returned chunks
+         */
+        private final int indexOfLastChunk;
+
+        /**
+         * The cache for the decoded guide post chunks
+         */
+        private final DecodedGuidePostChunkCache decodedGuidePostChunkCache;
+
+        private int indexOfCurrentGuidePost;
+
+        private int indexOfCurrentGuidePostChunk;
+
+        private DecodedGuidePostChunk decodedGuidePostChunk;
+
+        public GuidePostIterator(List<GuidePostChunk> guidePostChunks,
+                int indexOfFirstChunk, int indexOfFirstGuidePost,
+                int indexOfLastChunk, int indexOfLastGuidePost,
+                DecodedGuidePostChunkCache decodedGuidePostChunkCache) {
+            this.guidePostChunks = guidePostChunks;
+            this.indexOfFirstChunk = indexOfFirstChunk;
+            this.indexOfFirstGuidePost = indexOfFirstGuidePost;
+            this.indexOfLastChunk = indexOfLastChunk;
+            this.indexOfLastGuidePost = indexOfLastGuidePost;
+            this.decodedGuidePostChunkCache = decodedGuidePostChunkCache;
+
+            this.indexOfCurrentGuidePostChunk = this.indexOfFirstChunk;
+            this.indexOfCurrentGuidePost = this.indexOfFirstGuidePost;
+            this.decodedGuidePostChunk = decodedGuidePostChunkCache.get(indexOfCurrentGuidePostChunk,
true);
+        }
+
+        private GuidePost getGuidePost() {
+            byte[] key = decodedGuidePostChunk.getGuidePosts().get(indexOfCurrentGuidePost);
+            GuidePostEstimation estimation = guidePostChunks.get(
+                    indexOfCurrentGuidePostChunk).getEstimations().get(indexOfCurrentGuidePost);
+            return new GuidePost(key, estimation);
+        }
+
+        @Override
+        public boolean hasNext() {
+            return indexOfCurrentGuidePostChunk < indexOfLastChunk || indexOfCurrentGuidePost
<= indexOfLastGuidePost;
+        }
+
+        @Override
+        public GuidePost next() {
+            GuidePost guidePost = getGuidePost();
+
+            indexOfCurrentGuidePost++;
+            if (indexOfCurrentGuidePost == guidePostChunks.get(indexOfCurrentGuidePostChunk).getGuidePostsCount()
+                    && indexOfCurrentGuidePostChunk < indexOfLastChunk) {
+                indexOfCurrentGuidePostChunk++;
+                indexOfCurrentGuidePost = 0;
+                decodedGuidePostChunk = decodedGuidePostChunkCache.get(indexOfCurrentGuidePostChunk,
true);
+            }
+
+            return guidePost;
+        }
+
+        @Override
+        public void remove() {
+            assert false; // Should never be called.
+        }
+
+        public GuidePost last() {
+            if (indexOfCurrentGuidePostChunk != indexOfLastChunk) {
+                indexOfCurrentGuidePostChunk = indexOfLastChunk;
+                decodedGuidePostChunk = decodedGuidePostChunkCache.get(indexOfLastChunk,
true);
+            }
+
+            indexOfCurrentGuidePost = indexOfLastGuidePost;
+
+            return getGuidePost();
+        }
+    }
+
+    public final static GuidePostsInfo NO_GUIDEPOST =
+            new GuidePostsInfo(0, Collections.<GuidePostChunk> emptyList());
+
+    private static BytesComparator comparator;
+
+    /**
+     * The guide post chunks. The guide posts held by this tree are split into chunks
+     */
+    private final List<GuidePostChunk> guidePostChunks;
+
+    /**
+     * The guide posts info is organized in a tree. The tree is in the representation of
array
+     */
+    private final GuidePostTreeNode[] nodes;
+
+    /**
+     * The size of the tree
+     */
+    private final int treeSize;
+
+    /**
+     * Estimation of byte size of this instance contributes to cache
      */
     private final int estimatedSize;
+
     /**
-     * The timestamps at which guideposts were created/updated
+     * The total count of guide posts collected
      */
-    private final long[] gpTimestamps;
+    private final int guidePostsCount;
 
     /**
-     * Constructor that creates GuidePostsInfo per region
-     * 
-     * @param byteCounts
-     *            The bytecounts of each guidePost traversed
-     * @param guidePosts
-     *            Prefix byte encoded guidePosts
-     * @param rowCounts
-     *            The rowCounts of each guidePost traversed
-     * @param maxLength
-     *            Maximum length of a guidePost collected
-     * @param guidePostsCount
-     *            Number of guidePosts
-     * @param gpTimestamps
-     *            Times at which guidePosts were updated/created
+     * Construct the Variant Segment Tree (https://salesforce.quip.com/taWiALFmhquO)
+     * from the given guide post chunks
+     * @param guidePostChunks -- the guide post chunks from which the tree is built
      */
-    public GuidePostsInfo(List<Long> byteCounts, ImmutableBytesWritable guidePosts,
List<Long> rowCounts, int maxLength,
-            int guidePostsCount, List<Long> updateTimes) {
-        this.guidePosts = new ImmutableBytesWritable(guidePosts);
-        this.maxLength = maxLength;
+    public GuidePostsInfo(int guidePostsCount, List<GuidePostChunk> guidePostChunks)
{
+        this.guidePostChunks = guidePostChunks;
         this.guidePostsCount = guidePostsCount;
-        this.rowCounts = Longs.toArray(rowCounts);
-        this.byteCounts = Longs.toArray(byteCounts);
-        this.gpTimestamps = Longs.toArray(updateTimes);
-        // Those Java equivalents of sizeof() in C/C++, mentioned on the Web, might be overkilled
here.
-        int estimatedSize = SizedUtil.OBJECT_SIZE
-                + SizedUtil.IMMUTABLE_BYTES_WRITABLE_SIZE + guidePosts.getLength() // guidePosts
-                + SizedUtil.INT_SIZE // maxLength
-                + SizedUtil.INT_SIZE // guidePostsCount
-                + SizedUtil.ARRAY_SIZE + this.rowCounts.length * SizedUtil.LONG_SIZE // rowCounts
-                + SizedUtil.ARRAY_SIZE + this.byteCounts.length * SizedUtil.LONG_SIZE //
byteCounts
-                + SizedUtil.ARRAY_SIZE + this.gpTimestamps.length * SizedUtil.LONG_SIZE //
gpTimestamps
-                + SizedUtil.INT_SIZE; // estimatedSize
-        this.estimatedSize = estimatedSize;
-    }
-    
-    public ImmutableBytesWritable getGuidePosts() {
-        return guidePosts;
+        this.comparator = ScanUtil.getComparator(true, SortOrder.ASC);
+
+        if (this.guidePostChunks != null && this.guidePostChunks.size() > 0) {
+            int n = this.guidePostChunks.size();
+            int height = (int) (Math.ceil(Math.log(n) / Math.log(2))); // The height of this
tree
+            this.treeSize = 2 * (int) Math.pow(2, height) - 1;
+            this.nodes = new GuidePostTreeNode[this.treeSize];
+
+            // We calculate how much this instance contributes to cache approximately,
+            // so we only count the size of guide post chunk array.
+            // The contribution to cache from other fields are ignorable.
+            int byteCount = 0;
+            for (GuidePostChunk chunk : this.guidePostChunks) {
+                byteCount += chunk.getEstimatedSize();
+            }
+            this.estimatedSize = byteCount;
+
+            Construct(this.guidePostChunks, 0, n - 1, 0);
+        } else {
+            this.treeSize = 0;
+            this.nodes = null;
 
 Review comment:
   dislike the use of null for presence consider Optional.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message