asterixdb-notifications mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Luo Chen (Code Review)" <do-not-re...@asterixdb.incubator.apache.org>
Subject Change in asterixdb[master]: [ASTERIXDB-2133] Remove unncessary binary search in GroupFra...
Date Tue, 17 Oct 2017 23:10:18 GMT
Luo Chen has uploaded a new change for review.

  https://asterix-gerrit.ics.uci.edu/2079

Change subject: [ASTERIXDB-2133] Remove unncessary binary search in GroupFrameAccessor
......................................................................

[ASTERIXDB-2133] Remove unncessary binary search in GroupFrameAccessor

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- GroupFrameAccessor holds a list of frames from a run during the merge
step of merge sort. However, everytime we access a tuple, it performs
binary search to get the physical tuple index. This patch fixes this
by remembering the last accessed frame. It is expected that tuples
are accessed sequentially (since it's the merge step), which greatly
reduces binary searches

Change-Id: I4a1b19ad47f6b1dda4bd5c417932e4c9ba36a714
---
M hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
1 file changed, 20 insertions(+), 11 deletions(-)


  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/79/2079/1

diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
index cac50e7..afee464 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
@@ -57,10 +57,11 @@
     private final RecordDescriptor recordDescriptor;
     private final int minFrameSize;
     private final FrameTupleAccessor frameTupleAccessor;
-    private int lastTupleIndex;
     private int lastFrameId;
+    private int lastFrameStart;
+    private int lastFrameEnd;
     private ByteBuffer buffer;
-    private List<InnerFrameInfo> innerFrameInfos;
+    private final List<InnerFrameInfo> innerFrameInfos;
 
     public GroupFrameAccessor(int minFrameSize, RecordDescriptor recordDescriptor) {
         this.minFrameSize = minFrameSize;
@@ -127,8 +128,9 @@
     @Override
     public void reset(ByteBuffer buffer) {
         this.buffer = buffer;
-        this.lastTupleIndex = -1;
         this.lastFrameId = -1;
+        this.lastFrameStart = -1;
+        this.lastFrameEnd = -1;
         parseGroupedBuffer(0, buffer.limit());
     }
 
@@ -153,12 +155,16 @@
 
     private int resetSubTupleAccessor(int tupleIndex) {
         assert tupleIndex < getTupleCount();
-        if (innerFrameInfos.size() == 1) {
-            return tupleIndex;
+        //if (innerFrameInfos.size() == 1) {
+        // return tupleIndex;
+        //}
+        if (tupleIndex >= lastFrameStart && tupleIndex < lastFrameEnd) {
+            // a special optimization path
+            // since GroupFrameAccessor is used by merge, it is expected that tuples are
accessed sequentially
+            // thus, if tuple still fit into the last frame, we do not need to perform binary
search
+            return tupleIndex - lastFrameStart;
         }
-        if (tupleIndex == lastTupleIndex) {
-            return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId
- 1).tupleCount : lastTupleIndex;
-        }
+        // we perform binary search to get the frame Id
         int subFrameId = Collections.binarySearch(innerFrameInfos, tupleIndex);
         if (subFrameId >= 0) {
             subFrameId++;
@@ -166,9 +172,12 @@
             subFrameId = -subFrameId - 1;
         }
         frameTupleAccessor.reset(buffer, innerFrameInfos.get(subFrameId).start, innerFrameInfos.get(subFrameId).length);
-        lastTupleIndex = tupleIndex;
         lastFrameId = subFrameId;
-        return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount
: lastTupleIndex;
+        lastFrameStart = lastFrameId > 0 ? innerFrameInfos.get(lastFrameId - 1).tupleCount
: 0;
+        lastFrameEnd = innerFrameInfos.get(lastFrameId).tupleCount;
+
+        return lastFrameId > 0 ? tupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount
: tupleIndex;
+
     }
 
-}
+}
\ No newline at end of file

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/2079
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I4a1b19ad47f6b1dda4bd5c417932e4c9ba36a714
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Luo Chen <cluo8@uci.edu>

Mime
View raw message