hive-commits mailing list archives

From: ser...@apache.org
Subject: [1/2] hive git commit: HIVE-17172 : add ordering checks to DiskRangeList (Sergey Shelukhin, reviewed by Deepak Jaiswal and Prasanth Jayachandran)
Date: Wed, 02 Aug 2017 20:42:07 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-2 766ba7f48 -> 6615ed013
  refs/heads/master 24dc65c6e -> 044426387


HIVE-17172 : add ordering checks to DiskRangeList (Sergey Shelukhin, reviewed by Deepak Jaiswal and Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/04442638
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/04442638
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/04442638

Branch: refs/heads/master
Commit: 0444263875a40fdd5363b6eecdea2c5f5865b977
Parents: 24dc65c
Author: sergey <sershe@apache.org>
Authored: Wed Aug 2 13:38:48 2017 -0700
Committer: sergey <sershe@apache.org>
Committed: Wed Aug 2 13:38:48 2017 -0700

----------------------------------------------------------------------
 .../hive/llap/cache/LowLevelCacheImpl.java      |  44 ++++----
 .../hadoop/hive/common/io/DiskRangeList.java    |  52 ++++++++-
 .../hive/common/io/TestDiskRangeList.java       | 106 +++++++++++++++++++
 3 files changed, 181 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
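For illustration only, here is a minimal sketch of the behavior the new checks enforce, mirroring the new TestDiskRangeList below (the class name OrderingCheckExample is made up and is not part of this commit): an out-of-order insertAfter now fails fast with an AssertionError naming the offending ranges and the stringified list, instead of silently linking overlapping elements.

import org.apache.hadoop.hive.common.io.DiskRangeList;

public class OrderingCheckExample {
  public static void main(String[] args) {
    // Build a well-ordered list: [0, 10) followed by [10, 15).
    DiskRangeList head = new DiskRangeList(0, 10);
    DiskRangeList tail = head.insertAfter(new DiskRangeList(10, 15)); // contiguous, accepted
    try {
      // [14, 20) overlaps [10, 15); with this patch the insert throws instead of
      // producing an out-of-order list.
      tail.insertAfter(new DiskRangeList(14, 20));
    } catch (AssertionError expected) {
      System.out.println(expected.getMessage()); // names both ranges and the stringified list
    }
  }
}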


http://git-wip-us.apache.org/repos/asf/hive/blob/04442638/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
index 82ea0c0..cc96d17 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
@@ -236,33 +236,41 @@ public class LowLevelCacheImpl implements LowLevelCache, BufferUsageManager, Lla
   private DiskRangeList addCachedBufferToIter(
       DiskRangeList currentNotCached, DiskRangeList currentCached, BooleanRef gotAllData) {
     if (currentNotCached.getOffset() >= currentCached.getOffset()) {
-      if (currentNotCached.getEnd() <= currentCached.getEnd()) {  // we assume it's always "==" now
+      // Cached buffer has the same (or lower) offset as the requested buffer.
+      if (currentNotCached.getEnd() <= currentCached.getEnd()) {
         // Replace the entire current DiskRange with new cached range.
+        // In case of an inexact match in either of the below it may throw. We do not currently
+        // support the case where the caller requests a single cache buffer via multiple smaller
+        // sub-ranges; if that happens, this may throw. No one does it now, though.
+        // TODO: should we actively assert here for cache buffers larger than range?
         currentNotCached.replaceSelfWith(currentCached);
         return null;
       } else {
-        // Insert the new cache range before the disk range.
+        // This cache range is a prefix of the requested one; the above also applies.
+        // The cache may still contain the rest of the requested range, so don't set gotAllData.
         currentNotCached.insertPartBefore(currentCached);
         return currentNotCached;
       }
+    }
+
+    // Some part of the requested range is not cached - the cached offset is past the requested.
+    if (gotAllData != null) {
+      gotAllData.value = false;
+    }
+    if (currentNotCached.getEnd() <= currentCached.getEnd()) {
+      // The cache buffer comprises the tail of the requested range (and possibly overshoots it).
+      // The same as above applies - may throw if cache buffer is larger than the requested range,
+      // and there's another range after this that starts in the middle of this cache buffer.
+      // Currently, we cache at exact offsets, so the latter should never happen.
+      currentNotCached.insertPartAfter(currentCached);
+      return null;  // No more matches expected.
     } else {
-      // There's some part of current buffer that is not cached.
-      if (gotAllData != null) {
-        gotAllData.value = false;
-      }
-      assert currentNotCached.getOffset() < currentCached.getOffset()
-        || currentNotCached.prev == null
-        || currentNotCached.prev.getEnd() <= currentCached.getOffset();
-      long endOffset = currentNotCached.getEnd();
+      // The cached buffer is in the middle of the requested range.
+      // The remaining tail of the latter may still be available further.
+      DiskRangeList tail = new DiskRangeList(currentCached.getEnd(), currentNotCached.getEnd());
       currentNotCached.insertPartAfter(currentCached);
-      if (endOffset <= currentCached.getEnd()) { // we assume it's always "==" now
-        return null;  // No more matches expected...
-      } else {
-        // Insert the new disk range after the cache range. TODO: not strictly necessary yet?
-        currentNotCached = new DiskRangeList(currentCached.getEnd(), endOffset);
-        currentCached.insertAfter(currentNotCached);
-        return currentNotCached;
-      }
+      currentCached.insertAfter(tail);
+      return tail;
     }
   }
 

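As a standalone illustration of the "cached buffer in the middle of the requested range" case handled above (a hypothetical sketch, not code from this commit; SplitExample is a made-up name): the requested range is trimmed to the prefix before the cached buffer, the cached buffer is linked after it, and a new tail range is linked after the cached buffer for the data that still has to come from cache or disk.

import org.apache.hadoop.hive.common.io.DiskRangeList;

public class SplitExample {
  public static void main(String[] args) {
    DiskRangeList requested = new DiskRangeList(0, 100); // requested range, not yet matched
    DiskRangeList cached = new DiskRangeList(40, 60);    // cached buffer in the middle of it
    // Capture the tail before insertPartAfter mutates the end of the requested range.
    DiskRangeList tail = new DiskRangeList(cached.getEnd(), requested.getEnd()); // [60, 100)
    requested.insertPartAfter(cached); // requested becomes [0, 40), cached is linked after it
    cached.insertAfter(tail);          // [60, 100) may still be found further in the cache
    // The list is now [0, 40) -> [40, 60) -> [60, 100); all ordering checks pass.
  }
}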
http://git-wip-us.apache.org/repos/asf/hive/blob/04442638/storage-api/src/java/org/apache/hadoop/hive/common/io/DiskRangeList.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/common/io/DiskRangeList.java b/storage-api/src/java/org/apache/hadoop/hive/common/io/DiskRangeList.java
index 83b7ec2..1088e76 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/common/io/DiskRangeList.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/common/io/DiskRangeList.java
@@ -40,16 +40,51 @@ public class DiskRangeList extends DiskRange {
     other.prev = this.prev;
     other.next = this.next;
     if (this.prev != null) {
+      checkOrder(this.prev, other, this);
       this.prev.next = other;
     }
     if (this.next != null) {
+      checkOrder(other, this.next, this);
       this.next.prev = other;
     }
     this.next = this.prev = null;
     return other;
   }
 
-  private void checkArg(DiskRangeList other) throws AssertionError {
+  private final static void checkOrder(DiskRangeList prev, DiskRangeList next, DiskRangeList ref) {
+    if (prev.getEnd() <= next.getOffset()) return;
+    assertInvalidOrder(ref.prev == null ? ref : ref.prev, prev, next);
+  }
+
+  private final static void assertInvalidOrder(
+      DiskRangeList ref, DiskRangeList prev, DiskRangeList next) {
+    String error = "Elements not in order " + prev + " and " + next
+        + "; trying to insert into " + stringifyDiskRanges(ref);
+    LOG.error(error);
+    throw new AssertionError(error);
+  }
+
+  // TODO: this duplicates a method in ORC, but the method should actually be here.
+  public final static String stringifyDiskRanges(DiskRangeList range) {
+    StringBuilder buffer = new StringBuilder();
+    buffer.append("[");
+    boolean isFirst = true;
+    while (range != null) {
+      if (!isFirst) {
+        buffer.append(", {");
+      } else {
+        buffer.append("{");
+      }
+      isFirst = false;
+      buffer.append(range.toString());
+      buffer.append("}");
+      range = range.next;
+    }
+    buffer.append("]");
+    return buffer.toString();
+  }
+
+  private void checkArg(DiskRangeList other) {
     if (other == this) {
       // The only case where duplicate elements matter... the others are handled by the below.
       throw new AssertionError("Inserting self into the list [" + other + "]");
@@ -67,11 +102,14 @@ public class DiskRangeList extends DiskRange {
    */
   public DiskRangeList insertPartBefore(DiskRangeList other) {
     checkArg(other);
-    assert other.end >= this.offset;
+    if (other.end <= this.offset || other.end > this.end) {
+      assertInvalidOrder(this.prev == null ? this : this.prev, other, this);
+    }
     this.offset = other.end;
     other.prev = this.prev;
     other.next = this;
     if (this.prev != null) {
+      checkOrder(this.prev, other, this.prev);
       this.prev.next = other;
     }
     this.prev = other;
@@ -85,9 +123,11 @@ public class DiskRangeList extends DiskRange {
    * */
   public DiskRangeList insertAfter(DiskRangeList other) {
     checkArg(other);
+    checkOrder(this, other, this);
     other.next = this.next;
     other.prev = this;
     if (this.next != null) {
+      checkOrder(other, this.next, this);
       this.next.prev = other;
     }
     this.next = other;
@@ -100,7 +140,10 @@ public class DiskRangeList extends DiskRange {
    * @return the new element.
    */
   public DiskRangeList insertPartAfter(DiskRangeList other) {
-    assert other.offset <= this.end;
+    // The only allowed non-overlapping option is extra bytes at the end.
+    if (other.offset > this.end || other.offset <= this.offset || other.end <= this.offset) {
+      assertInvalidOrder(this.prev == null ? this : this.prev, this, other);
+    }
     this.end = other.offset;
     return insertAfter(other);
   }
@@ -241,5 +284,8 @@ public class DiskRangeList extends DiskRange {
     assert newEnd >= this.offset;
     assert this.next == null || this.next.offset >= newEnd;
     this.end = newEnd;
+    if (this.next != null) {
+      checkOrder(this, this.next, this);
+    }
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/04442638/storage-api/src/test/org/apache/hadoop/hive/common/io/TestDiskRangeList.java
----------------------------------------------------------------------
diff --git a/storage-api/src/test/org/apache/hadoop/hive/common/io/TestDiskRangeList.java b/storage-api/src/test/org/apache/hadoop/hive/common/io/TestDiskRangeList.java
new file mode 100644
index 0000000..a76c7ad
--- /dev/null
+++ b/storage-api/src/test/org/apache/hadoop/hive/common/io/TestDiskRangeList.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.common.io;
+
+import org.junit.Test;
+
+public class TestDiskRangeList {
+  @Test
+  public void testErrorConditions() throws Exception {
+    DiskRangeList d510 = new DiskRangeList(0, 10);
+    d510.insertPartBefore(new DiskRangeList(0, 5));
+    DiskRangeList d1015 = d510.insertAfter(new DiskRangeList(10, 15));
+    try {
+      d510.replaceSelfWith(d510); // The arg is self.
+      fail();
+    } catch (AssertionError error) {}
+    DiskRangeList existing = new DiskRangeList(0, 10);
+    existing.insertPartBefore(new DiskRangeList(0, 5));
+    try {
+      d510.replaceSelfWith(existing); // The arg is part of another list.
+      fail();
+    } catch (AssertionError error) {}
+    try {
+      d510.replaceSelfWith(new DiskRangeList(4, 10)); // Not sequential with previous.
+      fail();
+    } catch (AssertionError error) {}
+    try {
+      d510.replaceSelfWith(new DiskRangeList(5, 11)); // Not sequential with next.
+      fail();
+    } catch (AssertionError error) {}
+
+
+    try {
+      d510.insertPartBefore(d510); // The arg is self.
+      fail();
+    } catch (AssertionError error) {}
+    existing = new DiskRangeList(5, 7);
+    existing.insertPartBefore(new DiskRangeList(5, 6));
+    try {
+      d510.insertPartBefore(existing); // The arg is part of another list.
+      fail();
+    } catch (AssertionError error) {}
+    try {
+      d510.insertPartBefore(new DiskRangeList(3, 4)); // Not a part.
+      fail();
+    } catch (AssertionError error) {}
+    try {
+      d510.insertPartBefore(new DiskRangeList(4, 6)); // Not sequential with previous.
+      fail();
+    } catch (AssertionError error) {}
+
+
+    try {
+      d510.insertAfter(d510); // The arg is self.
+      fail();
+    } catch (AssertionError error) {}
+    existing = new DiskRangeList(15, 20);
+    existing.insertAfter(new DiskRangeList(20, 25));
+    try {
+      d1015.insertAfter(existing); // The arg is part of another list.
+      fail();
+    } catch (AssertionError error) {}
+    try {
+      d1015.insertAfter(new DiskRangeList(14, 20)); // Not sequential.
+      fail();
+    } catch (AssertionError error) {}
+    d1015.insertAfter(new DiskRangeList(20, 25));
+    try {
+      d1015.insertAfter(new DiskRangeList(15, 21)); // Not sequential with next.
+      fail();
+    } catch (AssertionError error) {}
+    try {
+      d1015.insertPartAfter(new DiskRangeList(16, 20)); // Not a part.
+      fail();
+    } catch (AssertionError error) {}
+    try {
+      d1015.insertPartAfter(new DiskRangeList(9, 11)); // Not a part.
+      fail();
+    } catch (AssertionError error) {}
+
+    try {
+      d1015.setEnd(21); // Not sequential with next.
+      fail();
+    } catch (AssertionError error) {}
+  }
+
+  private void fail() throws Exception {
+    throw new Exception(); // Don't use Assert.fail, we are catching assertion errors.
+  }
+}

