hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject svn commit: r1668772 - in /hive/branches/llap: llap-client/src/java/org/apache/hadoop/hive/llap/io/api/cache/ llap-server/src/java/org/apache/hadoop/hive/llap/cache/ llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/ llap-server/src/test/org...
Date Tue, 24 Mar 2015 02:21:21 GMT
Author: sershe
Date: Tue Mar 24 02:21:21 2015
New Revision: 1668772

URL: http://svn.apache.org/r1668772
Log:
HIVE-10063 : LLAP: LowLevelCacheImpl does not match cached ranges properly (Sergey Shelukhin)

Modified:
    hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/cache/LowLevelCache.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
    hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
    hive/branches/llap/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java

Modified: hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/cache/LowLevelCache.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/cache/LowLevelCache.java?rev=1668772&r1=1668771&r2=1668772&view=diff
==============================================================================
--- hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/cache/LowLevelCache.java (original)
+++ hive/branches/llap/llap-client/src/java/org/apache/hadoop/hive/llap/io/api/cache/LowLevelCache.java Tue Mar 24 02:21:21 2015
@@ -29,10 +29,25 @@ public interface LowLevelCache {
     HIGH
     // TODO: we could add more priorities, e.g. tiered-high, where we always evict it last.
   }
+
   /**
-   * Gets file data for particular offsets. Null entries mean no data.
-   * @param base base offset for the ranges (stripe offset in case of ORC).
-   * @return 
+   * Gets file data for particular offsets. The range list is modified in place; it is then
+   * returned (since the list head could have changed). Ranges are replaced with cached ranges.
+   * In case of partial overlap with cached data, full cache blocks are always returned;
+   * there's no capacity for partial matches in return type. The rules are as follows:
+   * 1) If the requested range starts in the middle of a cached range, that cached range will not
+   *    be returned by default (e.g. if [100,200) and [200,300) are cached, the request for
+   *    [150,300) will only return [200,300) from cache). This may be configurable in impls.
+   *    This is because we assume well-known range start offsets are used (rg/stripe offsets), so
+   *    a request from the middle of the start doesn't make sense.
+   * 2) If the requested range ends in the middle of a cached range, that entire cached range will
+   *    be returned (e.g. if [100,200) and [200,300) are cached, the request for [100,250) will
+   *    return both ranges). It should really be same as #1, however currently ORC uses estimated
+   *    end offsets; we do in fact know in such cases that partially-matched cached block (rg)
+   *    can be thrown away, the reader will never touch it; but we need code in the reader to
+   *    handle such cases to avoid disk reads for these "tails" vs real unmatched ranges.
+   *    Some sort of InvalidCacheChunk could be placed to avoid them. TODO
+   * @param base base offset for the ranges (stripe/stream offset in case of ORC).
    */
   DiskRangeList getFileData(long fileId, DiskRangeList range, long baseOffset);
 

Modified: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java?rev=1668772&r1=1668771&r2=1668772&view=diff
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java (original)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelCacheImpl.java Tue Mar 24 02:21:21 2015
@@ -47,15 +47,16 @@ public class LowLevelCacheImpl implement
   private final LowLevelCachePolicy cachePolicy;
   private final long cleanupInterval;
   private LlapDaemonCacheMetrics metrics;
+  private final boolean doAssumeGranularBlocks;
 
   public LowLevelCacheImpl(LlapDaemonCacheMetrics metrics, LowLevelCachePolicy cachePolicy,
-      Allocator allocator) {
-    this(metrics, cachePolicy, allocator, DEFAULT_CLEANUP_INTERVAL);
+      Allocator allocator, boolean doAssumeGranularBlocks) {
+    this(metrics, cachePolicy, allocator, doAssumeGranularBlocks, DEFAULT_CLEANUP_INTERVAL);
   }
 
   @VisibleForTesting
-  LowLevelCacheImpl(LlapDaemonCacheMetrics metrics,
-      LowLevelCachePolicy cachePolicy, Allocator allocator, long cleanupInterval) {
+  LowLevelCacheImpl(LlapDaemonCacheMetrics metrics, LowLevelCachePolicy cachePolicy,
+      Allocator allocator, boolean doAssumeGranularBlocks, long cleanupInterval) {
     if (LlapIoImpl.LOGL.isInfoEnabled()) {
       LlapIoImpl.LOG.info("Low level cache; cleanup interval " + cleanupInterval + "sec");
     }
@@ -63,6 +64,7 @@ public class LowLevelCacheImpl implement
     this.allocator = allocator;
     this.cleanupInterval = cleanupInterval;
     this.metrics = metrics;
+    this.doAssumeGranularBlocks = doAssumeGranularBlocks;
   }
 
   public void init() {
@@ -105,8 +107,16 @@ public class LowLevelCacheImpl implement
 
   private void getOverlappingRanges(long baseOffset, DiskRangeList currentNotCached,
       ConcurrentSkipListMap<Long, LlapDataBuffer> cache) {
+    long absOffset = currentNotCached.offset + baseOffset;
+    if (!doAssumeGranularBlocks) {
+      // This currently only happens in tests. See getFileData comment on the interface.
+      Long prevOffset = cache.floorKey(absOffset);
+      if (prevOffset != null) {
+        absOffset = prevOffset;
+      }
+    }
     Iterator<Map.Entry<Long, LlapDataBuffer>> matches = cache.subMap(
-        currentNotCached.offset + baseOffset, currentNotCached.end + baseOffset)
+        absOffset, currentNotCached.end + baseOffset)
         .entrySet().iterator();
     long cacheEnd = -1;
     while (matches.hasNext()) {
@@ -150,7 +160,7 @@ public class LowLevelCacheImpl implement
     // Both currentNotCached and currentCached already include baseOffset.
     long startOffset = baseOffset + currentNotCached.offset,
         endOffset = baseOffset + currentNotCached.end;
-    if (startOffset == currentCached.offset) {
+    if (startOffset >= currentCached.offset) {
       if (endOffset <= currentCached.end) {  // we assume it's always "==" now
         // Replace the entire current DiskRange with new cached range.
         currentNotCached.replaceSelfWith(currentCached);
@@ -162,7 +172,8 @@ public class LowLevelCacheImpl implement
         return currentNotCached;
       }
     } else {
-      assert startOffset < currentCached.offset;
+      assert startOffset < currentCached.offset
+        || currentNotCached.prev == null || currentNotCached.prev.end <= currentCached.offset;
       currentNotCached.end = currentCached.offset - baseOffset;
       currentNotCached.insertAfter(currentCached);
       if (endOffset <= currentCached.end) { // we assume it's always "==" now

Modified: hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java?rev=1668772&r1=1668771&r2=1668772&view=diff
==============================================================================
--- hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java (original)
+++ hive/branches/llap/llap-server/src/java/org/apache/hadoop/hive/llap/io/api/impl/LlapIoImpl.java Tue Mar 24 02:21:21 2015
@@ -97,7 +97,7 @@ public class LlapIoImpl implements LlapI
       // Allocator uses memory manager to request memory.
       allocator = new BuddyAllocator(conf, memManager, cacheMetrics);
       // Cache uses allocator to allocate and deallocate.
-      orcCache = new LowLevelCacheImpl(cacheMetrics, cachePolicy, allocator);
+      orcCache = new LowLevelCacheImpl(cacheMetrics, cachePolicy, allocator, true);
       // And finally cache policy uses cache to notify it of eviction. The cycle is complete!
       cachePolicy.setEvictionListener(new EvictionDispatcher(orcCache, metadataCache));
       orcCache.init();

Modified: hive/branches/llap/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/llap/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java?rev=1668772&r1=1668771&r2=1668772&view=diff
==============================================================================
--- hive/branches/llap/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java (original)
+++ hive/branches/llap/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestLowLevelCacheImpl.java Tue Mar 24 02:21:21 2015
@@ -91,7 +91,7 @@ public class TestLowLevelCacheImpl {
   public void testGetPut() {
     LowLevelCacheImpl cache = new LowLevelCacheImpl(
         LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(),
-        new DummyAllocator(), -1); // no cleanup thread
+        new DummyAllocator(), true, -1); // no cleanup thread
     long fn1 = 1, fn2 = 2;
     LlapMemoryBuffer[] fakes = new LlapMemoryBuffer[] { fb(), fb(), fb(), fb(), fb(), fb() };
     verifyRefcount(fakes, 1, 1, 1, 1, 1, 1);
@@ -149,7 +149,7 @@ public class TestLowLevelCacheImpl {
   public void testMultiMatch() {
     LowLevelCacheImpl cache = new LowLevelCacheImpl(
         LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(),
-        new DummyAllocator(), -1); // no cleanup thread
+        new DummyAllocator(), true, -1); // no cleanup thread
     long fn = 1;
     LlapMemoryBuffer[] fakes = new LlapMemoryBuffer[] { fb(), fb() };
     assertNull(cache.putFileData(
@@ -165,10 +165,24 @@ public class TestLowLevelCacheImpl {
   }
 
   @Test
+  public void testMultiMatchNonGranular() {
+    LowLevelCacheImpl cache = new LowLevelCacheImpl(
+        LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(),
+        new DummyAllocator(), false, -1); // no cleanup thread
+    long fn = 1;
+    LlapMemoryBuffer[] fakes = new LlapMemoryBuffer[] { fb(), fb() };
+    assertNull(cache.putFileData(
+        fn, new DiskRange[] { dr(2, 4), dr(6, 8) }, fakes, 0, Priority.NORMAL));
+    // We expect cache requests from the middle here
+    verifyCacheGet(cache, fn, 3, 4, fakes[0]);
+    verifyCacheGet(cache, fn, 3, 7, fakes[0], dr(4, 6), fakes[1]);
+  }
+
+  @Test
   public void testStaleValueGet() {
     LowLevelCacheImpl cache = new LowLevelCacheImpl(
         LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(),
-        new DummyAllocator(), -1); // no cleanup thread
+        new DummyAllocator(), true, -1); // no cleanup thread
     long fn1 = 1, fn2 = 2;
     LlapMemoryBuffer[] fakes = new LlapMemoryBuffer[] { fb(), fb(), fb() };
     assertNull(cache.putFileData(fn1, drs(1, 2), fbs(fakes, 0, 1), 0, Priority.NORMAL));
@@ -187,7 +201,7 @@ public class TestLowLevelCacheImpl {
   public void testStaleValueReplace() {
     LowLevelCacheImpl cache = new LowLevelCacheImpl(
         LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(),
-        new DummyAllocator(), -1); // no cleanup thread
+        new DummyAllocator(), true, -1); // no cleanup thread
     long fn1 = 1, fn2 = 2;
     LlapMemoryBuffer[] fakes = new LlapMemoryBuffer[] {
         fb(), fb(), fb(), fb(), fb(), fb(), fb(), fb(), fb() };
@@ -232,7 +246,7 @@ public class TestLowLevelCacheImpl {
 
     LlapDaemonCacheMetrics metrics = LlapDaemonCacheMetrics.create("test", "1");
     LowLevelCacheImpl cache = new LowLevelCacheImpl(metrics,
-        new DummyCachePolicy(), new DummyAllocator(), -1); // no cleanup thread
+        new DummyCachePolicy(), new DummyAllocator(), true, -1); // no cleanup thread
     long fn = 1;
     LlapMemoryBuffer[] fakes = new LlapMemoryBuffer[]{fb(), fb(), fb()};
     cache.putFileData(fn, new DiskRange[]{dr(0, 100), dr(300, 500), dr(800, 1000)},
@@ -269,8 +283,8 @@ public class TestLowLevelCacheImpl {
 
   @Test
   public void testMTTWithCleanup() {
-    final LowLevelCacheImpl cache = new LowLevelCacheImpl(
-        LlapDaemonCacheMetrics.create("test", "1"), new DummyCachePolicy(), new DummyAllocator(), 1);
+    final LowLevelCacheImpl cache = new LowLevelCacheImpl(LlapDaemonCacheMetrics.create(
+        "test", "1"), new DummyCachePolicy(), new DummyAllocator(), true, 1);
     final long fn1 = 1, fn2 = 2;
     final int offsetsToUse = 8;
     final CountDownLatch cdlIn = new CountDownLatch(4), cdlOut = new CountDownLatch(1);



Mime
View raw message