hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ser...@apache.org
Subject hive git commit: HIVE-13346 : LLAP doesn't update metadata priority when reusing from cache; some tweaks in LRFU policy (Sergey Shelukhin, reviewed by Prasanth Jayachandran)
Date Thu, 28 Apr 2016 00:41:08 GMT
Repository: hive
Updated Branches:
  refs/heads/master c3dd00b27 -> 58450d121


HIVE-13346 : LLAP doesn't update metadata priority when reusing from cache; some tweaks in
LRFU policy (Sergey Shelukhin, reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/58450d12
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/58450d12
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/58450d12

Branch: refs/heads/master
Commit: 58450d121437b614427bf8dac8c4eca2f7e29365
Parents: c3dd00b
Author: Sergey Shelukhin <sershe@apache.org>
Authored: Wed Apr 27 16:54:05 2016 -0700
Committer: Sergey Shelukhin <sershe@apache.org>
Committed: Wed Apr 27 17:40:56 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |  4 +-
 data/conf/hive-site.xml                         |  2 +-
 .../llap/cache/LowLevelLrfuCachePolicy.java     |  9 +++--
 .../hive/llap/io/metadata/OrcMetadataCache.java | 42 ++++++++++++--------
 .../hive/llap/cache/TestOrcMetadataCache.java   | 17 +++++++-
 5 files changed, 49 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/58450d12/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 49d748c..eeb9b84 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2575,7 +2575,7 @@ public class HiveConf extends Configuration {
         "LLAP IO memory usage; 'cache' (the default) uses data and metadata cache with a\n" +
         "custom off-heap allocator, 'allocator' uses the custom allocator without the caches,\n" +
         "'none' doesn't use either (this mode may result in significant performance degradation)"),
-    LLAP_ALLOCATOR_MIN_ALLOC("hive.llap.io.allocator.alloc.min", "128Kb", new SizeValidator(),
+    LLAP_ALLOCATOR_MIN_ALLOC("hive.llap.io.allocator.alloc.min", "16Kb", new SizeValidator(),
         "Minimum allocation possible from LLAP buddy allocator. Allocations below that are\n" +
         "padded to minimum allocation. For ORC, should generally be the same as the expected\n" +
         "compression buffer size, or next lowest power of 2. Must be a power of 2."),
@@ -2590,7 +2590,7 @@ public class HiveConf extends Configuration {
         "Maximum size for IO allocator or ORC low-level cache.", "hive.llap.io.cache.orc.size"),
     LLAP_ALLOCATOR_DIRECT("hive.llap.io.allocator.direct", true,
         "Whether ORC low-level cache should use direct allocation."),
-    LLAP_USE_LRFU("hive.llap.io.use.lrfu", false,
+    LLAP_USE_LRFU("hive.llap.io.use.lrfu", true,
         "Whether ORC low-level cache should use LRFU cache policy instead of default (FIFO)."),
     LLAP_LRFU_LAMBDA("hive.llap.io.lrfu.lambda", 0.01f,
         "Lambda for ORC low-level cache LRFU cache policy. Must be in [0, 1]. 0 makes LRFU\n" +

http://git-wip-us.apache.org/repos/asf/hive/blob/58450d12/data/conf/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index cbb5546..041b67d 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -288,7 +288,7 @@
 
 <property>
   <name>hive.llap.io.use.lrfu</name>
-  <value>false</value>
+  <value>true</value>
 </property>
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/58450d12/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelLrfuCachePolicy.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelLrfuCachePolicy.java b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelLrfuCachePolicy.java
index bbff3cc..5a0b27f 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelLrfuCachePolicy.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/cache/LowLevelLrfuCachePolicy.java
@@ -100,7 +100,11 @@ public class LowLevelLrfuCachePolicy implements LowLevelCachePolicy {
     buffer.priority = F0;
     buffer.lastUpdate = time;
     if (priority == Priority.HIGH) {
-      buffer.priority *= 8; // this is arbitrary
+      // This is arbitrary. Note that metadata may come from a big scan and nuke all the data
+      // from some small frequently accessed tables, because it gets such a large priority boost
+      // to start with. Think of the multiplier as the number of accesses after which the data
+      // becomes more important than some random read-once metadata, in a pure-LFU scheme.
+      buffer.priority *= 3;
     } else {
       assert priority == Priority.NORMAL;
     }
@@ -256,7 +260,7 @@ public class LowLevelLrfuCachePolicy implements LowLevelCachePolicy {
     heap[ix] = buffer;
   }
 
-  // Note: almost never called (unless buffers are very large or we evict a lot).
+  // Note: almost never called (unless buffers are very large or we evict a lot, or LFU).
   private LlapCacheableBuffer evictFromHeapUnderLock(long time) {
     while (true) {
       if (heapSize == 0) return null;
@@ -287,7 +291,6 @@ public class LowLevelLrfuCachePolicy implements LowLevelCachePolicy {
     // down; therefore, we can update priorities of other blocks as we go for part of the heap -
     // we correct any discrepancy w/the parent after expiring priority, and any block we expire
     // the priority for already has lower priority than that of its children.
-    // TODO: avoid expiring priorities if times are close? might be needlessly expensive.
     int ix = buffer.indexInHeap;
     double priority = buffer.priority;
     while (true) {

http://git-wip-us.apache.org/repos/asf/hive/blob/58450d12/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
index 66713d3..3f4f43b 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcMetadataCache.java
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.llap.io.metadata;
 
+import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer;
+
 import java.io.IOException;
 import java.util.concurrent.ConcurrentHashMap;
 
@@ -51,15 +53,7 @@ public class OrcMetadataCache {
     memoryManager.reserveMemory(memUsage, false);
     OrcFileMetadata val = metadata.putIfAbsent(metaData.getFileKey(), metaData);
     // See OrcFileMetadata; it is always unlocked, so we just "touch" it here to simulate use.
-    if (val == null) {
-      val = metaData;
-      policy.cache(val, Priority.HIGH);
-    } else {
-      memoryManager.releaseMemory(memUsage);
-      policy.notifyLock(val);
-    }
-    policy.notifyUnlock(val);
-    return val;
+    return touchOnPut(metaData, val, memUsage);
   }
 
   public OrcStripeMetadata putStripeMetadata(OrcStripeMetadata metaData) {
@@ -67,17 +61,22 @@ public class OrcMetadataCache {
     memoryManager.reserveMemory(memUsage, false);
     OrcStripeMetadata val = stripeMetadata.putIfAbsent(metaData.getKey(), metaData);
     // See OrcStripeMetadata; it is always unlocked, so we just "touch" it here to simulate use.
-    if (val == null) {
-      val = metaData;
-      policy.cache(val, Priority.HIGH);
+    return touchOnPut(metaData, val, memUsage);
+  }
+
+  private <T extends LlapCacheableBuffer> T touchOnPut(T newVal, T oldVal, long memUsage) {
+    if (oldVal == null) {
+      oldVal = newVal;
+      policy.cache(oldVal, Priority.HIGH);
     } else {
       memoryManager.releaseMemory(memUsage);
-      policy.notifyLock(val);
+      policy.notifyLock(oldVal);
     }
-    policy.notifyUnlock(val);
-    return val;
+    policy.notifyUnlock(oldVal);
+    return oldVal;
   }
 
+
   public void putIncompleteCbs(Object fileKey, DiskRange[] ranges, long baseOffset) {
     if (estimateErrors == null) return;
     OrcFileEstimateErrors errorData = estimateErrors.get(fileKey);
@@ -110,11 +109,20 @@ public class OrcMetadataCache {
   }
 
   public OrcStripeMetadata getStripeMetadata(OrcBatchKey stripeKey) throws IOException {
-    return stripeMetadata.get(stripeKey);
+    return touchOnGet(stripeMetadata.get(stripeKey));
   }
 
   public OrcFileMetadata getFileMetadata(Object fileKey) throws IOException {
-    return metadata.get(fileKey);
+    return touchOnGet(metadata.get(fileKey));
+  }
+
+
+  private <T extends LlapCacheableBuffer> T touchOnGet(T result) {
+    if (result != null) {
+      policy.notifyLock(result);
+      policy.notifyUnlock(result); // Never locked for eviction; Java object.
+    }
+    return result;
   }
 
   public DiskRangeList getIncompleteCbs(Object fileKey, DiskRangeList ranges, long baseOffset,

http://git-wip-us.apache.org/repos/asf/hive/blob/58450d12/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
----------------------------------------------------------------------
diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
index 3f2e750..40edb28 100644
--- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
+++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestOrcMetadataCache.java
@@ -29,16 +29,18 @@ import org.junit.Test;
 
 public class TestOrcMetadataCache {
   private static class DummyCachePolicy implements LowLevelCachePolicy {
-    public DummyCachePolicy() {
-    }
+    int lockCount = 0, unlockCount = 0;
 
     public void cache(LlapCacheableBuffer buffer, Priority pri) {
+      ++lockCount;
     }
 
     public void notifyLock(LlapCacheableBuffer buffer) {
+      ++lockCount;
     }
 
     public void notifyUnlock(LlapCacheableBuffer buffer) {
+      ++unlockCount;
     }
 
     public long evictSomeBlocks(long memoryToReserve) {
@@ -54,6 +56,11 @@ public class TestOrcMetadataCache {
 
     public void setParentDebugDumper(LlapOomDebugDump dumper) {
     }
+
+    public void verifyEquals(int i) {
+      assertEquals(i, lockCount);
+      assertEquals(i, unlockCount);
+    }
   }
 
   private static class DummyMemoryManager implements MemoryManager {
@@ -92,14 +99,19 @@ public class TestOrcMetadataCache {
     OrcFileMetadata ofm1 = OrcFileMetadata.createDummy(1), ofm2 = OrcFileMetadata.createDummy(2);
     assertSame(ofm1, cache.putFileMetadata(ofm1));
     assertEquals(1, mm.allocs);
+    cp.verifyEquals(1);
     assertSame(ofm2, cache.putFileMetadata(ofm2));
     assertEquals(2, mm.allocs);
+    cp.verifyEquals(2);
     assertSame(ofm1, cache.getFileMetadata(1));
     assertSame(ofm2, cache.getFileMetadata(2));
+    cp.verifyEquals(4);
     OrcFileMetadata ofm3 = OrcFileMetadata.createDummy(1);
     assertSame(ofm1, cache.putFileMetadata(ofm3));
     assertEquals(2, mm.allocs);
+    cp.verifyEquals(5);
     assertSame(ofm1, cache.getFileMetadata(1));
+    cp.verifyEquals(6);
 
     OrcStripeMetadata osm1 = OrcStripeMetadata.createDummy(1), osm2 = OrcStripeMetadata.createDummy(2);
     assertSame(osm1, cache.putStripeMetadata(osm1));
@@ -112,5 +124,6 @@ public class TestOrcMetadataCache {
     assertSame(osm1, cache.putStripeMetadata(osm3));
     assertEquals(4, mm.allocs);
     assertSame(osm1, cache.getStripeMetadata(osm3.getKey()));
+    cp.verifyEquals(12);
   }
 }


Mime
View raw message