Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 960B1200D1B for ; Thu, 12 Oct 2017 19:58:02 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 94B541609E8; Thu, 12 Oct 2017 17:58:02 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id E2B921609E4 for ; Thu, 12 Oct 2017 19:58:01 +0200 (CEST) Received: (qmail 26271 invoked by uid 500); 12 Oct 2017 17:58:01 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 26257 invoked by uid 99); 12 Oct 2017 17:58:01 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 12 Oct 2017 17:58:01 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id EA64FDFAF5; Thu, 12 Oct 2017 17:58:00 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: mithun@apache.org To: commits@hive.apache.org Message-Id: <91e625a987aa4d46abb94e0843ebe96f@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran) Date: Thu, 12 Oct 2017 17:58:00 +0000 (UTC) archived-at: Thu, 12 Oct 2017 17:58:02 -0000 Repository: hive Updated Branches: refs/heads/branch-2.3 0c56cf696 -> 92d3070f2 HIVE-17669: Cache to optimize SearchArgument deserialization (Mithun Radhakrishnan reviewed by Prasanth Jayachandran) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/92d3070f Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/92d3070f Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/92d3070f Branch: refs/heads/branch-2.3 Commit: 92d3070f20eb7361aced6539e59a2d9fe90f8609 Parents: 0c56cf6 Author: Mithun Radhakrishnan Authored: Thu Oct 12 10:41:38 2017 -0700 Committer: Mithun RK Committed: Thu Oct 12 10:49:55 2017 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hive/conf/HiveConf.java | 3 + .../hive/ql/io/sarg/ConvertAstToSearchArg.java | 76 ++++++++++++++++++-- 2 files changed, 74 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/92d3070f/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 073c087..41d12ce 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1371,6 +1371,9 @@ public class HiveConf extends Configuration { "references for the cached object. Setting this to true can help avoid out of memory\n" + "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + "overall query performance."), + HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB("hive.io.sarg.cache.max.weight.mb", 10, + "The max weight allowed for the SearchArgument Cache. By default, the cache allows a max-weight of 10MB, " + + "after which entries will be evicted."), HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + http://git-wip-us.apache.org/repos/asf/hive/blob/92d3070f/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java index 997334b..2de8319 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/ConvertAstToSearchArg.java @@ -21,13 +21,18 @@ package org.apache.hadoop.hive.ql.io.sarg; import java.sql.Date; import java.sql.Timestamp; import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.Weigher; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; -import org.apache.hadoop.hive.ql.io.sarg.LiteralDelegate; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -436,14 +441,75 @@ public class ConvertAstToSearchArg { } } - public static final String SARG_PUSHDOWN = "sarg.pushdown"; + private static volatile Cache sargsCache = null; + + private static synchronized Cache initializeAndGetSargsCache(Configuration conf) { + if (sargsCache == null) { + sargsCache = CacheBuilder.newBuilder() + .weigher(new Weigher() { + @Override + public int weigh(String key, SearchArgument value) { + return key.length(); + } + }) + .maximumWeight( + HiveConf.getIntVar(conf, + HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) * 1024 *1024 + ) + .build(); // Can't use CacheLoader because SearchArguments may be built either from Kryo strings, + // or from expressions. + } + return sargsCache; + } + + private static Cache getSargsCache(Configuration conf) { + return sargsCache == null? initializeAndGetSargsCache(conf) : sargsCache; + } + + private static boolean isSargsCacheEnabled(Configuration conf) { + return HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVE_IO_SARG_CACHE_MAX_WEIGHT_MB) > 0; + } + + private static SearchArgument getSearchArgumentFromString(Configuration conf, final String sargString) { + + try { + return isSargsCacheEnabled(conf)? + getSargsCache(conf).get(sargString, new Callable() { + @Override + public SearchArgument call() { + return create(sargString); + } + }) + : create(sargString); + } + catch (ExecutionException exception) { + throw new RuntimeException(exception); + } + } + + private static SearchArgument getSearchArgumentFromExpression(final Configuration conf, final String sargString) { + + try { + return isSargsCacheEnabled(conf)? + getSargsCache(conf).get(sargString, new Callable() { + @Override + public SearchArgument call() { + return create(conf, SerializationUtilities.deserializeExpression(sargString)); + } + }) + : create(conf, SerializationUtilities.deserializeExpression(sargString)); + } + catch (ExecutionException exception) { + throw new RuntimeException(exception); + } + } + public static SearchArgument create(Configuration conf, ExprNodeGenericFuncDesc expression) { return new ConvertAstToSearchArg(conf, expression).buildSearchArgument(); } - private final static ThreadLocal kryo = new ThreadLocal() { protected Kryo initialValue() { return new Kryo(); } }; @@ -459,9 +525,9 @@ public class ConvertAstToSearchArg { public static SearchArgument createFromConf(Configuration conf) { String sargString; if ((sargString = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR)) != null) { - return create(conf, SerializationUtilities.deserializeExpression(sargString)); + return getSearchArgumentFromExpression(conf, sargString); } else if ((sargString = conf.get(SARG_PUSHDOWN)) != null) { - return create(sargString); + return getSearchArgumentFromString(conf, sargString); } return null; }