Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id BCB0E17640 for ; Tue, 16 Jun 2015 00:18:34 +0000 (UTC) Received: (qmail 4375 invoked by uid 500); 16 Jun 2015 00:18:34 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 4330 invoked by uid 500); 16 Jun 2015 00:18:34 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 4318 invoked by uid 99); 16 Jun 2015 00:18:34 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 16 Jun 2015 00:18:34 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 704EAE10A2; Tue, 16 Jun 2015 00:18:34 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sershe@apache.org To: commits@hive.apache.org Message-Id: <5faad63744394760bda43c35efe6b8bf@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-10940 : HiveInputFormat::pushFilters serializes PPD objects for each getRecordReader call (Sergey Shelukhin, reviewed by Prasanth J, Gopal V) Date: Tue, 16 Jun 2015 00:18:34 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master ed7b70a51 -> b080e061d HIVE-10940 : HiveInputFormat::pushFilters serializes PPD objects for each getRecordReader call (Sergey Shelukhin, reviewed by Prasanth J, Gopal V) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b080e061 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b080e061 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b080e061 Branch: refs/heads/master Commit: b080e061d48e3058be9a3f01083b46d69c65e882 Parents: ed7b70a Author: Sergey Shelukhin Authored: Mon Jun 15 17:18:19 2015 -0700 Committer: Sergey Shelukhin Committed: Mon Jun 15 17:18:19 2015 -0700 ---------------------------------------------------------------------- .../hadoop/hive/ql/io/HiveInputFormat.java | 38 +++++++++++--------- .../hadoop/hive/ql/plan/TableScanDesc.java | 20 ++++++++++- 2 files changed, 41 insertions(+), 17 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b080e061/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java index 54e1d4e..2ff3951 100755 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java @@ -450,25 +450,31 @@ public class HiveInputFormat return; } - Serializable filterObject = scanDesc.getFilterObject(); - if (filterObject != null) { - jobConf.set( - TableScanDesc.FILTER_OBJECT_CONF_STR, - Utilities.serializeObject(filterObject)); + String serializedFilterObj = scanDesc.getSerializedFilterObject(); + String serializedFilterExpr = scanDesc.getSerializedFilterExpr(); + boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null; + if (!hasObj) { + Serializable filterObject = scanDesc.getFilterObject(); + if (filterObject != null) { + serializedFilterObj = Utilities.serializeObject(filterObject); + } + } + if (serializedFilterObj != null) { + jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj); + } + if (!hasExpr) { + serializedFilterExpr = Utilities.serializeExpression(filterExpr); } - String filterText = filterExpr.getExprString(); - String filterExprSerialized = Utilities.serializeExpression(filterExpr); if (LOG.isDebugEnabled()) { - LOG.debug("Filter text = " + filterText); - LOG.debug("Filter expression = " + filterExprSerialized); - } - jobConf.set( - TableScanDesc.FILTER_TEXT_CONF_STR, - filterText); - jobConf.set( - TableScanDesc.FILTER_EXPR_CONF_STR, - filterExprSerialized); + LOG.debug("Pushdown initiated with filterText = " + filterText + ", filterExpr = " + + filterExpr + ", serializedFilterExpr = " + serializedFilterExpr + " (" + + (hasExpr ? "desc" : "new") + ")" + (serializedFilterObj == null ? "" : + (", serializedFilterObj = " + serializedFilterObj + " (" + (hasObj ? "desc" : "new") + + ")"))); + } + jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText); + jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr); } protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass, http://git-wip-us.apache.org/repos/asf/hive/blob/b080e061/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java index 6282380..9e9a2a2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import org.apache.hadoop.hive.ql.exec.PTFUtils; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.parse.TableSample; @@ -72,6 +73,9 @@ public class TableScanDesc extends AbstractOperatorDesc { private ExprNodeGenericFuncDesc filterExpr; private transient Serializable filterObject; + private String serializedFilterExpr; + private String serializedFilterObject; + // Both neededColumnIDs and neededColumns should never be null. // When neededColumnIDs is an empty list, @@ -101,7 +105,6 @@ public class TableScanDesc extends AbstractOperatorDesc { private transient final Table tableMetadata; - public TableScanDesc() { this(null, null); } @@ -144,7 +147,11 @@ public class TableScanDesc extends AbstractOperatorDesc { } public void setFilterExpr(ExprNodeGenericFuncDesc filterExpr) { + // TODO: we could avoid serialization if it's the same expr. Check? this.filterExpr = filterExpr; + if (filterExpr != null) { + serializedFilterExpr = Utilities.serializeExpression(filterExpr); + } } public Serializable getFilterObject() { @@ -153,6 +160,9 @@ public class TableScanDesc extends AbstractOperatorDesc { public void setFilterObject(Serializable filterObject) { this.filterObject = filterObject; + if (filterObject != null) { + serializedFilterObject = Utilities.serializeObject(filterObject); + } } public void setNeededColumnIDs(List neededColumnIDs) { @@ -281,4 +291,12 @@ public class TableScanDesc extends AbstractOperatorDesc { public void setTableSample(TableSample tableSample) { this.tableSample = tableSample; } + + public String getSerializedFilterExpr() { + return serializedFilterExpr; + } + + public String getSerializedFilterObject() { + return serializedFilterObject; + } }