hive-commits mailing list archives

From ser...@apache.org
Subject hive git commit: HIVE-10940 : HiveInputFormat::pushFilters serializes PPD objects for each getRecordReader call (Sergey Shelukhin, reviewed by Prasanth J, Gopal V)
Date Tue, 16 Jun 2015 00:18:34 GMT
Repository: hive
Updated Branches:
  refs/heads/master ed7b70a51 -> b080e061d


HIVE-10940 : HiveInputFormat::pushFilters serializes PPD objects for each getRecordReader call (Sergey Shelukhin, reviewed by Prasanth J, Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b080e061
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b080e061
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b080e061

Branch: refs/heads/master
Commit: b080e061d48e3058be9a3f01083b46d69c65e882
Parents: ed7b70a
Author: Sergey Shelukhin <sershe@apache.org>
Authored: Mon Jun 15 17:18:19 2015 -0700
Committer: Sergey Shelukhin <sershe@apache.org>
Committed: Mon Jun 15 17:18:19 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/HiveInputFormat.java      | 38 +++++++++++---------
 .../hadoop/hive/ql/plan/TableScanDesc.java      | 20 ++++++++++-
 2 files changed, 41 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b080e061/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
index 54e1d4e..2ff3951 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
@@ -450,25 +450,31 @@ public class HiveInputFormat<K extends WritableComparable, V extends Writable>
       return;
     }
 
-    Serializable filterObject = scanDesc.getFilterObject();
-    if (filterObject != null) {
-      jobConf.set(
-          TableScanDesc.FILTER_OBJECT_CONF_STR,
-          Utilities.serializeObject(filterObject));
+    String serializedFilterObj = scanDesc.getSerializedFilterObject();
+    String serializedFilterExpr = scanDesc.getSerializedFilterExpr();
+    boolean hasObj = serializedFilterObj != null, hasExpr = serializedFilterExpr != null;
+    if (!hasObj) {
+      Serializable filterObject = scanDesc.getFilterObject();
+      if (filterObject != null) {
+        serializedFilterObj = Utilities.serializeObject(filterObject);
+      }
+    }
+    if (serializedFilterObj != null) {
+      jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, serializedFilterObj);
+    }
+    if (!hasExpr) {
+      serializedFilterExpr = Utilities.serializeExpression(filterExpr);
     }
-
     String filterText = filterExpr.getExprString();
-    String filterExprSerialized = Utilities.serializeExpression(filterExpr);
     if (LOG.isDebugEnabled()) {
-      LOG.debug("Filter text = " + filterText);
-      LOG.debug("Filter expression = " + filterExprSerialized);
-    }
-    jobConf.set(
-      TableScanDesc.FILTER_TEXT_CONF_STR,
-      filterText);
-    jobConf.set(
-      TableScanDesc.FILTER_EXPR_CONF_STR,
-      filterExprSerialized);
+      LOG.debug("Pushdown initiated with filterText = " + filterText + ", filterExpr = "
+          + filterExpr + ", serializedFilterExpr = " + serializedFilterExpr + " ("
+          + (hasExpr ? "desc" : "new") + ")" + (serializedFilterObj == null ? "" :
+            (", serializedFilterObj = " + serializedFilterObj + " (" + (hasObj ? "desc" : "new")
+                + ")")));
+    }
+    jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText);
+    jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, serializedFilterExpr);
   }
 
   protected void pushProjectionsAndFilters(JobConf jobConf, Class inputFormatClass,
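
For context, the strings set above are read back on the task side. The following is a minimal, illustrative sketch (not part of this commit; the PushedFilterReader class name is hypothetical, and it assumes Utilities.deserializeExpression is still the counterpart of serializeExpression on this branch) of how a reader could recover the pushed filter from the JobConf:

// Illustrative consumer-side sketch only; class name is hypothetical.
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

public class PushedFilterReader {
  /** Recovers the filter expression that pushFilters placed in the JobConf. */
  public static ExprNodeGenericFuncDesc readFilterExpr(JobConf jobConf) {
    String serialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
    if (serialized == null) {
      return null; // no filter was pushed for this table scan
    }
    // Assumed to mirror the serializeExpression call used by pushFilters.
    return Utilities.deserializeExpression(serialized);
  }
}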

http://git-wip-us.apache.org/repos/asf/hive/blob/b080e061/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
index 6282380..9e9a2a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/TableScanDesc.java
@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.hive.ql.exec.PTFUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.TableSample;
@@ -72,6 +73,9 @@ public class TableScanDesc extends AbstractOperatorDesc {
 
   private ExprNodeGenericFuncDesc filterExpr;
   private transient Serializable filterObject;
+  private String serializedFilterExpr;
+  private String serializedFilterObject;
+
 
   // Both neededColumnIDs and neededColumns should never be null.
   // When neededColumnIDs is an empty list,
@@ -101,7 +105,6 @@ public class TableScanDesc extends AbstractOperatorDesc {
 
   private transient final Table tableMetadata;
 
-
   public TableScanDesc() {
     this(null, null);
   }
@@ -144,7 +147,11 @@ public class TableScanDesc extends AbstractOperatorDesc {
   }
 
   public void setFilterExpr(ExprNodeGenericFuncDesc filterExpr) {
+    // TODO: we could avoid serialization if it's the same expr. Check?
     this.filterExpr = filterExpr;
+    if (filterExpr != null) {
+      serializedFilterExpr = Utilities.serializeExpression(filterExpr);
+    }
   }
 
   public Serializable getFilterObject() {
@@ -153,6 +160,9 @@ public class TableScanDesc extends AbstractOperatorDesc {
 
   public void setFilterObject(Serializable filterObject) {
     this.filterObject = filterObject;
+    if (filterObject != null) {
+      serializedFilterObject = Utilities.serializeObject(filterObject);
+    }
   }
 
   public void setNeededColumnIDs(List<Integer> neededColumnIDs) {
@@ -281,4 +291,12 @@ public class TableScanDesc extends AbstractOperatorDesc {
   public void setTableSample(TableSample tableSample) {
     this.tableSample = tableSample;
   }
+
+  public String getSerializedFilterExpr() {
+    return serializedFilterExpr;
+  }
+
+  public String getSerializedFilterObject() {
+    return serializedFilterObject;
+  }
 }
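
A rough usage sketch of the new caching path (illustrative only; the CachedFilterPush class and its methods are hypothetical, while TableScanDesc, its accessors, and the conf keys are the ones shown in the diff above). The setters serialize once when the plan is built, and each per-getRecordReader push reuses the cached strings:

// Illustrative sketch only: serialization happens once in the setters;
// per-getRecordReader pushes reuse the cached strings.
import java.io.Serializable;

import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.mapred.JobConf;

public class CachedFilterPush {
  /** Built once per table scan: both setters serialize eagerly, a single time. */
  public static TableScanDesc buildScanDesc(ExprNodeGenericFuncDesc filterExpr,
      Serializable filterObject) {
    TableScanDesc scanDesc = new TableScanDesc();
    scanDesc.setFilterExpr(filterExpr);     // caches serializedFilterExpr
    scanDesc.setFilterObject(filterObject); // caches serializedFilterObject
    return scanDesc;
  }

  /** Called per getRecordReader: cheap string lookups, no re-serialization. */
  public static void push(JobConf jobConf, TableScanDesc scanDesc) {
    String expr = scanDesc.getSerializedFilterExpr();
    if (expr != null) {
      jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, expr);
    }
    String obj = scanDesc.getSerializedFilterObject();
    if (obj != null) {
      jobConf.set(TableScanDesc.FILTER_OBJECT_CONF_STR, obj);
    }
  }
}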

