hive-commits mailing list archives

From sank...@apache.org
Subject hive git commit: HIVE-20646: Partition filter condition is not pushed down to metastore query if it has IS NOT NULL (Sankar Hariappan, reviewed by Daniel Dai)
Date Sat, 06 Oct 2018 08:15:13 GMT
Repository: hive
Updated Branches:
  refs/heads/master a4b087b18 -> 78e45be15


HIVE-20646: Partition filter condition is not pushed down to metastore query if it has IS NOT NULL (Sankar Hariappan, reviewed by Daniel Dai)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/78e45be1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/78e45be1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/78e45be1

Branch: refs/heads/master
Commit: 78e45be1556025f324d26384acf2631a3319e421
Parents: a4b087b
Author: Sankar Hariappan <sankarh@apache.org>
Authored: Sat Oct 6 13:44:51 2018 +0530
Committer: Sankar Hariappan <sankarh@apache.org>
Committed: Sat Oct 6 13:44:51 2018 +0530

----------------------------------------------------------------------
 .../ppr/PartitionExpressionForMetastore.java    | 13 ++++++--
 .../hadoop/hive/ql/plan/ExprNodeDescUtils.java  | 34 +++++++++++++++++++-
 .../hive/metastore/TestMetastoreExpr.java       | 10 ++++--
 .../metastore/PartitionExpressionProxy.java     |  9 ++++--
 .../DefaultPartitionExpressionProxy.java        |  2 +-
 .../hive/metastore/MetaStoreDirectSql.java      | 13 ++++++--
 .../hadoop/hive/metastore/ObjectStore.java      | 25 +++++++++-----
 .../hive/metastore/PartFilterExprUtil.java      |  4 +--
 .../MockPartitionExpressionForMetastore.java    |  2 +-
 .../hadoop/hive/metastore/TestOldSchema.java    |  2 +-
 10 files changed, 90 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
index 637caa6..11c02f3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionExpressionForMetastore.java
@@ -48,8 +48,17 @@ public class PartitionExpressionForMetastore implements PartitionExpressionProxy
   private static final Logger LOG = LoggerFactory.getLogger(PartitionExpressionForMetastore.class);
 
   @Override
-  public String convertExprToFilter(byte[] exprBytes) throws MetaException {
-    return deserializeExpr(exprBytes).getExprString();
+  public String convertExprToFilter(byte[] exprBytes, String defaultPartitionName) throws MetaException {
+    ExprNodeGenericFuncDesc expr = deserializeExpr(exprBytes);
+    if ((defaultPartitionName != null) && (!defaultPartitionName.isEmpty())) {
+      try {
+        ExprNodeDescUtils.replaceNullFiltersWithDefaultPartition(expr, defaultPartitionName);
+      } catch (SemanticException ex) {
+        LOG.error("Failed to replace \"is null\" and \"is not null\" expression with default partition", ex);
+        throw new MetaException(ex.getMessage());
+      }
+    }
+    return expr.getExprString();
   }
 
   @Override
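
For reference, a minimal sketch of the new proxy behavior for an IS NOT NULL predicate on a string partition column. The class name is made up for illustration; it assumes the patched ql classes are on the classpath and that the built-in "<>" operator resolves from the static function registry without a SessionState, so treat it as a sketch rather than part of the patch.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ConvertExprToFilterSketch {
  public static void main(String[] args) throws Exception {
    // Build "p1 is not null" over a string partition column, as the pruner would.
    List<ExprNodeDesc> children = new ArrayList<>();
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "p1", null, true));
    ExprNodeGenericFuncDesc isNotNull = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo, new GenericUDFOPNotNull(), children);

    // Serialize the expression the way the client does before handing it to the metastore.
    byte[] exprBytes = SerializationUtilities.serializeExpressionToKryo(isNotNull);

    // With the default partition name supplied, the filter string now carries a comparison
    // the metastore filter grammar can parse, instead of an "is not null" it cannot.
    String filter = new PartitionExpressionForMetastore()
        .convertExprToFilter(exprBytes, "__HIVE_DEFAULT_PARTITION__");
    System.out.println(filter);   // expected along the lines of: (p1 <> '__HIVE_DEFAULT_PARTITION__')
  }
}

Before this change the same call returned the raw "p1 is not null" expression string, which the metastore filter grammar does not accept, so such predicates fell back to client-side partition pruning.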

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
index 66c1025..c274fd7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java
@@ -36,6 +36,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -47,7 +49,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.util.ReflectionUtils;
 
-import com.google.common.collect.Lists;
 
 public class ExprNodeDescUtils {
 
@@ -145,6 +146,37 @@ public class ExprNodeDescUtils {
     }
   }
 
+  public static void replaceNullFiltersWithDefaultPartition(ExprNodeDesc origin,
+                                            String defaultPartitionName) throws SemanticException {
+    // Convert "ptn_col isnull" to "ptn_col = default_partition" and
+    // "ptn_col isnotnull" to "ptn_col <> default_partition"
+    String fnName = null;
+    if (origin instanceof ExprNodeGenericFuncDesc) {
+      if (((ExprNodeGenericFuncDesc) origin).getGenericUDF() instanceof GenericUDFOPNull) {
+        fnName = "=";
+      } else if (((ExprNodeGenericFuncDesc) origin).getGenericUDF() instanceof GenericUDFOPNotNull) {
+        fnName = "<>";
+      }
+    }
+    // Found an expression for function "isnull" or "isnotnull"
+    if (fnName != null) {
+      List<ExprNodeDesc> children = origin.getChildren();
+      assert(children.size() == 1);
+      ExprNodeConstantDesc defaultPartition = new ExprNodeConstantDesc(defaultPartitionName);
+      children.add(defaultPartition);
+      ((ExprNodeGenericFuncDesc) origin).setChildren(children);
+
+      ((ExprNodeGenericFuncDesc) origin).setGenericUDF(
+              FunctionRegistry.getFunctionInfo(fnName).getGenericUDF());
+    } else {
+      if (origin.getChildren() != null) {
+        for (ExprNodeDesc child : origin.getChildren()) {
+          replaceNullFiltersWithDefaultPartition(child, defaultPartitionName);
+        }
+      }
+    }
+  }
+
   /**
    * return true if predicate is already included in source
     */
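
The helper above rewrites the expression tree in place: it recurses until it finds an isnull/isnotnull node, swaps the UDF for "=" or "<>", and appends the default partition name as a second child. A minimal usage sketch under the same assumptions as above (illustrative class name, patched ql classes on the classpath):

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNull;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class ReplaceNullFiltersSketch {
  public static void main(String[] args) throws Exception {
    // "p2 is null" over an int partition column.
    List<ExprNodeDesc> children = new ArrayList<>();
    children.add(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "p2", null, true));
    ExprNodeGenericFuncDesc isNull = new ExprNodeGenericFuncDesc(
        TypeInfoFactory.booleanTypeInfo, new GenericUDFOPNull(), children);

    System.out.println(isNull.getExprString());   // p2 is null

    // Mutates the tree: the UDF becomes "=" and the default partition name is added as a child.
    ExprNodeDescUtils.replaceNullFiltersWithDefaultPartition(isNull, "__HIVE_DEFAULT_PARTITION__");

    System.out.println(isNull.getExprString());   // roughly (p2 = '__HIVE_DEFAULT_PARTITION__')
  }
}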

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java b/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java
index b6b27bc..1becbb8 100644
--- a/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java
+++ b/ql/src/test/org/apache/hadoop/hive/metastore/TestMetastoreExpr.java
@@ -140,10 +140,16 @@ public class TestMetastoreExpr extends TestCase {
     checkExpr(3, dbName, tblName,
         e.val(32).val(31).intCol("p2").val(false).pred("between", 4).build());
 
+    addPartition(client, tbl, Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "36"), "part5");
+    addPartition(client, tbl, Lists.newArrayList("p16", "__HIVE_DEFAULT_PARTITION__"), "part6");
+
     // Apply isnull and instr (not supported by pushdown) via name filtering.
-    checkExpr(4, dbName, tblName, e.val("p").strCol("p1")
+    checkExpr(5, dbName, tblName, e.val("p").strCol("p1")
         .fn("instr", TypeInfoFactory.intTypeInfo, 2).val(0).pred("<=", 2).build());
-    checkExpr(0, dbName, tblName, e.intCol("p2").pred("isnull", 1).build());
+    checkExpr(1, dbName, tblName, e.intCol("p2").pred("isnull", 1).build());
+    checkExpr(1, dbName, tblName, e.val("__HIVE_DEFAULT_PARTITION__").intCol("p2").pred("=", 2).build());
+    checkExpr(5, dbName, tblName, e.intCol("p1").pred("isnotnull", 1).build());
+    checkExpr(5, dbName, tblName, e.val("__HIVE_DEFAULT_PARTITION__").strCol("p1").pred("!=", 2).build());
 
     // Cannot deserialize => throw the specific exception.
     try {

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
index 105511d..badb1ca 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/PartitionExpressionProxy.java
@@ -34,10 +34,13 @@ public interface PartitionExpressionProxy {
 
   /**
    * Converts serialized Hive expression into filter in the format suitable for Filter.g.
-   * @param expr Serialized expression.
-   * @return The filter string.
+   * The isnull and isnotnull expressions are converted to = and != default partition respectively
+   * for push down to metastore SQL.
+   * @param exprBytes Serialized expression.
+   * @param defaultPartitionName Default partition name.
+   * @return Filter string.
    */
-  public String convertExprToFilter(byte[] expr) throws MetaException;
+  String convertExprToFilter(byte[] exprBytes, String defaultPartitionName) throws MetaException;
 
   /**
    * Filters the partition names via serialized Hive expression.

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DefaultPartitionExpressionProxy.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DefaultPartitionExpressionProxy.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DefaultPartitionExpressionProxy.java
index ec543be..5833817 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DefaultPartitionExpressionProxy.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/DefaultPartitionExpressionProxy.java
@@ -30,7 +30,7 @@ import java.util.List;
  */
 public class DefaultPartitionExpressionProxy implements PartitionExpressionProxy {
   @Override
-  public String convertExprToFilter(byte[] expr) throws MetaException {
+  public String convertExprToFilter(byte[] expr, String defaultPartitionName) throws MetaException {
     throw new UnsupportedOperationException();
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 571c789..0a25b77 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -513,11 +513,17 @@ class MetaStoreDirectSql {
 
   public boolean generateSqlFilterForPushdown(
      Table table, ExpressionTree tree, SqlFilterForPushdown result) throws MetaException {
+    return generateSqlFilterForPushdown(table, tree, null, result);
+  }
+
+  public boolean generateSqlFilterForPushdown(Table table, ExpressionTree tree, String defaultPartitionName,
+                                              SqlFilterForPushdown result) throws MetaException {
     // Derby and Oracle do not interpret filters ANSI-properly in some cases and need a workaround.
     boolean dbHasJoinCastBug = DatabaseProduct.hasJoinOperationOrderBug(dbType);
     result.table = table;
     result.filter = PartitionFilterGenerator.generateSqlFilter(table, tree, result.params,
-        result.joins, dbHasJoinCastBug, defaultPartName, dbType, schema);
+            result.joins, dbHasJoinCastBug, ((defaultPartitionName == null) ? defaultPartName : defaultPartitionName),
+            dbType, schema);
     return result.filter != null;
   }
 
@@ -1306,7 +1312,8 @@ class MetaStoreDirectSql {
         nodeValue = MetaStoreUtils.PARTITION_DATE_FORMAT.get().format(nodeValue);
       }
 
-      if (colType != valType) {
+      boolean isDefaultPartition = (valType == FilterType.String) && defaultPartName.equals(nodeValue);
+      if ((colType != valType) && (!isDefaultPartition)) {
         // It's not clear how filtering for e.g. "stringCol > 5" should work (which side is
         // to be coerced?). Let the expression evaluation sort this one out, not metastore.
         filterBuffer.setError("Cannot push down filter for "
@@ -1336,7 +1343,7 @@ class MetaStoreDirectSql {
         params.add(nodeValue);
       }
       String tableColumn = tableValue;
-      if (colType != FilterType.String) {
+      if ((colType != FilterType.String) && (!isDefaultPartition)) {
         // The underlying database field is varchar, we need to compare numbers.
         if (colType == FilterType.Integral) {
           tableValue = "cast(" + tableValue + " as decimal(21,0))";
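
The isDefaultPartition checks above carry the rule that makes the rewritten predicates usable in direct SQL: a literal equal to the default partition name is matched as a raw string against the varchar partition-value column even when the partition column is numeric, so neither the type-mismatch bail-out nor the decimal cast applies. A simplified stand-in of that decision (FilterType here is a hypothetical mirror of the private enum in MetaStoreDirectSql, not the real type):

public class DefaultPartitionPushdownSketch {

  // Hypothetical mirror of MetaStoreDirectSql's private FilterType enum, for illustration only.
  enum FilterType { Integral, String, Date }

  // Mirrors the decision in this hunk: mismatched column/value types normally block pushdown,
  // unless the literal is the default partition marker, which is matched as a raw string.
  static boolean pushable(FilterType colType, FilterType valType,
                          Object nodeValue, String defaultPartName) {
    boolean isDefaultPartition =
        (valType == FilterType.String) && defaultPartName.equals(nodeValue);
    return (colType == valType) || isDefaultPartition;
  }

  public static void main(String[] args) {
    String def = "__HIVE_DEFAULT_PARTITION__";
    // int partition column vs. the default partition marker: now pushable, and no decimal cast.
    System.out.println(pushable(FilterType.Integral, FilterType.String, def, def));    // true
    // int partition column vs. an arbitrary string literal: still rejected for pushdown.
    System.out.println(pushable(FilterType.Integral, FilterType.String, "abc", def));  // false
  }
}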

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index 466902d..5372714 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -3384,7 +3384,9 @@ public class ObjectStore implements RawStore, Configurable {
       final String defaultPartitionName, final  short maxParts, List<Partition> result,
       boolean allowSql, boolean allowJdo) throws TException {
     assert result != null;
-    final ExpressionTree exprTree = PartFilterExprUtil.makeExpressionTree(expressionProxy, expr);
+
+    final ExpressionTree exprTree = PartFilterExprUtil.makeExpressionTree(expressionProxy, expr,
+                                                    getDefaultPartitionName(defaultPartitionName));
     final AtomicBoolean hasUnknownPartitions = new AtomicBoolean(false);
     result.addAll(new GetListHelper<Partition>(catName, dbName, tblName, allowSql, allowJdo) {
       @Override
@@ -3393,7 +3395,7 @@ public class ObjectStore implements RawStore, Configurable {
         List<Partition> result = null;
         if (exprTree != null) {
           SqlFilterForPushdown filter = new SqlFilterForPushdown();
-          if (directSql.generateSqlFilterForPushdown(ctx.getTable(), exprTree, filter)) {
+          if (directSql.generateSqlFilterForPushdown(ctx.getTable(), exprTree, defaultPartitionName, filter)) {
             return directSql.getPartitionsViaSqlFilter(filter, null);
           }
         }
@@ -3425,7 +3427,16 @@ public class ObjectStore implements RawStore, Configurable {
     return hasUnknownPartitions.get();
   }
 
-
+  /**
+   * Gets the default partition name.
+   * @param inputDefaultPartName Incoming default partition name.
+   * @return Valid default partition name
+   */
+  private String getDefaultPartitionName(String inputDefaultPartName) {
+    return (((inputDefaultPartName == null) || (inputDefaultPartName.isEmpty()))
+            ? MetastoreConf.getVar(getConf(), ConfVars.DEFAULTPARTITIONNAME)
+            : inputDefaultPartName);
+  }
 
   /**
    * Gets the partition names from a table, pruned using an expression.
@@ -3440,10 +3451,8 @@ public class ObjectStore implements RawStore, Configurable {
       String defaultPartName, short maxParts, List<String> result) throws MetaException {
     result.addAll(getPartitionNamesNoTxn(table.getCatName(),
         table.getDbName(), table.getTableName(), maxParts));
-    if (defaultPartName == null || defaultPartName.isEmpty()) {
-      defaultPartName = MetastoreConf.getVar(getConf(), ConfVars.DEFAULTPARTITIONNAME);
-    }
-    return expressionProxy.filterPartitionsByExpr(table.getPartitionKeys(), expr, defaultPartName, result);
+    return expressionProxy.filterPartitionsByExpr(table.getPartitionKeys(), expr,
+            getDefaultPartitionName(defaultPartName), result);
   }
 
   /**
@@ -3879,7 +3888,7 @@ public class ObjectStore implements RawStore, Configurable {
   @Override
   public int getNumPartitionsByExpr(String catName, String dbName, String tblName,
                                    byte[] expr) throws MetaException, NoSuchObjectException {
-    final ExpressionTree exprTree = PartFilterExprUtil.makeExpressionTree(expressionProxy, expr);
+    final ExpressionTree exprTree = PartFilterExprUtil.makeExpressionTree(expressionProxy, expr, null);
     final byte[] tempExpr = expr; // Need to be final to pass it to an inner class
 
 

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java
index 2fcc162..8b5354d 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartFilterExprUtil.java
@@ -42,12 +42,12 @@ public class PartFilterExprUtil {
 
 
   public static ExpressionTree makeExpressionTree(PartitionExpressionProxy expressionProxy,
-      byte[] expr) throws MetaException {
+      byte[] expr, String defaultPartitionName) throws MetaException {
     // We will try pushdown first, so make the filter. This will also validate the expression,
     // if serialization fails we will throw incompatible metastore error to the client.
     String filter = null;
     try {
-      filter = expressionProxy.convertExprToFilter(expr);
+      filter = expressionProxy.convertExprToFilter(expr, defaultPartitionName);
     } catch (MetaException ex) {
       // TODO MS-SPLIT - for now we have construct this by reflection because IMetaStoreClient
       // can't be

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
index 346fd98..6e8b42d 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/MockPartitionExpressionForMetastore.java
@@ -30,7 +30,7 @@ import java.util.List;
  */
 public class MockPartitionExpressionForMetastore implements PartitionExpressionProxy {
   @Override
-  public String convertExprToFilter(byte[] expr) throws MetaException {
+  public String convertExprToFilter(byte[] expr, String defaultPartitionName) throws MetaException {
     return null;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/78e45be1/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java
b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java
index 36f91eb..27c5bba 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestOldSchema.java
@@ -66,7 +66,7 @@ public class TestOldSchema {
 
   public static class MockPartitionExpressionProxy implements PartitionExpressionProxy {
     @Override
-    public String convertExprToFilter(byte[] expr) throws MetaException {
+    public String convertExprToFilter(byte[] expr, String defaultPartitionName) throws MetaException {
       return null;
     }
 

