spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wenc...@apache.org
Subject spark git commit: [SPARK-22464][SQL] No pushdown for Hive metastore partition predicates containing null-safe equality
Date Tue, 07 Nov 2017 20:57:49 GMT
Repository: spark
Updated Branches:
  refs/heads/master 1d341042d -> 0846a4473


[SPARK-22464][SQL] No pushdown for Hive metastore partition predicates containing null-safe
equality

## What changes were proposed in this pull request?
`<=>` is not supported by Hive metastore partition predicate pushdown. We should not
push it down to the Hive metastore when it is used in partition predicates.

## How was this patch tested?
Added a test case

Author: gatorsmile <gatorsmile@gmail.com>

Closes #19682 from gatorsmile/fixLimitPushDown.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0846a447
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0846a447
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0846a447

Branch: refs/heads/master
Commit: 0846a44736d9a71ba3234ad5de4c8de9e7fe9f6c
Parents: 1d34104
Author: gatorsmile <gatorsmile@gmail.com>
Authored: Tue Nov 7 21:57:43 2017 +0100
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Tue Nov 7 21:57:43 2017 +0100

----------------------------------------------------------------------
 .../apache/spark/sql/hive/client/HiveShim.scala | 29 ++++++++++++++------
 .../spark/sql/hive/client/HiveClientSuite.scala |  9 ++++++
 2 files changed, 30 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0846a447/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
index 5c1ff2b..bd1b300 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala
@@ -592,6 +592,19 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
     }
   }
 
+
+  /**
+   * An extractor that matches all binary comparison operators except null-safe equality.
+   *
+   * Null-safe equality is not supported by Hive metastore partition predicate pushdown
+   */
+  object SpecialBinaryComparison {
+    def unapply(e: BinaryComparison): Option[(Expression, Expression)] = e match {
+      case _: EqualNullSafe => None
+      case _ => Some((e.left, e.right))
+    }
+  }
+
   private def convertBasicFilters(table: Table, filters: Seq[Expression]): String = {
     // hive varchar is treated as catalyst string, but hive varchar can't be pushed down.
     lazy val varcharKeys = table.getPartitionKeys.asScala
@@ -600,14 +613,14 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
       .map(col => col.getName).toSet
 
     filters.collect {
-      case op @ BinaryComparison(a: Attribute, Literal(v, _: IntegralType)) =>
+      case op @ SpecialBinaryComparison(a: Attribute, Literal(v, _: IntegralType)) =>
         s"${a.name} ${op.symbol} $v"
-      case op @ BinaryComparison(Literal(v, _: IntegralType), a: Attribute) =>
+      case op @ SpecialBinaryComparison(Literal(v, _: IntegralType), a: Attribute) =>
         s"$v ${op.symbol} ${a.name}"
-      case op @ BinaryComparison(a: Attribute, Literal(v, _: StringType))
+      case op @ SpecialBinaryComparison(a: Attribute, Literal(v, _: StringType))
         if !varcharKeys.contains(a.name) =>
         s"""${a.name} ${op.symbol} ${quoteStringLiteral(v.toString)}"""
-      case op @ BinaryComparison(Literal(v, _: StringType), a: Attribute)
+      case op @ SpecialBinaryComparison(Literal(v, _: StringType), a: Attribute)
         if !varcharKeys.contains(a.name) =>
         s"""${quoteStringLiteral(v.toString)} ${op.symbol} ${a.name}"""
     }.mkString(" and ")
@@ -666,16 +679,16 @@ private[client] class Shim_v0_13 extends Shim_v0_12 {
       case InSet(a: Attribute, ExtractableValues(values))
           if !varcharKeys.contains(a.name) && values.nonEmpty =>
         convertInToOr(a, values)
-      case op @ BinaryComparison(a: Attribute, ExtractableLiteral(value))
+      case op @ SpecialBinaryComparison(a: Attribute, ExtractableLiteral(value))
           if !varcharKeys.contains(a.name) =>
         s"${a.name} ${op.symbol} $value"
-      case op @ BinaryComparison(ExtractableLiteral(value), a: Attribute)
+      case op @ SpecialBinaryComparison(ExtractableLiteral(value), a: Attribute)
           if !varcharKeys.contains(a.name) =>
         s"$value ${op.symbol} ${a.name}"
-      case op @ And(expr1, expr2)
+      case And(expr1, expr2)
           if convert.isDefinedAt(expr1) || convert.isDefinedAt(expr2) =>
         (convert.lift(expr1) ++ convert.lift(expr2)).mkString("(", " and ", ")")
-      case op @ Or(expr1, expr2)
+      case Or(expr1, expr2)
           if convert.isDefinedAt(expr1) && convert.isDefinedAt(expr2) =>
         s"(${convert(expr1)} or ${convert(expr2)})"
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/0846a447/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
index 3eedcf7..ce53ace 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala
@@ -78,6 +78,15 @@ class HiveClientSuite(version: String)
     assert(filteredPartitions.size == testPartitionCount)
   }
 
+  test("getPartitionsByFilter: ds<=>20170101") {
+    // Should return all partitions where <=> is not supported
+    testMetastorePartitionFiltering(
+      "ds<=>20170101",
+      20170101 to 20170103,
+      0 to 23,
+      "aa" :: "ab" :: "ba" :: "bb" :: Nil)
+  }
+
   test("getPartitionsByFilter: ds=20170101") {
     testMetastorePartitionFiltering(
       "ds=20170101",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message