spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wenc...@apache.org
Subject spark git commit: [SPARK-21787][SQL] Support for pushing down filters for DateType in native OrcFileFormat
Date Fri, 08 Dec 2017 01:52:29 GMT
Repository: spark
Updated Branches:
  refs/heads/master aa1764ba1 -> 0ba8f4b21


[SPARK-21787][SQL] Support for pushing down filters for DateType in native OrcFileFormat

## What changes were proposed in this pull request?

This PR support for pushing down filters for DateType in ORC

## How was this patch tested?

Pass the Jenkins with newly add and updated test cases.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #18995 from dongjoon-hyun/SPARK-21787.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ba8f4b2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ba8f4b2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ba8f4b2

Branch: refs/heads/master
Commit: 0ba8f4b21167fe98a9f06036c4fc4ccd8e3287b4
Parents: aa1764b
Author: Dongjoon Hyun <dongjoon@apache.org>
Authored: Fri Dec 8 09:52:16 2017 +0800
Committer: Wenchen Fan <wenchen@databricks.com>
Committed: Fri Dec 8 09:52:16 2017 +0800

----------------------------------------------------------------------
 .../execution/datasources/orc/OrcFilters.scala  |  3 +-
 .../datasources/orc/OrcFilterSuite.scala        | 29 ++++++++++++++++----
 2 files changed, 25 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0ba8f4b2/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
index cec256c..4f44ae4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala
@@ -82,8 +82,7 @@ private[orc] object OrcFilters {
    * Both CharType and VarcharType are cleaned at AstBuilder.
    */
   private def isSearchableType(dataType: DataType) = dataType match {
-    // TODO: SPARK-21787 Support for pushing down filters for DateType in ORC
-    case BinaryType | DateType => false
+    case BinaryType => false
     case _: AtomicType => true
     case _ => false
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/0ba8f4b2/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
index a5f6b68..8680b86 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
@@ -316,6 +316,30 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
     }
   }
 
+  test("filter pushdown - date") {
+    val dates = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21").map { day =>
+      Date.valueOf(day)
+    }
+    withOrcDataFrame(dates.map(Tuple1(_))) { implicit df =>
+      checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
+
+      checkFilterPredicate('_1 === dates(0), PredicateLeaf.Operator.EQUALS)
+      checkFilterPredicate('_1 <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+
+      checkFilterPredicate('_1 < dates(1), PredicateLeaf.Operator.LESS_THAN)
+      checkFilterPredicate('_1 > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+      checkFilterPredicate('_1 <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS)
+      checkFilterPredicate('_1 >= dates(3), PredicateLeaf.Operator.LESS_THAN)
+
+      checkFilterPredicate(Literal(dates(0)) === '_1, PredicateLeaf.Operator.EQUALS)
+      checkFilterPredicate(Literal(dates(0)) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
+      checkFilterPredicate(Literal(dates(1)) > '_1, PredicateLeaf.Operator.LESS_THAN)
+      checkFilterPredicate(Literal(dates(2)) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+      checkFilterPredicate(Literal(dates(0)) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
+      checkFilterPredicate(Literal(dates(3)) <= '_1, PredicateLeaf.Operator.LESS_THAN)
+    }
+  }
+
   test("no filter pushdown - non-supported types") {
     implicit class IntToBinary(int: Int) {
       def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8)
@@ -328,11 +352,6 @@ class OrcFilterSuite extends OrcTest with SharedSQLContext {
     withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
       checkNoFilterPredicate('_1 <=> 1.b)
     }
-    // DateType
-    val stringDate = "2015-01-01"
-    withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df =>
-      checkNoFilterPredicate('_1 === Date.valueOf(stringDate))
-    }
     // MapType
     withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df =>
       checkNoFilterPredicate('_1.isNotNull)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message