spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From marmb...@apache.org
Subject spark git commit: [SPARK-4468][SQL] Fixes Parquet filter creation for inequality predicates with literals on the left hand side
Date Wed, 19 Nov 2014 01:41:56 GMT
Repository: spark
Updated Branches:
  refs/heads/master 7f22fa81e -> 423baea95


[SPARK-4468][SQL] Fixes Parquet filter creation for inequality predicates with literals on
the left hand side

For expressions like `10 < someVar`, we should create an `Operators.Gt` filter, but right
now an `Operators.Lt` is created. This issue affects all inequality predicates with literals
on the left hand side.

(This bug existed before #3317 and affects branch-1.1. #3338 was opened to backport this to
branch-1.1.)

<!-- Reviewable:start -->
[<img src="https://reviewable.io/review_button.png" height=40 alt="Review on Reviewable"/>](https://reviewable.io/reviews/apache/spark/3334)
<!-- Reviewable:end -->

Author: Cheng Lian <lian@databricks.com>

Closes #3334 from liancheng/fix-parquet-comp-filter and squashes the following commits:

0130897 [Cheng Lian] Fixes Parquet comparison filter generation


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/423baea9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/423baea9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/423baea9

Branch: refs/heads/master
Commit: 423baea953996a66dde671ff6db2fb1f32fbe8cb
Parents: 7f22fa8
Author: Cheng Lian <lian@databricks.com>
Authored: Tue Nov 18 17:41:54 2014 -0800
Committer: Michael Armbrust <michael@databricks.com>
Committed: Tue Nov 18 17:41:54 2014 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/parquet/ParquetFilters.scala   |  8 ++++----
 .../apache/spark/sql/parquet/ParquetQuerySuite.scala    | 12 ++++++++++++
 2 files changed, 16 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/423baea9/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala
index 3a9e149..6fb5f49 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala
@@ -135,22 +135,22 @@ private[sql] object ParquetFilters {
       case LessThan(NamedExpression(name, _), Literal(value, dataType)) =>
         makeLt.lift(dataType).map(_(name, value))
       case LessThan(Literal(value, dataType), NamedExpression(name, _)) =>
-        makeLt.lift(dataType).map(_(name, value))
+        makeGt.lift(dataType).map(_(name, value))
 
       case LessThanOrEqual(NamedExpression(name, _), Literal(value, dataType)) =>
         makeLtEq.lift(dataType).map(_(name, value))
       case LessThanOrEqual(Literal(value, dataType), NamedExpression(name, _)) =>
-        makeLtEq.lift(dataType).map(_(name, value))
+        makeGtEq.lift(dataType).map(_(name, value))
 
       case GreaterThan(NamedExpression(name, _), Literal(value, dataType)) =>
         makeGt.lift(dataType).map(_(name, value))
       case GreaterThan(Literal(value, dataType), NamedExpression(name, _)) =>
-        makeGt.lift(dataType).map(_(name, value))
+        makeLt.lift(dataType).map(_(name, value))
 
       case GreaterThanOrEqual(NamedExpression(name, _), Literal(value, dataType)) =>
         makeGtEq.lift(dataType).map(_(name, value))
       case GreaterThanOrEqual(Literal(value, dataType), NamedExpression(name, _)) =>
-        makeGtEq.lift(dataType).map(_(name, value))
+        makeLtEq.lift(dataType).map(_(name, value))
 
       case And(lhs, rhs) =>
         (createFilter(lhs) ++ createFilter(rhs)).reduceOption(FilterApi.and)

http://git-wip-us.apache.org/repos/asf/spark/blob/423baea9/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
index d31a9d8..7ee4f3c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
@@ -461,9 +461,21 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA
     }
 
     checkFilter[Operators.Eq[Integer]]('a.int === 1)
+    checkFilter[Operators.Eq[Integer]](Literal(1) === 'a.int)
+
     checkFilter[Operators.Lt[Integer]]('a.int < 4)
+    checkFilter[Operators.Lt[Integer]](Literal(4) > 'a.int)
+    checkFilter[Operators.LtEq[Integer]]('a.int <= 4)
+    checkFilter[Operators.LtEq[Integer]](Literal(4) >= 'a.int)
+
+    checkFilter[Operators.Gt[Integer]]('a.int > 4)
+    checkFilter[Operators.Gt[Integer]](Literal(4) < 'a.int)
+    checkFilter[Operators.GtEq[Integer]]('a.int >= 4)
+    checkFilter[Operators.GtEq[Integer]](Literal(4) <= 'a.int)
+
     checkFilter[Operators.And]('a.int === 1 && 'a.int < 4)
     checkFilter[Operators.Or]('a.int === 1 || 'a.int < 4)
+    checkFilter[Operators.Not](!('a.int === 1))
 
     checkFilter('a.int > 'b.int, defined = false)
     checkFilter(('a.int > 'b.int) && ('a.int > 'b.int), defined = false)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message