spark-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From dongj...@apache.org
Subject spark git commit: [SPARK-26233][SQL] CheckOverflow when encoding a decimal value
Date Tue, 04 Dec 2018 18:33:42 GMT
Repository: spark
Updated Branches:
  refs/heads/master f982ca07e -> 556d83e0d


[SPARK-26233][SQL] CheckOverflow when encoding a decimal value

## What changes were proposed in this pull request?

When we encode a Decimal from an external source, we don't check for overflow. That check is
useful not only to enforce that we can represent the correct value in the specified range, but
also because it changes the underlying data to the right precision/scale. Since our code
generation assumes that a decimal has exactly the same precision and scale as its data type,
failing to enforce it can lead to corrupted output/results when there are subsequent
transformations.

## How was this patch tested?

added UT

Closes #23210 from mgaido91/SPARK-26233.

Authored-by: Marco Gaido <marcogaido91@gmail.com>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/556d83e0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/556d83e0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/556d83e0

Branch: refs/heads/master
Commit: 556d83e0d87a8f899f29544eb5ca4999a84c96c1
Parents: f982ca0
Author: Marco Gaido <marcogaido91@gmail.com>
Authored: Tue Dec 4 10:33:27 2018 -0800
Committer: Dongjoon Hyun <dongjoon@apache.org>
Committed: Tue Dec 4 10:33:27 2018 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/encoders/RowEncoder.scala | 4 ++--
 .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala  | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/556d83e0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
index d905f8f..8ca3d35 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala
@@ -106,12 +106,12 @@ object RowEncoder {
         returnNullable = false)
 
     case d: DecimalType =>
-      StaticInvoke(
+      CheckOverflow(StaticInvoke(
         Decimal.getClass,
         d,
         "fromDecimal",
         inputObject :: Nil,
-        returnNullable = false)
+        returnNullable = false), d)
 
     case StringType =>
       StaticInvoke(

http://git-wip-us.apache.org/repos/asf/spark/blob/556d83e0/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 0f90083..525c7ce 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -1647,6 +1647,15 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
     checkDataset(ds, data: _*)
     checkAnswer(ds.select("x"), Seq(Row(1), Row(2)))
   }
+
+  test("SPARK-26233: serializer should enforce decimal precision and scale") {
+    val s = StructType(Seq(StructField("a", StringType), StructField("b", DecimalType(38, 8))))
+    val encoder = RowEncoder(s)
+    implicit val uEnc = encoder
+    val df = spark.range(2).map(l => Row(l.toString, BigDecimal.valueOf(l + 0.1111)))
+    checkAnswer(df.groupBy(col("a")).agg(first(col("b"))),
+      Seq(Row("0", BigDecimal.valueOf(0.1111)), Row("1", BigDecimal.valueOf(1.1111))))
+  }
 }
 
 case class TestDataUnion(x: Int, y: Int, z: Int)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


Mime
View raw message