carbondata-commits mailing list archives

From ravipes...@apache.org
Subject [03/47] incubator-carbondata git commit: Problem: Value displayed as Null after increase in precision for decimal datatype after aggregation (#845)
Date Mon, 01 Aug 2016 10:05:01 GMT
Problem: Value displayed as Null after increase in precision for decimal datatype after aggregation (#845)

While creating a table, if the user specifies a precision for a decimal datatype column and
later executes an aggregation query on that column, and the precision of the aggregated
result exceeds the user-configured precision, Spark displays the aggregated value as null.

Solution: Set the precision of the aggregated result as the precision of the decimal type
while sending the aggregated result to Spark.
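
Note: in essence, the fix derives the target decimal type of the Cast from the aggregated
result itself rather than from the column's declared type. A minimal sketch of the idea in
Scala (simplified from the patch below; widenForResult is a hypothetical name, and
DecimalType.MAX_PRECISION is Spark's upper bound on decimal precision):

    import org.apache.spark.sql.types.{DataType, DecimalType}

    // Widen the declared decimal type to the precision of the actual result so
    // that Spark's Cast does not turn an overflowing value into null.
    def widenForResult(result: java.math.BigDecimal, declared: DataType): DataType =
      if (result != null && result.precision <= DecimalType.MAX_PRECISION) {
        DecimalType(result.precision, result.scale)
      } else {
        declared
      }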

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/6b7b41af
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/6b7b41af
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/6b7b41af

Branch: refs/heads/master
Commit: 6b7b41af98c161f28814aa4b08e5d2eb913217aa
Parents: a5714ac
Author: manishgupta88 <tomanishgupta18@gmail.com>
Authored: Thu Jul 21 21:55:40 2016 +0530
Committer: sujith71955 <sujithchacko.2010@gmail.com>
Committed: Thu Jul 21 21:55:40 2016 +0530

----------------------------------------------------------------------
 .../carbondata/spark/agg/CarbonAggregates.scala | 21 +++++++++++++++++---
 .../carbondata/spark/util/CarbonScalaUtil.scala | 12 +++++++++++
 .../resources/decimalBoundaryDataCarbon.csv     | 12 +++++++++++
 .../test/resources/decimalBoundaryDataHive.csv  | 11 ++++++++++
 .../testsuite/bigdecimal/TestBigDecimal.scala   | 18 +++++++++++++++++
 5 files changed, 71 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b7b41af/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala b/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala
index 1a52688..de685ab 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala
@@ -17,6 +17,8 @@
 
 package org.carbondata.spark.agg
 
+import java.math.BigDecimal
+
 import scala.language.implicitConversions
 
 import org.apache.spark.sql.catalyst.InternalRow
@@ -26,6 +28,7 @@ import org.apache.spark.sql.types._
 
 import org.carbondata.query.aggregator.MeasureAggregator
 import org.carbondata.query.aggregator.impl._
+import org.carbondata.spark.util.CarbonScalaUtil
 
 case class CountCarbon(child: Expression) extends UnaryExpression with PartialAggregate1 {
   override def references: AttributeSet = child.references
@@ -366,7 +369,10 @@ case class AverageFunctionCarbon(expr: Expression, base: AggregateExpression1, f
       } else {
         avg match {
           case avg: AvgBigDecimalAggregator =>
-            Cast(Literal(avg.getBigDecimalValue), base.dataType).eval(null)
+            val decimalValue: BigDecimal = avg.getBigDecimalValue
+            val updatedDataType = CarbonScalaUtil
+              .getDecimalDataTypeWithUpdatedPrecision(decimalValue, base.dataType)
+            Cast(Literal(decimalValue), updatedDataType).eval(null)
           case avg: AvgLongAggregator =>
             Cast(Literal(avg.getDoubleValue), base.dataType).eval(null)
           case avg: AvgTimestampAggregator =>
@@ -483,7 +489,10 @@ case class SumFunctionCarbon(expr: Expression, base: AggregateExpression1, final
       } else {
         sum match {
           case s: SumBigDecimalAggregator =>
-            Cast(Literal(sum.getBigDecimalValue), base.dataType).eval(input)
+            val decimalValue: BigDecimal = sum.getBigDecimalValue
+            val updatedDataType = CarbonScalaUtil
+              .getDecimalDataTypeWithUpdatedPrecision(decimalValue, base.dataType)
+            Cast(Literal(decimalValue), updatedDataType).eval(input)
           case s: SumLongAggregator =>
             Cast(Literal(sum.getLongValue), base.dataType).eval(input)
           case _ =>
@@ -680,7 +689,13 @@ case class SumDisctinctFunctionCarbon(expr: Expression, base: AggregateExpressio
         null
       }
       else {
-      Cast(Literal(distinct.getValueObject), base.dataType).eval(null)
+        val updatedDataType = base.dataType match {
+          case decimal: DecimalType =>
+            CarbonScalaUtil
+              .getDecimalDataTypeWithUpdatedPrecision(distinct.getBigDecimalValue, base.dataType)
+          case _ => base.dataType
+        }
+        Cast(Literal(distinct.getValueObject), updatedDataType).eval(null)
       }
     }
     else {

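Note: Spark's Cast to a DecimalType(p, s) evaluates to null when the value cannot be
represented at that precision, which is what nulled out the aggregated value and why the
widened type above is needed. A small illustration using Spark's Decimal class (the value
is invented for the example):

    import org.apache.spark.sql.types.Decimal

    // 28 significant digits: too wide for the declared decimal(27, 10).
    val sum = Decimal(new java.math.BigDecimal("123456789012345678.9999999999"))
    // changePrecision reports whether the value fits at the requested
    // precision and scale; Cast turns a failed change into a null result.
    val fits = sum.changePrecision(27, 10)  // false
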
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b7b41af/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala
index 6cd9986..20d45dc 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala
@@ -105,6 +105,18 @@ object CarbonScalaUtil {
     }
   }
 
+  def getDecimalDataTypeWithUpdatedPrecision(decimalValue: java.math.BigDecimal,
+      currentDataType: org.apache.spark.sql.types.DataType): org.apache.spark.sql.types.DataType = {
+    var newDataType: org.apache.spark.sql.types.DataType = currentDataType
+    if (null != decimalValue) {
+      val precision = decimalValue.precision
+      if (precision <= DecimalType.MAX_PRECISION) {
+        newDataType = DecimalType(precision, decimalValue.scale())
+      }
+    }
+    newDataType
+  }
+
 
   case class TransformHolder(rdd: Any, mataData: CarbonMetaData)
 

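Note: for illustration, a hypothetical use of the new helper with a result that overflows a
declared decimal(27, 10) (the value is invented for the example):

    import org.apache.spark.sql.types.{DataType, DecimalType}
    import org.carbondata.spark.util.CarbonScalaUtil

    val declared: DataType = DecimalType(27, 10)
    // Aggregated result with 28 significant digits.
    val result = new java.math.BigDecimal("123456789012345678.9999999999")
    val widened = CarbonScalaUtil.getDecimalDataTypeWithUpdatedPrecision(result, declared)
    // widened is DecimalType(28, 10), so the subsequent Cast succeeds
    // instead of evaluating to null.
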
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b7b41af/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv b/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv
new file mode 100644
index 0000000..c64a9bf
--- /dev/null
+++ b/integration/spark/src/test/resources/decimalBoundaryDataCarbon.csv
@@ -0,0 +1,12 @@
+ID,date,country,name,phonetype,serialname,salary
+1,2015/7/23,china,aaa1,phone197,ASD69643,12345678901234510.0000000000
+2,2015/7/24,china,aaa2,phone756,ASD42892,12345678901234520.0000000000
+3,2015/7/25,china,aaa3,phone1904,ASD37014,12345678901234530.0000000000
+4,2015/7/26,china,aaa4,phone2435,ASD66902,12345678901234560.0000000000
+5,2015/7/27,china,aaa5,phone2441,ASD90633,22345678901234560.0000000000
+6,2015/7/28,china,aaa6,phone294,ASD59961,32345678901234560.0000000000
+7,2015/7/29,china,aaa7,phone610,ASD14875,42345678901234560.0000000000
+8,2015/7/30,china,aaa8,phone1848,ASD57308,52345678901234560.0000000000
+9,2015/7/18,china,aaa9,phone706,ASD86717,62345678901234560.0000000000
+10,2015/7/19,usa,aaa10,phone685,ASD30505,72345678901234560.0000000000
+11,2015/7/18,china,aaa11,phone1554,ASD26101,82345678901234560.0000000000

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b7b41af/integration/spark/src/test/resources/decimalBoundaryDataHive.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/decimalBoundaryDataHive.csv b/integration/spark/src/test/resources/decimalBoundaryDataHive.csv
new file mode 100644
index 0000000..a2faaf1
--- /dev/null
+++ b/integration/spark/src/test/resources/decimalBoundaryDataHive.csv
@@ -0,0 +1,11 @@
+1,2015/7/23,china,aaa1,phone197,ASD69643,12345678901234510.0000000000
+2,2015/7/24,china,aaa2,phone756,ASD42892,12345678901234520.0000000000
+3,2015/7/25,china,aaa3,phone1904,ASD37014,12345678901234530.0000000000
+4,2015/7/26,china,aaa4,phone2435,ASD66902,12345678901234560.0000000000
+5,2015/7/27,china,aaa5,phone2441,ASD90633,22345678901234560.0000000000
+6,2015/7/28,china,aaa6,phone294,ASD59961,32345678901234560.0000000000
+7,2015/7/29,china,aaa7,phone610,ASD14875,42345678901234560.0000000000
+8,2015/7/30,china,aaa8,phone1848,ASD57308,52345678901234560.0000000000
+9,2015/7/18,china,aaa9,phone706,ASD86717,62345678901234560.0000000000
+10,2015/7/19,usa,aaa10,phone685,ASD30505,72345678901234560.0000000000
+11,2015/7/18,china,aaa11,phone1554,ASD26101,82345678901234560.0000000000

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/6b7b41af/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
index 3763e35..95dd1b4 100644
--- a/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
+++ b/integration/spark/src/test/scala/org/carbondata/spark/testsuite/bigdecimal/TestBigDecimal.scala
@@ -108,6 +108,24 @@ class TestBigDecimal extends QueryTest with BeforeAndAfterAll {
     checkAnswer(sql("select salary from carbonTable where salary<=45234525465882.24"),
       sql("select salary from hiveTable where salary<=45234525465882.24"))
   }
+
+  test("test aggregation on big decimal column with increased precision") {
+    sql("drop table if exists carbonBigDecimal")
+    sql("drop table if exists hiveBigDecimal")
+    sql("create table if not exists carbonBigDecimal (ID Int, date Timestamp, country String,
name String, phonetype String, serialname String, salary decimal(27, 10)) STORED BY 'org.apache.carbondata.format'")
+    sql("create table if not exists hiveBigDecimal(ID Int, date Timestamp, country String,
name String, phonetype String, serialname String, salary decimal(27, 10))row format delimited
fields terminated by ','")
+    sql("LOAD DATA LOCAL INPATH './src/test/resources/decimalBoundaryDataCarbon.csv' into
table carbonBigDecimal")
+    sql("LOAD DATA local inpath './src/test/resources/decimalBoundaryDataHive.csv' INTO table
hiveBigDecimal")
+
+    checkAnswer(sql("select sum(salary) from carbonBigDecimal"),
+      sql("select sum(salary) from hiveBigDecimal"))
+
+    checkAnswer(sql("select sum(distinct salary) from carbonBigDecimal"),
+      sql("select sum(distinct salary) from hiveBigDecimal"))
+
+    sql("drop table if exists carbonBigDecimal")
+    sql("drop table if exists hiveBigDecimal")
+  }
   
 
   override def afterAll {

