carbondata-commits mailing list archives

From: ravipes...@apache.org
Subject: [30/50] [abbrv] incubator-carbondata git commit: [CARBONDATA-250] Filter result is incorrect when Double data type values 0.0 and -0.0 are used.
Date: Thu, 22 Sep 2016 05:36:28 GMT
[CARBONDATA-250] Filter result is incorrect when Double data type values 0.0 and -0.0 are used.
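
For context (an illustrative note, not part of the commit): java.lang.Double.equals()
and the primitive == operator disagree on exactly the two corner cases this fix
targets. Per the Double javadoc, equals() distinguishes the signed zeros but treats
NaN as equal to itself, while == does the opposite:

    // minimal sketch of the Java semantics motivating this fix
    boolean zeroEquals = Double.valueOf(0.0d).equals(Double.valueOf(-0.0d)); // false
    boolean zeroOp     = (0.0d == -0.0d);                                    // true
    boolean nanEquals  = Double.valueOf(Double.NaN).equals(Double.valueOf(Double.NaN)); // true
    boolean nanOp      = (Double.NaN == Double.NaN);                         // false

An equality filter on a double column needs the == behaviour for signed zeros but
the equals() behaviour for NaN, which is what the nanSafeEqualsDoubles helper
introduced below combines.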


Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/c083264a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/c083264a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/c083264a

Branch: refs/heads/branch-0.1
Commit: c083264a730506bc44fc6387e378d08cab8cc334
Parents: 8b6429a
Author: sujith71955 <sujithchacko.2010@gmail.com>
Authored: Sun Sep 18 04:01:10 2016 +0530
Committer: ravipesala <ravi.pesala@gmail.com>
Committed: Thu Sep 22 10:02:11 2016 +0530

----------------------------------------------------------------------
 .../conditional/EqualToExpression.java          |  3 ++-
 .../carbondata/scan/filter/FilterUtil.java      | 21 +++++++++++++++++++-
 .../test/resources/Test_Data1_Logrithmic.csv    |  3 +++
 .../GrtLtFilterProcessorTestCase.scala          | 11 ++++++++++
 4 files changed, 36 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
index 12a3e32..8f7fa0a 100644
--- a/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
+++ b/core/src/main/java/org/apache/carbondata/scan/expression/conditional/EqualToExpression.java
@@ -24,6 +24,7 @@ import org.apache.carbondata.scan.expression.Expression;
 import org.apache.carbondata.scan.expression.ExpressionResult;
 import org.apache.carbondata.scan.expression.exception.FilterIllegalMemberException;
 import org.apache.carbondata.scan.expression.exception.FilterUnsupportedException;
+import org.apache.carbondata.scan.filter.FilterUtil;
 import org.apache.carbondata.scan.filter.intf.ExpressionType;
 import org.apache.carbondata.scan.filter.intf.RowIntf;
 
@@ -78,7 +79,7 @@ public class EqualToExpression extends BinaryConditionalExpression {
         result = val1.getInt().equals(val2.getInt());
         break;
       case DOUBLE:
-        result = val1.getDouble().equals(val2.getDouble());
+        result = FilterUtil.nanSafeEqualsDoubles(val1.getDouble(), val2.getDouble());
         break;
       case TIMESTAMP:
         result = val1.getTime().equals(val2.getTime());

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
index 71ac1bf..b7cacb1 100644
--- a/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
+++ b/core/src/main/java/org/apache/carbondata/scan/filter/FilterUtil.java
@@ -1390,6 +1390,26 @@ public final class FilterUtil {
   }
 
   /**
+   * Compares two double values for equality. It preserves the 0.0 / -0.0
+   * equality behaviour of the primitive == operator while still treating
+   * NaN as equal to NaN, as java.lang.Double.equals() does.
+   *
+   * @param d1 first double value for the equality check
+   * @param d2 second double value for the equality check
+   * @return true if the two values are considered equal
+   */
+  public static boolean nanSafeEqualsDoubles(Double d1, Double d2) {
+    // Double.equals() semantics for NaN: NaN compares equal to NaN.
+    boolean bothNaN = Double.isNaN(d1) && Double.isNaN(d2);
+    if (bothNaN) {
+      return true;
+    }
+    // Primitive == semantics for the remaining cases: 0.0 and -0.0 compare
+    // equal, and ordinary values compare by numeric value.
+    return d1.doubleValue() == d2.doubleValue();
+  }
+
+  /**
    * This method will prepare the list with all unknown expressions
    *
    * @param expression
@@ -1406,5 +1426,4 @@ public final class FilterUtil {
       getUnknownExpressionsList(child, lst);
     }
   }
-
 }
\ No newline at end of file
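
A quick sanity check of the helper's contract (illustrative only, not part of the
patch):

    // expected behaviour of FilterUtil.nanSafeEqualsDoubles(Double, Double)
    assert FilterUtil.nanSafeEqualsDoubles(0.0d, -0.0d);            // signed zeros match
    assert FilterUtil.nanSafeEqualsDoubles(Double.NaN, Double.NaN); // NaN matches NaN
    assert !FilterUtil.nanSafeEqualsDoubles(1.0d, 2.0d);            // distinct values do not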

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
new file mode 100644
index 0000000..0f0312d
--- /dev/null
+++ b/integration/spark/src/test/resources/Test_Data1_Logrithmic.csv
@@ -0,0 +1,3 @@
+c1_int,c2_Bigint,c3_Decimal,c4_double,c5_string,c6_Timestamp,c7_Datatype_Desc
+2147483646,9223372036854775807,0.12345678900987654321123456789012345638,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS  it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format. This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format  like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone  it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single  Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is fu,2017-07-01 12:07:28,Max_range_values-1
+2147483646,9223372036854775807,12345678900987654321123456789012345678,1.7976931348623157E308,SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone Carbon file format is a columnar store in HDFS  it has many features that a modern columnar format hasz such as splittablez compression schema zcomplex data type and so on. Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file.Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format. This reduces the row reconstruction cost at query time Supports for various use cases with one single Data format  like interactive OLAP-style queryz Sequential Access SQL Capability Carbon is fully compliant with Spark SQL and supports all SQL queries which can run directly on Spark SQL Easy Table Definition: Carbon supports easy to use DDL Data Definition Language statements to define and create tables. Carbon DDL is highly flexible and is very easy to use as well as powerful enough to define complex tables Easy Data anagement  Carbon supports a variety of data management functions for loading data to table and maintaining the data in table. Carbon supports bulkloading historical data as well as incrementally loading new data. Loaded data can be deleted based on load time or a specific load can be undone  it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the usersz which is Column group Allow multiple columns to form a column group that would be stored as row format This reduces the row reconstruction cost at query time Supports for various use cases with one single  Sequential Access Carbon has following unique features Stores data along with index: it can significantly accelerate query performance and reduces the scans and CPU resources where there are filters in the query. Carbon index consists of multiple level of indicesz a processing framework can leverage this index to reduce the task it needs to schedule and processz and it can also do skip scan in more finer grain unit in task side scanning instead of scanning the whole file Operable encoded data Through supporting efficient compression and global encoding schemes  can query on compressed encoded dataz the data can be converted to encoded data just before returning the results to the users which is Column group Allow multiple columns to form a column group that would be stored as row format Allow multiple columns to form format Allow carbon is f,2017-07-01 12:07:28,Max_range_values-2

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/c083264a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
index 5278344..b33b65f 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/filterexpr/GrtLtFilterProcessorTestCase.scala
@@ -38,6 +38,7 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
     sql("drop table if exists a12")
     sql("drop table if exists a12_allnull")
     sql("drop table if exists a12_no_null")
+     sql("drop table if exists Test_Boundary1")
 
     sql(
       "create table a12(empid String,ename String,sal double,deptno int,mgr string,gender
string," +
@@ -53,6 +54,7 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
         " string," +
         "dob timestamp,comm decimal(4,2),desc string) stored by 'org.apache.carbondata.format'"
     )
+    sql("create table Test_Boundary1 (c1_int int,c2_Bigint Bigint,c3_Decimal Decimal(38,38),c4_double
double,c5_string string,c6_Timestamp Timestamp,c7_Datatype_Desc string) STORED BY 'org.apache.carbondata.format'")
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, "yyyy-MM-dd HH:mm:ss")
     val basePath = new File(this.getClass.getResource("/").getPath + "/../../")
@@ -77,6 +79,9 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
          'QUOTECHAR'='"')"""
         .stripMargin
     )
+    
+    sql(
+      s"LOAD DATA INPATH './src/test/resources/Test_Data1_Logrithmic.csv' INTO table Test_Boundary1
OPTIONS('DELIMITER'=',','QUOTECHAR'='','FILEHEADER'='')")
   }
   //mixed value test
   test("Less Than Filter") {
@@ -99,6 +104,12 @@ class GrtLtFilterProcessorTestCase extends QueryTest with BeforeAndAfterAll {
       Seq(Row(3))
     )
   }
+  test("0.0 and -0.0 equality check for double data type applying log function") {
+    checkAnswer(
+      sql("select log(c4_double,1) from Test_Boundary1 where log(c4_double,1)= -0.0"),
+      Seq(Row(0.0),Row(0.0))
+    )
+  }
 
   test("Greater Than equal to Filter") {
     sql("select count (empid) from a12 where dob >= '2014-07-01 12:07:28'").show()

