tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jihoon...@apache.org
Subject [10/17] git commit: TAJO-994: 'count(distinct x)' function counts first null value.
Date Mon, 11 Aug 2014 08:11:39 GMT
TAJO-994: 'count(distinct x)' function counts first null value.

Closes #106


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fcc5da03
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fcc5da03
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fcc5da03

Branch: refs/heads/index_support
Commit: fcc5da03ad1e9a692efad20aaf90a4005f9084d0
Parents: 189cf3f
Author: Hyunsik Choi <hyunsik@apache.org>
Authored: Mon Aug 11 11:22:59 2014 +0900
Committer: Hyunsik Choi <hyunsik@apache.org>
Committed: Mon Aug 11 11:22:59 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  2 +
 .../function/builtin/CountValueDistinct.java    |  4 +-
 .../tajo/engine/query/TestGroupByQuery.java     | 46 ++++++++++++++------
 .../testDistinctAggregationCaseByCase3.sql      |  8 ++++
 .../testDistinctAggregationCaseByCase4.sql      |  7 +++
 .../testDistinctAggregationCaseByCase3.result   |  3 ++
 .../testDistinctAggregationCaseByCase4.result   |  3 ++
 7 files changed, 57 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 7487983..864eaef 100644
--- a/CHANGES
+++ b/CHANGES
@@ -106,6 +106,8 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-994: 'count(distinct x)' function counts first null value. (hyunsik)
+
     TAJO-996: Sometimes, scheduleFetchesByEvenDistributedVolumes loses
     some FetchImpls. (hyunsik)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java
index c0f5ec3..7a7f3d8 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/builtin/CountValueDistinct.java
@@ -24,7 +24,6 @@ import org.apache.tajo.common.TajoDataTypes.Type;
 import org.apache.tajo.datum.Datum;
 import org.apache.tajo.datum.DatumFactory;
 import org.apache.tajo.datum.Int8Datum;
-import org.apache.tajo.datum.NullDatum;
 import org.apache.tajo.engine.function.FunctionContext;
 import org.apache.tajo.engine.function.annotation.Description;
 import org.apache.tajo.engine.function.annotation.ParamTypes;
@@ -57,7 +56,8 @@ public final class CountValueDistinct extends CountRows {
   public void merge(FunctionContext context, Tuple part) {
     CountDistinctValueContext distinctContext = (CountDistinctValueContext) context;
     Datum value = part.get(0);
-    if ((distinctContext.latest == null || (!distinctContext.latest.equals(value)) && !(value instanceof NullDatum))) {
+
+    if (!value.isNull() && (distinctContext.latest == null || (!distinctContext.latest.equals(value)))) {
       distinctContext.latest = value;
       distinctContext.count++;
     }

http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
index 79efd92..fe9f990 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
@@ -342,7 +342,7 @@ public class TestGroupByQuery extends QueryTestCaseBase {
   }
 
   @Test
-  public final void testDistinctAggregationCasebyCase2() throws Exception {
+  public final void testDistinctAggregationCaseByCase3() throws Exception {
     // first distinct is smaller than second distinct.
     KeyValueSet tableOptions = new KeyValueSet();
     tableOptions.put(StorageConstants.CSVFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
@@ -364,22 +364,40 @@ public class TestGroupByQuery extends QueryTestCaseBase {
 
     TajoTestingCluster.createTable("table10", schema, tableOptions, data);
 
-    ResultSet res = executeString(
-        "select col1 \n" +
-            ",count(distinct col2) as cnt1\n" +
-            ",count(distinct case when col3 is not null then col2 else null end) as cnt2\n" +
-            "from table10 \n" +
-            "group by col1"
-    );
-    String result = resultSetToString(res);
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
 
-    String expected = "col1,cnt1,cnt2\n" +
-        "-------------------------------\n" +
-        "a,3,1\n";
+    executeString("DROP TABLE table10 PURGE").close();
+  }
 
-    assertEquals(expected, result);
+  @Test
+  public final void testDistinctAggregationCaseByCase4() throws Exception {
+    // Reproduction case for TAJO-994
+    KeyValueSet tableOptions = new KeyValueSet();
+    tableOptions.put(StorageConstants.CSVFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    tableOptions.put(StorageConstants.CSVFILE_NULL, "\\\\N");
 
-    executeString("DROP TABLE table10 PURGE").close();
+    Schema schema = new Schema();
+    schema.addColumn("col1", Type.TEXT);
+    schema.addColumn("col2", Type.TEXT);
+
+    String[] data = new String[]{
+        "a|\\N",
+        "a|\\N|",
+        "a|\\N|",
+        "a|\\N|",
+        "a|\\N|",
+        "a|\\N|"
+    };
+
+    TajoTestingCluster.createTable("table11", schema, tableOptions, data);
+
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+
+    executeString("DROP TABLE table11 PURGE").close();
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql
new file mode 100644
index 0000000..925cadd
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase3.sql
@@ -0,0 +1,8 @@
+select
+  col1,
+  count(distinct col2) as cnt1,
+  count(distinct case when col3 is not null then col2 else null end) as cnt2
+from
+  table10
+group by
+  col1;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql
new file mode 100644
index 0000000..352f21e
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregationCaseByCase4.sql
@@ -0,0 +1,7 @@
+select
+  col1,
+  count(distinct col2) as cnt
+from
+  table11
+group by
+  col1;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result
new file mode 100644
index 0000000..320ab29
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase3.result
@@ -0,0 +1,3 @@
+col1,cnt1,cnt2
+-------------------------------
+a,3,0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/fcc5da03/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result
new file mode 100644
index 0000000..a65e71f
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregationCaseByCase4.result
@@ -0,0 +1,3 @@
+col1,cnt
+-------------------------------
+a,0
\ No newline at end of file


Mime
View raw message