tajo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hyun...@apache.org
Subject git commit: TAJO-833: NPE occurs when using the column as a alias name in the multiple DISTINCT. (Hyoungjun Kim via hyunsik)
Date Wed, 21 May 2014 02:25:26 GMT
Repository: tajo
Updated Branches:
  refs/heads/branch-0.8.1 49bc60b4d -> f0ee8d59e


TAJO-833: NPE occurs when using the column as a alias name in the multiple DISTINCT. (Hyoungjun
Kim via hyunsik)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/f0ee8d59
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/f0ee8d59
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/f0ee8d59

Branch: refs/heads/branch-0.8.1
Commit: f0ee8d59e9b2873c93c421007bb8f53f0f1b4af3
Parents: 49bc60b
Author: Hyunsik Choi <hyunsik@apache.org>
Authored: Wed May 21 11:24:49 2014 +0900
Committer: Hyunsik Choi <hyunsik@apache.org>
Committed: Wed May 21 11:25:11 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 +
 .../org/apache/tajo/engine/eval/FieldEval.java  |  7 ++-
 .../global/builder/DistinctGroupbyBuilder.java  | 22 +++++--
 .../planner/physical/AggregationExec.java       |  6 +-
 .../DistinctGroupbyHashAggregationExec.java     | 62 ++++++++++++++++----
 .../DistinctGroupbySortAggregationExec.java     |  7 ---
 .../planner/physical/SortAggregateExec.java     |  1 -
 .../tajo/engine/query/TestGroupByQuery.java     |  4 ++
 .../testDistinctAggregation_case8.sql           | 10 ++++
 .../testDistinctAggregation_case8.result        |  6 ++
 .../apache/tajo/storage/TupleComparator.java    | 20 ++++++-
 11 files changed, 121 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 56a530d..a39d479 100644
--- a/CHANGES
+++ b/CHANGES
@@ -368,6 +368,9 @@ Release 0.8.0
     (Keuntae Park via jihoon)
 
   BUG FIXES
+ 
+    TAJO-833: NPE occurs when using the column as a alias name in the multiple 
+    DISTINCT. (Hyoungjun Kim via hyunsik)
 
     TAJO-787: FilterPushDownRule::visitSubQuery does not consider aliased columns. (jaehwa)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/main/java/org/apache/tajo/engine/eval/FieldEval.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/eval/FieldEval.java b/tajo-core/src/main/java/org/apache/tajo/engine/eval/FieldEval.java
index ea2b031..20af854 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/eval/FieldEval.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/eval/FieldEval.java
@@ -42,7 +42,12 @@ public class FieldEval extends EvalNode implements Cloneable {
 	@Override
 	public Datum eval(Schema schema, Tuple tuple) {
 	  if (fieldId == -1) {
-	    fieldId = schema.getColumnId(column.getQualifiedName());
+      // TODO - column namespace should be improved to simplify name handling and resolving.
+      if (column.hasQualifier()) {
+        fieldId = schema.getColumnId(column.getQualifiedName());
+      } else {
+        fieldId = schema.getColumnIdByName(column.getSimpleName());
+      }
       if (fieldId == -1) {
         throw new IllegalStateException("No Such Column Reference: " + column + ", schema:
" + schema);
       }

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/builder/DistinctGroupbyBuilder.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/builder/DistinctGroupbyBuilder.java
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/builder/DistinctGroupbyBuilder.java
index 1ccd9dc..8727b84 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/builder/DistinctGroupbyBuilder.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/builder/DistinctGroupbyBuilder.java
@@ -263,7 +263,11 @@ public class DistinctGroupbyBuilder {
     int[] secondStageColumnIds = new int[secondStageDistinctNode.getOutSchema().size()];
     int columnIdIndex = 0;
     for (Column column: secondStageDistinctNode.getGroupingColumns()) {
-      secondStageColumnIds[originOutputSchema.getColumnId(column.getQualifiedName())] = columnIdIndex;
+      if (column.hasQualifier()) {
+        secondStageColumnIds[originOutputSchema.getColumnId(column.getQualifiedName())] =
columnIdIndex;
+      } else {
+        secondStageColumnIds[originOutputSchema.getColumnIdByName(column.getSimpleName())]
= columnIdIndex;
+      }
       columnIdIndex++;
     }
 
@@ -312,8 +316,12 @@ public class DistinctGroupbyBuilder {
           int targetIdx = originGroupColumns.size() + uniqueDistinctColumn.size() + aggFuncIdx;
           Target aggFuncTarget = oldTargets[targetIdx];
           secondGroupbyTargets.add(aggFuncTarget);
-          int outputColumnId = originOutputSchema.getColumnId(aggFuncTarget.getNamedColumn().getQualifiedName());
-          secondStageColumnIds[outputColumnId] = columnIdIndex;
+          Column column = aggFuncTarget.getNamedColumn();
+          if (column.hasQualifier()) {
+            secondStageColumnIds[originOutputSchema.getColumnId(column.getQualifiedName())]
= columnIdIndex;
+          } else {
+            secondStageColumnIds[originOutputSchema.getColumnIdByName(column.getSimpleName())]
= columnIdIndex;
+          }
           columnIdIndex++;
         }
         secondStageGroupbyNode.setTargets(secondGroupbyTargets.toArray(new Target[]{}));
@@ -336,8 +344,12 @@ public class DistinctGroupbyBuilder {
           secondStageAggFunction.setArgs(new EvalNode[] {firstEval});
 
           Target secondTarget = secondStageGroupbyNode.getTargets()[secondStageGroupbyNode.getGroupingColumns().length
+ aggFuncIdx];
-          int outputColumnId = originOutputSchema.getColumnId(secondTarget.getNamedColumn().getQualifiedName());
-          secondStageColumnIds[outputColumnId] = columnIdIndex;
+          Column column = secondTarget.getNamedColumn();
+          if (column.hasQualifier()) {
+            secondStageColumnIds[originOutputSchema.getColumnId(column.getQualifiedName())]
= columnIdIndex;
+          } else {
+            secondStageColumnIds[originOutputSchema.getColumnIdByName(column.getSimpleName())]
= columnIdIndex;
+          }
           columnIdIndex++;
           aggFuncIdx++;
         }

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/AggregationExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/AggregationExec.java
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/AggregationExec.java
index 208973e..2a671e6 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/AggregationExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/AggregationExec.java
@@ -49,7 +49,11 @@ public abstract class AggregationExec extends UnaryPhysicalExec {
     Column col;
     for (int idx = 0; idx < plan.getGroupingColumns().length; idx++) {
       col = keyColumns[idx];
-      groupingKeyIds[idx] = inSchema.getColumnId(col.getQualifiedName());
+      if (col.hasQualifier()) {
+        groupingKeyIds[idx] = inSchema.getColumnId(col.getQualifiedName());
+      } else {
+        groupingKeyIds[idx] = inSchema.getColumnIdByName(col.getSimpleName());
+      }
     }
 
     if (plan.hasAggFunctions()) {

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbyHashAggregationExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbyHashAggregationExec.java
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbyHashAggregationExec.java
index 6458f47..8daad0b 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbyHashAggregationExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbyHashAggregationExec.java
@@ -40,7 +40,6 @@ public class DistinctGroupbyHashAggregationExec extends PhysicalExec {
 
   private HashAggregator[] hashAggregators;
   private PhysicalExec child;
-  private int distinctGroupingKeyNum;
   private int distinctGroupingKeyIds[];
   private boolean first = true;
   private int groupbyNodeNum;
@@ -58,14 +57,22 @@ public class DistinctGroupbyHashAggregationExec extends PhysicalExec {
     this.child = subOp;
     this.child.init();
 
-    distinctGroupingKeyNum = plan.getGroupingColumns().length;
-    distinctGroupingKeyIds = new int[distinctGroupingKeyNum];
-
-    Column[] keyColumns = plan.getGroupingColumns();
-    Column col;
-    for (int idx = 0; idx < plan.getGroupingColumns().length; idx++) {
-      col = keyColumns[idx];
-      distinctGroupingKeyIds[idx] = inSchema.getColumnId(col.getQualifiedName());
+    List<Integer> distinctGroupingKeyIdList = new ArrayList<Integer>();
+    for (Column col: plan.getGroupingColumns()) {
+      int keyIndex;
+      if (col.hasQualifier()) {
+        keyIndex = inSchema.getColumnId(col.getQualifiedName());
+      } else {
+        keyIndex = inSchema.getColumnIdByName(col.getSimpleName());
+      }
+      if (!distinctGroupingKeyIdList.contains(keyIndex)) {
+        distinctGroupingKeyIdList.add(keyIndex);
+      }
+    }
+    int idx = 0;
+    distinctGroupingKeyIds = new int[distinctGroupingKeyIdList.size()];
+    for (Integer intVal: distinctGroupingKeyIdList) {
+      distinctGroupingKeyIds[idx++] = intVal.intValue();
     }
 
     List<GroupbyNode> groupbyNodes = plan.getGroupByNodes();
@@ -152,6 +159,34 @@ public class DistinctGroupbyHashAggregationExec extends PhysicalExec
{
       return null;
     }
 
+
+    /*
+    Tuple materialization example
+    =============================
+
+    Output Tuple Index: 0(l_orderkey), 1(l_partkey), 2(default.lineitem.l_suppkey), 5(default.lineitem.
+    l_partkey), 8(sum)
+
+              select
+                  lineitem.l_orderkey as l_orderkey,
+                  lineitem.l_partkey as l_partkey,
+                  count(distinct lineitem.l_partkey) as cnt1,
+                  count(distinct lineitem.l_suppkey) as cnt2,
+                  sum(lineitem.l_quantity) as sum1
+              from
+                  lineitem
+              group by
+                  lineitem.l_orderkey, lineitem.l_partkey
+
+    The above case will result in the following materialization
+    ------------------------------------------------------------
+
+    l_orderkey  l_partkey  default.lineitem.l_suppkey  l_orderkey  l_partkey  default.lineitem.l_partkey
 l_orderkey  l_partkey  sum
+        1            1              7311                   1            1               
1                    1           1      53.0
+        1            1              7706
+
+    */
+
     currentAggregatedTuples = new ArrayList<Tuple>();
     int listIndex = 0;
     while (true) {
@@ -296,9 +331,14 @@ public class DistinctGroupbyHashAggregationExec extends PhysicalExec
{
       List<Integer> groupingKeyIdList = new ArrayList<Integer>(distinctGroupingKeyIdSet);
       Column[] keyColumns = groupbyNode.getGroupingColumns();
       Column col;
-      for (int idx = 0; idx < groupbyNode.getGroupingColumns().length; idx++) {
+      for (int idx = 0; idx < keyColumns.length; idx++) {
         col = keyColumns[idx];
-        int keyIndex = inSchema.getColumnId(col.getQualifiedName());
+        int keyIndex;
+        if (col.hasQualifier()) {
+          keyIndex = inSchema.getColumnId(col.getQualifiedName());
+        } else {
+          keyIndex = inSchema.getColumnIdByName(col.getSimpleName());
+        }
         if (!distinctGroupingKeyIdSet.contains(keyIndex)) {
           groupingKeyIdList.add(keyIndex);
         }

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbySortAggregationExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbySortAggregationExec.java
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbySortAggregationExec.java
index c8457ac..fd79725 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbySortAggregationExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/DistinctGroupbySortAggregationExec.java
@@ -18,7 +18,6 @@
 
 package org.apache.tajo.engine.planner.physical;
 
-import org.apache.tajo.catalog.Column;
 import org.apache.tajo.catalog.statistics.TableStats;
 import org.apache.tajo.engine.planner.logical.DistinctGroupbyNode;
 import org.apache.tajo.engine.planner.logical.GroupbyNode;
@@ -34,8 +33,6 @@ public class DistinctGroupbySortAggregationExec extends PhysicalExec {
 
   private boolean finished = false;
 
-  private int distinctGroupingKeyNum;
-
   private Tuple[] currentTuples;
   private int outColumnNum;
   private int groupbyNodeNum;
@@ -49,9 +46,6 @@ public class DistinctGroupbySortAggregationExec extends PhysicalExec {
     this.aggregateExecs = aggregateExecs;
     this.groupbyNodeNum = plan.getGroupByNodes().size();
 
-    final Column[] keyColumns = plan.getGroupingColumns();
-    distinctGroupingKeyNum = keyColumns.length;
-
     currentTuples = new Tuple[groupbyNodeNum];
     outColumnNum = outSchema.size();
 
@@ -116,7 +110,6 @@ public class DistinctGroupbySortAggregationExec extends PhysicalExec {
         mergeTupleIndex++;
       }
     }
-
     return mergedTuple;
   }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
index 4c4227f..9a415d1 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
@@ -57,7 +57,6 @@ public class SortAggregateExec extends AggregationExec {
     Tuple outputTuple = null;
 
     while(!context.isStopped() && (tuple = child.next()) != null) {
-
       // get a key tuple
       currentKey = new VTuple(groupingKeyIds.length);
       for(int i = 0; i < groupingKeyIds.length; i++) {

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
index 43cc2f1..84bcaf7 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
@@ -249,6 +249,10 @@ public class TestGroupByQuery extends QueryTestCaseBase {
     res = executeFile("testDistinctAggregation_case7.sql");
     assertResultSet(res, "testDistinctAggregation_case7.result");
     res.close();
+
+    res = executeFile("testDistinctAggregation_case8.sql");
+    assertResultSet(res, "testDistinctAggregation_case8.result");
+    res.close();
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case8.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case8.sql
b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case8.sql
new file mode 100644
index 0000000..ed8e363
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case8.sql
@@ -0,0 +1,10 @@
+select
+    lineitem.l_orderkey as l_orderkey,
+    lineitem.l_partkey as l_partkey,
+    count(distinct lineitem.l_partkey) as cnt1,
+    count(distinct lineitem.l_suppkey) as cnt2,
+    sum(lineitem.l_quantity) as sum1
+from
+    lineitem
+group by
+    lineitem.l_orderkey, lineitem.l_partkey
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case8.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case8.result
b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case8.result
new file mode 100644
index 0000000..e234896
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case8.result
@@ -0,0 +1,6 @@
+l_orderkey,l_partkey,cnt1,cnt2,sum1
+-------------------------------
+1,1,1,2,53.0
+2,2,1,1,38.0
+3,2,1,1,45.0
+3,3,1,1,49.0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/f0ee8d59/tajo-storage/src/main/java/org/apache/tajo/storage/TupleComparator.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/TupleComparator.java b/tajo-storage/src/main/java/org/apache/tajo/storage/TupleComparator.java
index 30f2810..51388a4 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/TupleComparator.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/TupleComparator.java
@@ -58,7 +58,11 @@ public class TupleComparator implements Comparator<Tuple>, ProtoObject<TupleComp
     this.asc = new boolean[sortKeys.length];
     this.nullFirsts = new boolean[sortKeys.length];
     for (int i = 0; i < sortKeys.length; i++) {
-      this.sortKeyIds[i] = schema.getColumnId(sortKeys[i].getSortKey().getQualifiedName());
+      if (sortKeys[i].getSortKey().hasQualifier()) {
+        this.sortKeyIds[i] = schema.getColumnId(sortKeys[i].getSortKey().getQualifiedName());
+      } else {
+        this.sortKeyIds[i] = schema.getColumnIdByName(sortKeys[i].getSortKey().getSimpleName());
+      }
           
       this.asc[i] = sortKeys[i].isAscending();
       this.nullFirsts[i]= sortKeys[i].isNullFirst();
@@ -160,4 +164,18 @@ public class TupleComparator implements Comparator<Tuple>, ProtoObject<TupleComp
 
     return builder.build();
   }
+
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+
+    String prefix = "";
+    for (int i = 0; i < sortKeyIds.length; i++) {
+      sb.append(prefix).append("SortKeyId=").append(sortKeyIds[i])
+        .append(",Asc=").append(asc[i])
+        .append(",NullFirst=").append(nullFirsts[i]);
+      prefix = " ,";
+    }
+    return sb.toString();
+  }
 }
\ No newline at end of file


Mime
View raw message