Return-Path: X-Original-To: apmail-tajo-commits-archive@minotaur.apache.org Delivered-To: apmail-tajo-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 6C84210C80 for ; Thu, 27 Feb 2014 04:46:14 +0000 (UTC) Received: (qmail 55805 invoked by uid 500); 27 Feb 2014 04:46:13 -0000 Delivered-To: apmail-tajo-commits-archive@tajo.apache.org Received: (qmail 55780 invoked by uid 500); 27 Feb 2014 04:46:13 -0000 Mailing-List: contact commits-help@tajo.incubator.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@tajo.incubator.apache.org Delivered-To: mailing list commits@tajo.incubator.apache.org Received: (qmail 55728 invoked by uid 99); 27 Feb 2014 04:46:12 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 27 Feb 2014 04:46:12 +0000 X-ASF-Spam-Status: No, hits=-2000.5 required=5.0 tests=ALL_TRUSTED,RP_MATCHES_RCVD X-Spam-Check-By: apache.org Received: from [140.211.11.3] (HELO mail.apache.org) (140.211.11.3) by apache.org (qpsmtpd/0.29) with SMTP; Thu, 27 Feb 2014 04:46:10 +0000 Received: (qmail 55655 invoked by uid 99); 27 Feb 2014 04:45:50 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 27 Feb 2014 04:45:50 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id EE37F92E356; Thu, 27 Feb 2014 04:45:49 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: blrunner@apache.org To: commits@tajo.incubator.apache.org Date: Thu, 27 Feb 2014 04:45:50 -0000 Message-Id: <551e5ce8ca644dfdb96643d67aabfdf7@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [2/2] git commit: TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa) X-Virus-Checked: Checked by ClamAV on apache.org TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa) Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/db5c017d Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/db5c017d Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/db5c017d Branch: refs/heads/branch-0.8.0 Commit: db5c017d416561c9dac08c093558d7344aa53be1 Parents: 56fbd99 Author: blrunner Authored: Thu Feb 27 13:45:02 2014 +0900 Committer: blrunner Committed: Thu Feb 27 13:45:02 2014 +0900 ---------------------------------------------------------------------- CHANGES.txt | 2 ++ .../java/org/apache/tajo/benchmark/TPCH.java | 8 +++++++ .../tajo/master/querymaster/Repartitioner.java | 22 ++++++++++++++++++-- .../apache/tajo/engine/query/TestJoinQuery.java | 8 +++++++ .../testInnerJoinWithEmptyTable.sql | 8 +++++++ .../testInnerJoinWithEmptyTable.result | 1 + 6 files changed, 47 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/CHANGES.txt ---------------------------------------------------------------------- diff --git a/CHANGES.txt b/CHANGES.txt index 1d86479..eef17d2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -263,6 +263,8 @@ Release 0.8.0 - unreleased BUG FIXES + TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa) + TAJO-628: The second stage of distinct aggregation can be scheduled to only one node. (hyunsik) http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java index 2e12b1d..5e9c9d3 100644 --- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java +++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java @@ -46,6 +46,8 @@ public class TPCH extends BenchmarkSet { public static final String ORDERS = "orders"; public static final String PARTSUPP = "partsupp"; public static final String SUPPLIER = "supplier"; + public static final String EMPTY_ORDERS = "empty_orders"; + public static final Map tableVolumes = Maps.newHashMap(); @@ -58,6 +60,8 @@ public class TPCH extends BenchmarkSet { tableVolumes.put(ORDERS, 171952161L); tableVolumes.put(PARTSUPP, 118984616L); tableVolumes.put(SUPPLIER, 1409184L); + tableVolumes.put(EMPTY_ORDERS, 0L); + } @Override @@ -131,6 +135,8 @@ public class TPCH extends BenchmarkSet { .addColumn("o_shippriority", Type.INT4) // 7 .addColumn("o_comment", Type.TEXT); // 8 schemas.put(ORDERS, orders); + schemas.put(EMPTY_ORDERS, orders); + Schema partsupp = new Schema() .addColumn("ps_partkey", Type.INT4) // 0 @@ -177,6 +183,8 @@ public class TPCH extends BenchmarkSet { loadTable(ORDERS); loadTable(PARTSUPP) ; loadTable(SUPPLIER); + loadTable(EMPTY_ORDERS); + } private void loadTable(String tableName) throws ServiceException { http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java index 0d3f95e..4a7976f 100644 --- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java +++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java @@ -23,6 +23,7 @@ import com.google.common.collect.Lists; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.tajo.algebra.JoinType; import org.apache.tajo.ExecutionBlockId; import org.apache.tajo.catalog.*; import org.apache.tajo.catalog.statistics.StatisticsUtil; @@ -100,13 +101,30 @@ public class Repartitioner { } catch (PlanningException e) { throw new IOException(e); } - fragments[i] = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(), - tablePath).get(0); + + // if table has no data, storageManager will return empty FileFragment. + // So, we need to handle FileFragment by its size. + // If we don't check its size, it can cause IndexOutOfBoundsException. + List fileFragments = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(), tablePath); + if (fileFragments.size() > 0) { + fragments[i] = fileFragments.get(0); + } else { + fragments[i] = new FileFragment(scans[i].getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST}); + } } } LOG.info(String.format("Left Volume: %d, Right Volume: %d", stats[0], stats[1])); + // If one of inner join tables has no input data, + // it should return zero rows. + JoinNode joinNode = PlannerUtil.findMostBottomNode(execBlock.getPlan(), NodeType.JOIN); + if (joinNode != null) { + if ( (joinNode.getJoinType().equals(JoinType.INNER)) && (stats[0] == 0 || stats[1] == 0)) { + return; + } + } + // Assigning either fragments or fetch urls to query units boolean leftSmall = execBlock.isBroadcastTable(scans[0].getCanonicalName()); boolean rightSmall = execBlock.isBroadcastTable(scans[1].getCanonicalName()); http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java index 3a95724..0e925f1 100644 --- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java +++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java @@ -207,4 +207,12 @@ public class TestJoinQuery extends QueryTestCaseBase { assertResultSet(res); cleanupQuery(res); } + + @Test + public final void testInnerJoinWithEmptyTable() throws Exception { + ResultSet res = executeQuery(); + assertResultSet(res); + cleanupQuery(res); + } + } http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql new file mode 100644 index 0000000..00c7884 --- /dev/null +++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql @@ -0,0 +1,8 @@ +select + c_custkey, + orders.o_orderkey +from + customer, empty_orders +where c_custkey = o_orderkey +order by + c_custkey, o_orderkey; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result ---------------------------------------------------------------------- diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result new file mode 100644 index 0000000..c6036d7 --- /dev/null +++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result @@ -0,0 +1 @@ +------------------------------- \ No newline at end of file