Return-Path: X-Original-To: apmail-tajo-commits-archive@minotaur.apache.org Delivered-To: apmail-tajo-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 59A7D18448 for ; Fri, 26 Jun 2015 02:12:35 +0000 (UTC) Received: (qmail 2673 invoked by uid 500); 26 Jun 2015 02:12:35 -0000 Delivered-To: apmail-tajo-commits-archive@tajo.apache.org Received: (qmail 2633 invoked by uid 500); 26 Jun 2015 02:12:35 -0000 Mailing-List: contact commits-help@tajo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@tajo.apache.org Delivered-To: mailing list commits@tajo.apache.org Received: (qmail 2624 invoked by uid 99); 26 Jun 2015 02:12:35 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 26 Jun 2015 02:12:35 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id C1178E36B0; Fri, 26 Jun 2015 02:12:34 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: blrunner@apache.org To: commits@tajo.apache.org Message-Id: <459043b579de4be0a68ca6a2067a18dd@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: tajo git commit: TAJO-1644: When inserting empty data into a partitioned table, existing data would be removed. (jaehwa) Date: Fri, 26 Jun 2015 02:12:34 +0000 (UTC) Repository: tajo Updated Branches: refs/heads/master aa49dc4a8 -> f57d6c43f TAJO-1644: When inserting empty data into a partitioned table, existing data would be removed. 
(jaehwa) Closes #601 Project: http://git-wip-us.apache.org/repos/asf/tajo/repo Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/f57d6c43 Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/f57d6c43 Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/f57d6c43 Branch: refs/heads/master Commit: f57d6c43fd201326fef2a695b1d1e798d0f814e3 Parents: aa49dc4 Author: JaeHwa Jung Authored: Fri Jun 26 11:11:09 2015 +0900 Committer: JaeHwa Jung Committed: Fri Jun 26 11:11:09 2015 +0900 ---------------------------------------------------------------------- CHANGES | 3 + .../main/java/org/apache/tajo/SessionVars.java | 4 + .../java/org/apache/tajo/conf/TajoConf.java | 7 +- .../tajo/engine/query/TestOuterJoinQuery.java | 2 +- .../tajo/engine/query/TestTablePartitions.java | 106 ++++++++----------- .../TestTajoCli/testHelpSessionVars.result | 1 + .../org/apache/tajo/storage/FileTablespace.java | 7 +- 7 files changed, 66 insertions(+), 64 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/CHANGES ---------------------------------------------------------------------- diff --git a/CHANGES b/CHANGES index 9412179..e0cbd47 100644 --- a/CHANGES +++ b/CHANGES @@ -163,6 +163,9 @@ Release 0.11.0 - unreleased BUG FIXES + TAJO-1644: When inserting empty data into a partitioned table, + existing data would be removed. (jaehwa) + TAJO-1642: CatalogServer need to check meta table first. (jaehwa) TAJO-1650: TestQueryResource.testGetAllQueries() occasionally fails. 
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-common/src/main/java/org/apache/tajo/SessionVars.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java index 98c2f3e..28fdb0b 100644 --- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java +++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java @@ -126,6 +126,10 @@ public enum SessionVars implements ConfigKey { NULL_CHAR(ConfVars.$TEXT_NULL, "null char of text file output", DEFAULT), CODEGEN(ConfVars.$CODEGEN, "Runtime code generation enabled (experiment)", DEFAULT), + PARTITION_NO_RESULT_OVERWRITE_ENABLED(ConfVars.$PARTITION_NO_RESULT_OVERWRITE_ENABLED, + "If True, a partitioned table is overwritten even if a sub query leads to no result. " + + "Otherwise, the table data will be kept if there is no result", DEFAULT), + // Behavior Control --------------------------------------------------------- ARITHABORT(ConfVars.$BEHAVIOR_ARITHMETIC_ABORT, "If true, a running query will be terminated when an overflow or divide-by-zero occurs.", DEFAULT), http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java ---------------------------------------------------------------------- diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java index ba777c1..14cfb11 100644 --- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java +++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java @@ -323,7 +323,6 @@ public class TajoConf extends Configuration { $DIST_QUERY_JOIN_TASK_VOLUME("tajo.dist-query.join.task-volume-mb", 128), $DIST_QUERY_SORT_TASK_VOLUME("tajo.dist-query.sort.task-volume-mb", 128), $DIST_QUERY_GROUPBY_TASK_VOLUME("tajo.dist-query.groupby.task-volume-mb", 128), - 
$DIST_QUERY_JOIN_PARTITION_VOLUME("tajo.dist-query.join.partition-volume-mb", 128, Validators.min("1")), $DIST_QUERY_GROUPBY_PARTITION_VOLUME("tajo.dist-query.groupby.partition-volume-mb", 256, Validators.min("1")), $DIST_QUERY_TABLE_PARTITION_VOLUME("tajo.dist-query.table-partition.task-volume-mb", 256, Validators.min("1")), @@ -376,7 +375,11 @@ public class TajoConf extends Configuration { // Behavior Control --------------------------------------------------------- $BEHAVIOR_ARITHMETIC_ABORT("tajo.behavior.arithmetic-abort", false), - // ResultSet --------------------------------------------------------- + // If True, a partitioned table is overwritten even if a sub query leads to no result. + // Otherwise, the table data will be kept if there is no result + $PARTITION_NO_RESULT_OVERWRITE_ENABLED("tajo.partition.overwrite.even-if-no-result", false), + + // ResultSet --------------------------------------------------------- $RESULT_SET_FETCH_ROWNUM("tajo.resultset.fetch.rownum", 200), $RESULT_SET_BLOCK_WAIT("tajo.resultset.block.wait", true), ; http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java index 9445557..9d0e0bc 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java @@ -251,7 +251,7 @@ public class TestOuterJoinQuery extends TestJoinQuery { } @Test - @Option(withExplain = true, withExplainGlobal = true, parameterized = true) + @Option(withExplain = true, withExplainGlobal = true, parameterized = true, sort = true) @SimpleTest(queries = { @QuerySpec("select t1.id, t1.name, t2.id, t3.id, t4.id\n" + "from jointable11 t1\n" + 
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java ---------------------------------------------------------------------- diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java index 397b9ef..ef57356 100644 --- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java +++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java @@ -56,6 +56,7 @@ import java.util.*; import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME; import static org.apache.tajo.plan.serder.PlanProto.ShuffleType.SCATTERED_HASH_SHUFFLE; import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; @RunWith(Parameterized.class) public class TestTablePartitions extends QueryTestCaseBase { @@ -437,18 +438,7 @@ public class TestTablePartitions extends QueryTestCaseBase { Path path = new Path(desc.getUri()); FileSystem fs = FileSystem.get(conf); - assertTrue(fs.isDirectory(path)); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0"))); + verifyDirectoriesForThreeColumns(fs, path, 
1); if (!testingCluster.isHiveCatalogStoreRunning()) { assertEquals(5, desc.getStats().getNumRows().intValue()); } @@ -488,22 +478,11 @@ public class TestTablePartitions extends QueryTestCaseBase { desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName); path = new Path(desc.getUri()); - assertTrue(fs.isDirectory(path)); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0"))); - + verifyDirectoriesForThreeColumns(fs, path, 2); if (!testingCluster.isHiveCatalogStoreRunning()) { assertEquals(5, desc.getStats().getNumRows().intValue()); } + String expected = "N\n" + "N\n" + "N\n" + @@ -548,54 +527,61 @@ public class TestTablePartitions extends QueryTestCaseBase { + " where l_orderkey = 1 and l_partkey = 1 and l_linenumber = 1"); res.close(); - desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName); - assertTrue(fs.isDirectory(path)); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2"))); - 
assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0"))); - assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0"))); - + verifyDirectoriesForThreeColumns(fs, path, 3); if (!testingCluster.isHiveCatalogStoreRunning()) { // TODO: If there is existing another partition directory, we must add its rows number to result row numbers. + // desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName); // assertEquals(6, desc.getStats().getNumRows().intValue()); } - res = executeString("select * from " + tableName + " where col2 = 1"); - resultSetData = resultSetToString(res); - res.close(); - expected = "col4,col1,col2,col3\n" + - "-------------------------------\n" + - "N,1,1,17.0\n" + - "N,1,1,17.0\n" + - "N,1,1,30.0\n" + - "N,1,1,36.0\n" + - "N,1,1,36.0\n"; - - assertEquals(expected, resultSetData); + verifyKeptExistingData(res, tableName); // insert overwrite empty result to partitioned table res = executeString("insert overwrite into " + tableName - + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey" + - " > 100"); + + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey > 100"); res.close(); - desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName); + verifyDirectoriesForThreeColumns(fs, path, 4); + verifyKeptExistingData(res, tableName); - ContentSummary summary = fs.getContentSummary(new Path(desc.getUri())); + executeString("DROP TABLE " + tableName + " PURGE").close(); + } - assertEquals(summary.getDirectoryCount(), 1L); - assertEquals(summary.getFileCount(), 0L); - 
assertEquals(summary.getLength(), 0L); + private final void verifyKeptExistingData(ResultSet res, String tableName) throws Exception { + res = executeString("select * from " + tableName + " where col2 = 1"); + String resultSetData = resultSetToString(res); + res.close(); + String expected = "col4,col1,col2,col3\n" + + "-------------------------------\n" + + "N,1,1,17.0\n" + + "N,1,1,17.0\n" + + "N,1,1,30.0\n" + + "N,1,1,36.0\n" + + "N,1,1,36.0\n"; - executeString("DROP TABLE " + tableName + " PURGE").close(); + assertEquals(expected, resultSetData); + } + + private final void verifyDirectoriesForThreeColumns(FileSystem fs, Path path, int step) throws Exception { + assertTrue(fs.isDirectory(path)); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1"))); + + if (step == 1 || step == 2) { + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0"))); + } else { + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0"))); + } + + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0"))); + assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0"))); } @Test http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result ---------------------------------------------------------------------- diff --git 
a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result index 137b0de..5c2ffe3 100644 --- a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result +++ b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result @@ -35,6 +35,7 @@ Available Session Variables: \set MAX_OUTPUT_FILE_SIZE [int value] - Maximum per-output file size (mb). 0 means infinite. \set NULL_CHAR [text value] - null char of text file output \set CODEGEN [true or false] - Runtime code generation enabled (experiment) +\set PARTITION_NO_RESULT_OVERWRITE_ENABLED [true or false] - If True, a partitioned table is overwritten even if a sub query leads to no result. Otherwise, the table data will be kept if there is no result \set ARITHABORT [true or false] - If true, a running query will be terminated when an overflow or divide-by-zero occurs. \set FETCH_ROWNUM [int value] - Sets the number of rows at a time from Master \set BLOCK_ON_RESULT [true or false] - Whether to block result set on query execution http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java ---------------------------------------------------------------------- diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java index 3b63012..e8a6c12 100644 --- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java +++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java @@ -981,7 +981,12 @@ public class FileTablespace extends Tablespace { Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME); ContentSummary summary = fs.getContentSummary(stagingResultDir); - if 
(!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty() && summary.getFileCount() > 0L) { + // When inserting empty data into a partitioned table, check whether the existing data should be removed or kept. + boolean overwriteEnabled = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED); + + // If the existing data does not need to be kept, check whether the result contains any files. + if ( (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty()) + && (!overwriteEnabled || (overwriteEnabled && summary.getFileCount() > 0L))) { // This is a map for existing non-leaf directory to rename. A key is current directory and a value is // renaming directory. Map renameDirs = TUtil.newHashMap();