drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From adene...@apache.org
Subject [2/2] drill git commit: DRILL-4484: NPE when querying empty directory
Date Wed, 16 Mar 2016 17:10:11 GMT
DRILL-4484: NPE when querying empty directory


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/71608ca9
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/71608ca9
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/71608ca9

Branch: refs/heads/master
Commit: 71608ca9fb53ff0af4f1d09f32d61e7280377e7a
Parents: 11fe8d7
Author: adeneche <adeneche@gmail.com>
Authored: Thu Mar 10 10:40:06 2016 +0100
Committer: adeneche <adeneche@gmail.com>
Committed: Wed Mar 16 16:53:30 2016 +0100

----------------------------------------------------------------------
 .../drill/exec/store/dfs/FileSelection.java     |   6 +-
 .../exec/store/parquet/ParquetGroupScan.java    |  12 +-
 .../store/parquet/TestParquetGroupScan.java     | 120 +++++++++++++++++++
 .../store/parquet/TestParquetMetadataCache.java |  19 ---
 4 files changed, 135 insertions(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/71608ca9/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
index dc49281..5b4813a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
@@ -150,7 +150,11 @@ public class FileSelection {
     logger.debug("FileSelection.minusDirectories() took {} ms, numFiles: {}",
         timer.elapsed(TimeUnit.MILLISECONDS), total);
 
-    fileSel.setExpanded();
+    // fileSel will be null if we query an empty folder
+    if (fileSel != null) {
+      fileSel.setExpanded();
+    }
+
     return fileSel;
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/71608ca9/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
index 774f515..47172cc 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
@@ -27,6 +27,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.drill.common.exceptions.ExecutionSetupException;
+import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.logical.FormatPluginConfig;
 import org.apache.drill.common.logical.StoragePluginConfig;
@@ -568,6 +569,7 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
    * @return file selection read from cache
    *
    * @throws IOException
+   * @throws UserException when the updated selection is empty; this happens if the user
selects an empty folder.
    */
   private FileSelection
   initFromMetadataCache(FileSelection selection, Path metaFilePath) throws IOException {
@@ -580,7 +582,8 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
     List<String> fileNames = Lists.newArrayList();
     List<FileStatus> fileStatuses = selection.getStatuses(fs);
 
-    if (fileStatuses.size() == 1 && fileStatuses.get(0).isDirectory()) {
+    final Path first = fileStatuses.get(0).getPath();
+    if (fileStatuses.size() == 1 && selection.getSelectionRoot().equals(first.toString()))
{
       // we are selecting all files from selection root. Expand the file list from the cache
       for (Metadata.ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
         fileNames.add(file.getPath());
@@ -590,7 +593,7 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
       // we need to expand the files from fileStatuses
       for (FileStatus status : fileStatuses) {
         if (status.isDirectory()) {
-          //TODO read the metadata cache files in parallel
+          //TODO [DRILL-4496] read the metadata cache files in parallel
           final Path metaPath = new Path(status.getPath(), Metadata.METADATA_FILENAME);
           final Metadata.ParquetTableMetadataBase metadata = Metadata.readBlockMeta(fs, metaPath.toString());
           for (Metadata.ParquetFileMetadata file : metadata.getFiles()) {
@@ -606,6 +609,11 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
       fileSet = Sets.newHashSet(fileNames);
     }
 
+    if (fileNames.isEmpty()) {
+      // no files were found, most likely we tried to query some empty subfolders
+      throw UserException.validationError().message("The table you tried to query is empty").build(logger);
+    }
+
     // when creating the file selection, set the selection root in the form /a/b instead
of
     // file:/a/b.  The reason is that the file names above have been created in the form
     // /a/b/c.parquet and the format of the selection root must match that of the file names

http://git-wip-us.apache.org/repos/asf/drill/blob/71608ca9/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetGroupScan.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetGroupScan.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetGroupScan.java
new file mode 100644
index 0000000..21d6285
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetGroupScan.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import org.apache.drill.BaseTestQuery;
+import org.apache.drill.common.exceptions.UserRemoteException;
+import org.junit.Test;
+
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+public class TestParquetGroupScan extends BaseTestQuery {
+
+  private void prepareTables(final String tableName, boolean refreshMetadata) throws Exception
{
+    // first create some parquet subfolders
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s`      AS SELECT employee_id FROM cp.`employee.json`
LIMIT 1", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/501`  AS SELECT employee_id FROM cp.`employee.json`
LIMIT 2", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/502`  AS SELECT employee_id FROM cp.`employee.json`
LIMIT 4", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/503`  AS SELECT employee_id FROM cp.`employee.json`
LIMIT 8", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/504`  AS SELECT employee_id FROM cp.`employee.json`
LIMIT 16", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/505`  AS SELECT employee_id FROM cp.`employee.json`
LIMIT 32", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/60`   AS SELECT employee_id FROM cp.`employee.json`
LIMIT 64", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/602`  AS SELECT employee_id FROM cp.`employee.json`
LIMIT 128", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/6031` AS SELECT employee_id FROM cp.`employee.json`
LIMIT 256", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/6032` AS SELECT employee_id FROM cp.`employee.json`
LIMIT 512", tableName);
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/6033` AS SELECT employee_id FROM cp.`employee.json`
LIMIT 1024", tableName);
+
+    // we need an empty subfolder `6041`
+    // to do this we first create a table inside that subfolder
+    testNoResult("CREATE TABLE dfs_test.tmp.`%s/6041/a` AS SELECT * FROM cp.`employee.json`
LIMIT 1", tableName);
+    // then we delete the table, leaving the parent subfolder empty
+    testNoResult("DROP TABLE   dfs_test.tmp.`%s/6041/a`", tableName);
+
+    if (refreshMetadata) {
+      // build the metadata cache file
+      testNoResult("REFRESH TABLE METADATA dfs_test.tmp.`%s`", tableName);
+    }
+  }
+
+  @Test
+  public void testFix4376() throws Exception {
+    prepareTables("4376_1", true);
+
+    testBuilder()
+      .sqlQuery("SELECT COUNT(*) AS `count` FROM dfs_test.tmp.`4376_1/60*`")
+      .ordered()
+      .baselineColumns("count").baselineValues(1984L)
+      .go();
+  }
+
+  @Test
+  public void testWildCardEmptyWithCache() throws Exception {
+    prepareTables("4376_2", true);
+
+    try {
+      runSQL("SELECT COUNT(*) AS `count` FROM dfs_test.tmp.`4376_2/604*`");
+      fail("Query should've failed!");
+    } catch (UserRemoteException uex) {
+      final String expectedMsg = "The table you tried to query is empty";
+      assertTrue(String.format("Error message should contain \"%s\" but was instead \"%s\"",
expectedMsg,
+        uex.getMessage()), uex.getMessage().contains(expectedMsg));
+    }
+  }
+
+  @Test
+  public void testWildCardEmptyNoCache() throws Exception {
+    prepareTables("4376_3", false);
+
+    try {
+      runSQL("SELECT COUNT(*) AS `count` FROM dfs_test.tmp.`4376_3/604*`");
+      fail("Query should've failed!");
+    } catch (UserRemoteException uex) {
+      final String expectedMsg = "Table 'dfs_test.tmp.4376_3/604*' not found";
+      assertTrue(String.format("Error message should contain \"%s\" but was instead \"%s\"",
expectedMsg,
+        uex.getMessage()), uex.getMessage().contains(expectedMsg));
+    }
+  }
+
+  @Test
+  public void testSelectEmptyWithCache() throws Exception {
+    prepareTables("4376_4", true);
+
+    try {
+      runSQL("SELECT COUNT(*) AS `count` FROM dfs_test.tmp.`4376_4/6041`");
+      fail("Query should've failed!");
+    } catch (UserRemoteException uex) {
+      final String expectedMsg = "The table you tried to query is empty";
+      assertTrue(String.format("Error message should contain \"%s\" but was instead \"%s\"",
expectedMsg,
+        uex.getMessage()), uex.getMessage().contains(expectedMsg));
+    }
+  }
+
+  @Test
+  public void testSelectEmptyNoCache() throws Exception {
+    prepareTables("4376_5", false);
+    try {
+      runSQL("SELECT COUNT(*) AS `count` FROM dfs_test.tmp.`4376_5/6041`");
+      fail("Query should've failed!");
+    } catch (UserRemoteException uex) {
+      final String expectedMsg = "Table 'dfs_test.tmp.4376_5/6041' not found";
+      assertTrue(String.format("Error message should contain \"%s\" but was instead \"%s\"",
expectedMsg,
+        uex.getMessage()), uex.getMessage().contains(expectedMsg));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/71608ca9/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
index b41502e..4330c96 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
@@ -177,25 +177,6 @@ public class TestParquetMetadataCache extends PlanTestBase {
       .go();
   }
 
-  @Test
-  public void testFix4376() throws Exception {
-    // first create some parquet subfolders
-    runSQL("CREATE TABLE dfs_test.tmp.`4376`    AS SELECT * FROM cp.`employee.json` LIMIT
1");
-    runSQL("CREATE TABLE dfs_test.tmp.`4376/01` AS SELECT * FROM cp.`employee.json` LIMIT
2");
-    runSQL("CREATE TABLE dfs_test.tmp.`4376/02` AS SELECT * FROM cp.`employee.json` LIMIT
4");
-    runSQL("CREATE TABLE dfs_test.tmp.`4376/0`  AS SELECT * FROM cp.`employee.json` LIMIT
8");
-    runSQL("CREATE TABLE dfs_test.tmp.`4376/11` AS SELECT * FROM cp.`employee.json` LIMIT
16");
-    runSQL("CREATE TABLE dfs_test.tmp.`4376/12` AS SELECT * FROM cp.`employee.json` LIMIT
32");
-    // next, build the metadata cache file
-    runSQL("REFRESH TABLE METADATA dfs_test.tmp.`4376`");
-
-    testBuilder()
-      .sqlQuery("SELECT COUNT(*) AS `count` FROM dfs_test.tmp.`4376/0*`")
-      .ordered()
-      .baselineColumns("count").baselineValues(15L)
-      .go();
-    }
-
   private void checkForMetadataFile(String table) throws Exception {
     String tmpDir = getDfsTestTmpSchemaLocation();
     String metaFile = Joiner.on("/").join(tmpDir, table, Metadata.METADATA_FILENAME);


Mime
View raw message