drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amansi...@apache.org
Subject drill git commit: DRILL-3917: During file selection expansion, get the metadata for the directory by reading the metadata file. Ensure the selection root format is the same as the format of the files. Add unit test.
Date Sat, 10 Oct 2015 22:42:12 GMT
Repository: drill
Updated Branches:
  refs/heads/master ca2c4f29b -> 8197ba8f9


DRILL-3917: During file selection expansion, get the metadata for the directory by reading
the metadata file.
Ensure the selection root format is the same as the format of the files.
Add unit test.

close apache/drill#195


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/8197ba8f
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/8197ba8f
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/8197ba8f

Branch: refs/heads/master
Commit: 8197ba8f97799c39c5d31466524ae5f694e6b0a7
Parents: ca2c4f2
Author: Aman Sinha <asinha@maprtech.com>
Authored: Sat Oct 10 02:18:40 2015 -0700
Committer: Aman Sinha <asinha@maprtech.com>
Committed: Sat Oct 10 15:40:43 2015 -0700

----------------------------------------------------------------------
 .../exec/store/parquet/ParquetFormatPlugin.java  | 13 +++++++++++--
 .../store/parquet/TestParquetMetadataCache.java  | 19 ++++++++++++++++++-
 2 files changed, 29 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/8197ba8f/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index 7f95024..ac0d8e3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -216,12 +216,21 @@ public class ParquetFormatPlugin implements FormatPlugin{
 
     private FileSelection expandSelection(DrillFileSystem fs, FileSelection selection) throws IOException {
       if (metaDataFileExists(fs, selection.getFirstPath(fs))) {
-        ParquetTableMetadata_v1 metadata = Metadata.getParquetTableMetadata(fs, getMetadataPath(selection.getFirstPath(fs)).toString());
+        FileStatus metaRootDir = selection.getFirstPath(fs);
+        Path metaFilePath = getMetadataPath(metaRootDir);
+
+        // get the metadata for the directory by reading the metadata file
+        ParquetTableMetadata_v1 metadata  = Metadata.readBlockMeta(fs, metaFilePath.toString());
         List<String> fileNames = Lists.newArrayList();
         for (ParquetFileMetadata file : metadata.files) {
           fileNames.add(file.path);
         }
-        return new FileSelection(fileNames, true);
+        // when creating the file selection, set the selection root in the form /a/b instead of
+        // file:/a/b.  The reason is that the file names above have been created in the form
+        // /a/b/c.parquet and the format of the selection root must match that of the file names
+        // otherwise downstream operations such as partition pruning can break.
+        Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
+        return new FileSelection(fileNames, metaRootPath.toString(), true);
       } else {
         // don't expand yet; ParquetGroupScan's metadata gathering operation
         // does that.

http://git-wip-us.apache.org/repos/asf/drill/blob/8197ba8f/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
index 70092e5..ef481e3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
@@ -51,7 +51,7 @@ public class TestParquetMetadataCache extends PlanTestBase {
   }
 
   @Test
-  public void testPartitionPruningWithMetadataCache() throws Exception {
+  public void testPartitionPruningWithMetadataCache_1() throws Exception {
     test(String.format("refresh table metadata dfs_test.`%s/%s`", getDfsTestTmpSchemaLocation(), tableName));
     checkForMetadataFile(tableName);
     String query = String.format("select dir0, dir1, o_custkey, o_orderdate from dfs_test.`%s/%s` " +
@@ -67,6 +67,23 @@ public class TestParquetMetadataCache extends PlanTestBase {
     PlanTestBase.testPlanMatchingPatterns(query, new String[]{numFilesPattern, usedMetaPattern}, new String[] {"Filter"});
   }
 
+  @Test // DRILL-3917
+  public void testPartitionPruningWithMetadataCache_2() throws Exception {
+    test(String.format("refresh table metadata dfs_test.`%s/%s`", getDfsTestTmpSchemaLocation(), tableName));
+    checkForMetadataFile(tableName);
+    String query = String.format("select dir0, dir1, o_custkey, o_orderdate from dfs_test.`%s/%s` " +
+            " where dir0=1994",
+        getDfsTestTmpSchemaLocation(), tableName);
+    int expectedRowCount = 40;
+    int expectedNumFiles = 4;
+
+    int actualRowCount = testSql(query);
+    assertEquals(expectedRowCount, actualRowCount);
+    String numFilesPattern = "numFiles=" + expectedNumFiles;
+    String usedMetaPattern = "usedMetadataFile=true";
+    PlanTestBase.testPlanMatchingPatterns(query, new String[]{numFilesPattern, usedMetaPattern}, new String[] {"Filter"});
+  }
+
   @Test
   public void testCache() throws Exception {
     String tableName = "nation_ctas";


Mime
View raw message