drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From amansi...@apache.org
Subject [1/4] drill git commit: DRILL-3788: Expand the file selection to contain all files within the directory while creating DynamicDrillTable
Date Thu, 01 Oct 2015 22:00:00 GMT
Repository: drill
Updated Branches:
  refs/heads/master f01da3eeb -> 9c74c7f78


DRILL-3788: Expand the file selection to contain all files within the directory while creating DynamicDrillTable


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/e8d27e14
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/e8d27e14
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/e8d27e14

Branch: refs/heads/master
Commit: e8d27e141c0d5a4ec51309ac06c7d5762c204966
Parents: edd10df
Author: Mehant Baid <mehantr@gmail.com>
Authored: Thu Sep 24 16:35:47 2015 -0700
Committer: Aman Sinha <asinha@maprtech.com>
Committed: Thu Oct 1 14:55:04 2015 -0700

----------------------------------------------------------------------
 .../planner/FileSystemPartitionDescriptor.java  |  3 +-
 .../exec/store/parquet/ParquetFormatPlugin.java |  2 +-
 .../exec/store/parquet/ParquetGroupScan.java    |  2 +-
 .../apache/drill/TestCTASPartitionFilter.java   |  2 +-
 .../store/parquet/TestParquetMetadataCache.java | 40 +++++++++++++++++++-
 5 files changed, 43 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/e8d27e14/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
index c10d0af..fd2f850 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
@@ -28,6 +28,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 
 import org.apache.calcite.util.BitSets;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.Types;
@@ -95,7 +96,7 @@ public class FileSystemPartitionDescriptor extends AbstractPartitionDescriptor {
     for (PartitionLocation partitionLocation: partitions) {
       for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
         if (partitionLocation.getPartitionValue(partitionColumnIndex) == null) {
-          ((NullableVarCharVector) vectors[partitionColumnIndex]).getMutator().setNull(record);
+          throw new DrillRuntimeException("Value for directory cannot be null");
         } else {
           byte[] bytes = (partitionLocation.getPartitionValue(partitionColumnIndex)).getBytes(Charsets.UTF_8);
           ((NullableVarCharVector) vectors[partitionColumnIndex]).getMutator().setSafe(record, bytes, 0, bytes.length);

http://git-wip-us.apache.org/repos/asf/drill/blob/e8d27e14/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index 3b7839a..e72f4a8 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -206,7 +206,7 @@ public class ParquetFormatPlugin implements FormatPlugin{
       // TODO: we only check the first file for directory reading.  This is because
       if(selection.containsDirectories(fs)){
         if(isDirReadable(fs, selection.getFirstPath(fs))){
-          return new FormatSelection(plugin.getConfig(), selection);
+          return new FormatSelection(plugin.getConfig(), selection.minusDirectories(fs));
         }
       }
       return super.isReadable(fs, selection);

http://git-wip-us.apache.org/repos/asf/drill/blob/e8d27e14/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
index 7800721..3080d11 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
@@ -497,7 +497,7 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
     } else {
       Path p = Path.getPathWithoutSchemeAndAuthority(new Path(selectionRoot));
       Path metaPath = new Path(p, Metadata.METADATA_FILENAME);
-      if (fs.exists(metaPath)) {
+      if (fs.exists(metaPath) && fileSet != null) {
         parquetTableMetadata = removeUnneededRowGroups(Metadata.readBlockMeta(fs, metaPath.toString()));
       } else {
         fileStatuses = Lists.newArrayList();

http://git-wip-us.apache.org/repos/asf/drill/blob/e8d27e14/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java b/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java
index 9886024..1f53a81 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/TestCTASPartitionFilter.java
@@ -74,7 +74,7 @@ public class TestCTASPartitionFilter extends PlanTestBase {
     test("use dfs_test.tmp");
     test(String.format("create table drill_3410 partition by (o_orderpriority) as select * from dfs_test.`%s/multilevel/parquet`", TEST_RES_PATH));
     String query = "select * from drill_3410 where (o_orderpriority = '1-URGENT' and o_orderkey = 10) or (o_orderpriority = '2-HIGH' or o_orderkey = 11)";
-    testIncludeFilter(query, 1, "Filter", 34);
+    testIncludeFilter(query, 5, "Filter", 34);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/drill/blob/e8d27e14/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
index 5a88cf9..12158c9 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetMetadataCache.java
@@ -18,17 +18,53 @@
 package org.apache.drill.exec.store.parquet;
 
 import com.google.common.base.Joiner;
+import org.apache.commons.io.filefilter.IOFileFilter;
 import org.apache.drill.BaseTestQuery;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
+import org.apache.drill.PlanTestBase;
+import org.apache.drill.common.util.TestTools;
+import org.apache.commons.io.FileUtils;
+import org.apache.drill.exec.store.dfs.DrillPathFilter;
 import org.apache.hadoop.fs.Path;
 import org.junit.Assert;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import java.io.File;
 import java.nio.file.Files;
 
+import static org.junit.Assert.assertEquals;
+
 public class TestParquetMetadataCache extends BaseTestQuery {
+  private static final String WORKING_PATH = TestTools.getWorkingPath();
+  private static final String TEST_RES_PATH = WORKING_PATH + "/src/test/resources";
+  private static final String tableName = "parquetTable";
+
+
+  @BeforeClass
+  public static void copyData() throws Exception {
+    // copy the data into the temporary location
+    String tmpLocation = getDfsTestTmpSchemaLocation();
+    File dataDir = new File(tmpLocation + Path.SEPARATOR + tableName);
+    dataDir.mkdir();
+    FileUtils.copyDirectory(new File(String.format(String.format("%s/multilevel/parquet", TEST_RES_PATH))),
+        dataDir);
+  }
+
+  @Test
+  public void testPartitionPruningWithMetadataCache() throws Exception {
+    test(String.format("refresh table metadata dfs_test.`%s/%s`", getDfsTestTmpSchemaLocation(), tableName));
+    checkForMetadataFile(tableName);
+    String query = String.format("select dir0, dir1, o_custkey, o_orderdate from dfs_test.`%s/%s` " +
+            " where dir0=1994 and dir1='Q1'",
+        getDfsTestTmpSchemaLocation(), tableName);
+    int expectedRowCount = 10;
+    int expectedNumFiles = 1;
+
+    int actualRowCount = testSql(query);
+    assertEquals(expectedRowCount, actualRowCount);
+    String numFilesPattern = "numFiles=" + expectedNumFiles;
+    PlanTestBase.testPlanMatchingPatterns(query, new String[]{numFilesPattern}, new String[] {"Filter"});
+  }
 
   @Test
   public void testCache() throws Exception {


Mime
View raw message