drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From h.@apache.org
Subject drill git commit: DRILL-2618: handle queries over empty folders consistently so that they report table not found rather than failing. Refactor FileSelection to eliminate redundancy, make it more managable Fix WorkspaceSchemaFactory to handle empty folder
Date Wed, 25 Nov 2015 00:49:11 GMT
Repository: drill
Updated Branches:
  refs/heads/master bd39d3002 -> 367d74a65


DRILL-2618: handle queries over empty folders consistently so that they report table not found
rather than failing.
Refactor FileSelection to eliminate redundancy, make it more managable
Fix WorkspaceSchemaFactory to handle empty folders.
Introduce ParquetFileSelection, a sub-class of FileSelection that carries along metadata cache
Fix MagicStringMatcher so that it operate on files only.
Unit test file selection


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/367d74a6
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/367d74a6
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/367d74a6

Branch: refs/heads/master
Commit: 367d74a65ce2871a1452361cbd13bbd5f4a6cc95
Parents: bd39d30
Author: Hanifi Gunes <hanifigunes@gmail.com>
Authored: Mon Nov 16 18:33:13 2015 -0800
Committer: Hanifi Gunes <hanifigunes@gmail.com>
Committed: Tue Nov 24 16:50:34 2015 -0800

----------------------------------------------------------------------
 .../org/apache/drill/common/util/TestTools.java |  23 ++
 .../planner/FileSystemPartitionDescriptor.java  |   4 +-
 .../planner/ParquetPartitionDescriptor.java     |   5 +-
 .../exec/store/dfs/BasicFormatMatcher.java      |  12 +-
 .../drill/exec/store/dfs/FileSelection.java     | 279 ++++++++++---------
 .../drill/exec/store/dfs/FormatPlugin.java      |   1 -
 .../drill/exec/store/dfs/FormatSelection.java   |   4 +-
 .../exec/store/dfs/WorkspaceSchemaFactory.java  |  24 +-
 .../exec/store/dfs/easy/EasyGroupScan.java      |   6 +-
 .../sequencefile/SequenceFileFormatPlugin.java  |   1 -
 .../store/parquet/ParquetFileSelection.java     |  62 +++++
 .../exec/store/parquet/ParquetFormatPlugin.java |   6 +-
 .../exec/store/parquet/ParquetGroupScan.java    |  10 +-
 .../drill/exec/store/dfs/TestFileSelection.java |  66 +++++
 14 files changed, 342 insertions(+), 161 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/common/src/main/java/org/apache/drill/common/util/TestTools.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/drill/common/util/TestTools.java b/common/src/main/java/org/apache/drill/common/util/TestTools.java
index 1ad235b..30d073e 100644
--- a/common/src/main/java/org/apache/drill/common/util/TestTools.java
+++ b/common/src/main/java/org/apache/drill/common/util/TestTools.java
@@ -49,4 +49,27 @@ public class TestTools {
     return WORKING_PATH;
   }
 
+  private static final String PATH_SEPARATOR = System.getProperty("file.separator");
+  private static final String[] STRUCTURE = {"drill", "exec", "java-exec", "src", "test",
"resources"};
+
+  /**
+   * Returns fully qualified path where test resources reside if current working directory
is at any level in the
+   * following root->exec->java-exec->src->test->resources, throws an {@link
IllegalStateException} otherwise.
+   */
+  public static String getTestResourcesPath() {
+    final StringBuilder builder = new StringBuilder(WORKING_PATH);
+    for (int i=0; i< STRUCTURE.length; i++) {
+      if (WORKING_PATH.endsWith(STRUCTURE[i])) {
+        for (int j=i+1; j< STRUCTURE.length; j++) {
+          builder.append(PATH_SEPARATOR).append(STRUCTURE[j]);
+        }
+        return builder.toString();
+      }
+    }
+    final String msg = String.format("Unable to recognize working directory[%s]. The workspace
must be root or exec " +
+        "module.", WORKING_PATH);
+    throw new IllegalStateException(msg);
+  }
+
+
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
index fd2f850..7297c27 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/FileSystemPartitionDescriptor.java
@@ -84,8 +84,8 @@ public class FileSystemPartitionDescriptor extends AbstractPartitionDescriptor
{
 
   @Override
   public GroupScan createNewGroupScan(List<String> newFiles) throws IOException {
-    final FileSelection newFileSelection = new FileSelection(newFiles, getBaseTableLocation(),
true);
-    final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newFileSelection);
+    final FileSelection newSelection = FileSelection.create(null, newFiles, getBaseTableLocation());
+    final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newSelection);
     return newScan;
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
index 5294b30..cda5a5e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/ParquetPartitionDescriptor.java
@@ -46,7 +46,6 @@ public class ParquetPartitionDescriptor extends AbstractPartitionDescriptor
{
 
   private final List<SchemaPath> partitionColumns;
   private final DrillScanRel scanRel;
-  static final int MAX_NESTED_SUBDIRS = 10;
 
   public ParquetPartitionDescriptor(PlannerSettings settings, DrillScanRel scanRel) {
     ParquetGroupScan scan = (ParquetGroupScan) scanRel.getGroupScan();
@@ -81,8 +80,8 @@ public class ParquetPartitionDescriptor extends AbstractPartitionDescriptor
{
 
   @Override
   public GroupScan createNewGroupScan(List<String> newFiles) throws IOException {
-    final FileSelection newFileSelection = new FileSelection(newFiles, getBaseTableLocation(),
true);
-    final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newFileSelection);
+    final FileSelection newSelection = FileSelection.create(null, newFiles, getBaseTableLocation());
+    final FileGroupScan newScan = ((FileGroupScan)scanRel.getGroupScan()).clone(newSelection);
     return newScan;
   }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
index 37011d9..fb51bfc 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/BasicFormatMatcher.java
@@ -138,9 +138,19 @@ public class BasicFormatMatcher extends FormatMatcher{
     }
 
     public boolean matches(DrillFileSystem fs, FileStatus status) throws IOException{
-      if (ranges.isEmpty()) {
+      if (ranges.isEmpty() || status.isDirectory()) {
         return false;
       }
+      // walk all the way down in the symlinks until a hard entry is reached
+      FileStatus current = status;
+      while (current.isSymlink()) {
+        current = fs.getFileStatus(status.getSymlink());
+      }
+      // if hard entry is not a file nor can it be a symlink then it is not readable simply
deny matching.
+      if (!current.isFile()) {
+        return false;
+      }
+
       final Range<Long> fileRange = Range.closedOpen( 0L, status.getLen());
 
       try (FSDataInputStream is = fs.open(status.getPath())) {

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
index d17cfed..e6cb653 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
@@ -19,82 +19,82 @@ package org.apache.drill.exec.store.dfs;
 
 import java.io.IOException;
 import java.net.URI;
-import java.util.Collections;
 import java.util.List;
-import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import javax.annotation.Nullable;
 
-import com.google.common.base.Stopwatch;
-import org.apache.commons.lang3.ArrayUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.apache.drill.exec.store.parquet.Metadata.ParquetTableMetadata_v1;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Strings;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import com.google.common.collect.Lists;
-
 /**
- * Jackson serializable description of a file selection. Maintains an internal set of file
statuses. However, also
- * serializes out as a list of Strings. All accessing methods first regenerate the FileStatus
objects if they are not
- * available.  This allows internal movement of FileStatus and the ability to serialize if
need be.
+ * Jackson serializable description of a file selection.
  */
 public class FileSelection {
-  static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSelection.class);
+  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(FileSelection.class);
+  private static final String PATH_SEPARATOR = System.getProperty("file.separator");
 
-  @JsonIgnore
   private List<FileStatus> statuses;
 
   public List<String> files;
-  public String selectionRoot;
-
-  // this is a temporary location for the reference to Parquet metadata
-  // TODO: ideally this should be in a Parquet specific derived class.
-  private ParquetTableMetadata_v1 parquetMeta = null;
+  public final String selectionRoot;
 
-  public FileSelection() {
-  }
-
-  public FileSelection(List<String> files, String selectionRoot, boolean dummy) {
-    this.files = files;
-    this.selectionRoot = selectionRoot;
-  }
-
-  public FileSelection(List<String> files, boolean dummy) {
+  /**
+   * Creates a {@link FileSelection selection} out of given file statuses/files and selection
root.
+   *
+   * @param statuses  list of file statuses
+   * @param files  list of files
+   * @param selectionRoot  root path for selections
+   */
+  protected FileSelection(final List<FileStatus> statuses, final List<String>
files, final String selectionRoot) {
+    this.statuses = statuses;
     this.files = files;
+    this.selectionRoot = Preconditions.checkNotNull(selectionRoot);
   }
 
-  public FileSelection(List<FileStatus> statuses) {
-    this(statuses, null);
+  /**
+   * Copy constructor for convenience.
+   */
+  protected FileSelection(final FileSelection selection) {
+    Preconditions.checkNotNull(selection, "selection cannot be null");
+    this.statuses = selection.statuses;
+    this.files = selection.files;
+    this.selectionRoot = selection.selectionRoot;
   }
 
-  public FileSelection(List<String> files, String selectionRoot,
-      ParquetTableMetadata_v1 meta) {
-    this.files = files;
-    this.selectionRoot = selectionRoot;
-    this.parquetMeta = meta;
+  public String getSelectionRoot() {
+    return selectionRoot;
   }
 
-  public FileSelection(List<FileStatus> statuses, String selectionRoot) {
-    this.statuses = statuses;
-    this.files = Lists.newArrayList();
-    for (FileStatus f : statuses) {
-      files.add(f.getPath().toString());
+  public List<FileStatus> getStatuses(final DrillFileSystem fs) throws IOException
{
+    if (statuses == null) {
+      final List<FileStatus> newStatuses = Lists.newArrayList();
+      for (final String pathStr:files) {
+        newStatuses.add(fs.getFileStatus(new Path(pathStr)));
+      }
+      statuses = newStatuses;
     }
-    this.selectionRoot = selectionRoot;
+    return statuses;
   }
 
-  public FileSelection(List<String> files, String selectionRoot,
-                       ParquetTableMetadata_v1 meta, List<FileStatus> statuses) {
-    this.files = files;
-    this.selectionRoot = selectionRoot;
-    this.parquetMeta = meta;
-    this.statuses = statuses;
+  public List<String> getFiles() {
+    if (files == null) {
+      final List<String> newFiles = Lists.newArrayList();
+      for (final FileStatus status:statuses) {
+        newFiles.add(status.getPath().toString());
+      }
+      files = newFiles;
+    }
+    return files;
   }
 
   public boolean containsDirectories(DrillFileSystem fs) throws IOException {
-    init(fs);
-    for (FileStatus p : statuses) {
-      if (p.isDirectory()) {
+    for (final FileStatus status : getStatuses(fs)) {
+      if (status.isDirectory()) {
         return true;
       }
     }
@@ -102,110 +102,127 @@ public class FileSelection {
   }
 
   public FileSelection minusDirectories(DrillFileSystem fs) throws IOException {
-    Stopwatch timer = new Stopwatch();
-    timer.start();
-    init(fs);
-    List<FileStatus> newList = Lists.newArrayList();
-    for (FileStatus p : statuses) {
-      if (p.isDirectory()) {
-        List<FileStatus> statuses = fs.list(true, p.getPath());
-        for (FileStatus s : statuses) {
-          newList.add(s);
-        }
-      } else {
-        newList.add(p);
-      }
+    final List<FileStatus> statuses = getStatuses(fs);
+    final int total = statuses.size();
+    final Path[] paths = new Path[total];
+    for (int i=0; i<total; i++) {
+      paths[i] = statuses.get(i).getPath();
     }
-    logger.info("FileSelection.minusDirectories() took {} ms, numFiles: {}",
-        timer.elapsed(TimeUnit.MILLISECONDS), newList.size());
-    return new FileSelection(newList, selectionRoot);
+    final List<FileStatus> allStats = fs.list(true, paths);
+    final List<FileStatus> nonDirectories = Lists.newArrayList(Iterables.filter(allStats,
new Predicate<FileStatus>() {
+      @Override
+      public boolean apply(@Nullable FileStatus status) {
+        return !status.isDirectory();
+      }
+    }));
+
+    return create(nonDirectories, null, selectionRoot);
   }
 
   public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
-    init(fs);
-    return statuses.get(0);
+    return getStatuses(fs).get(0);
   }
 
-  public List<String> getAsFiles() {
-    if (!files.isEmpty()) {
-      return files;
+  private static String commonPath(final List<FileStatus> statuses) {
+    if (statuses == null || statuses.isEmpty()) {
+      return "";
     }
-    if (statuses == null) {
-      return Collections.emptyList();
-    }
-    List<String> files = Lists.newArrayList();
-    for (FileStatus s : statuses) {
-      files.add(s.getPath().toString());
-    }
-    return files;
-  }
 
-  private void init(DrillFileSystem fs) throws IOException {
-    Stopwatch timer = new Stopwatch();
-    timer.start();
-    if (files != null && statuses == null) {
-      statuses = Lists.newArrayList();
-      for (String p : files) {
-        statuses.add(fs.getFileStatus(new Path(p)));
-      }
+    final List<String> files = Lists.newArrayList();
+    for (final FileStatus status : statuses) {
+      files.add(status.getPath().toString());
     }
-    logger.info("FileSelection.init() took {} ms, numFiles: {}",
-        timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : statuses.size());
-  }
-
-  public List<FileStatus> getFileStatusList(DrillFileSystem fs) throws IOException
{
-    init(fs);
-    return statuses;
+    return commonPathForFiles(files);
   }
 
   /**
-   * Return the parquet table metadata that may have been read
-   * from a metadata cache file during creation of this file selection.
-   * It will always be null for non-parquet files and null for cases
-   * where no metadata cache was created.
+   * Returns longest common path for the given list of files.
+   *
+   * @param files  list of files.
+   * @return  longest common path
    */
-  public ParquetTableMetadata_v1 getParquetMetadata() {
-    return parquetMeta;
-  }
+  private static String commonPathForFiles(final List<String> files) {
+    if (files == null || files.isEmpty()) {
+      return "";
+    }
 
-  private static String commonPath(FileStatus... paths) {
-    String commonPath = "";
-    String[][] folders = new String[paths.length][];
-    for (int i = 0; i < paths.length; i++) {
-      folders[i] = Path.getPathWithoutSchemeAndAuthority(paths[i].getPath()).toString().split("/");
+    final int total = files.size();
+    final String[][] folders = new String[total][];
+    int shortest = Integer.MAX_VALUE;
+    for (int i = 0; i < total; i++) {
+      final Path path = new Path(files.get(i));
+      folders[i] = Path.getPathWithoutSchemeAndAuthority(path).toString().split(PATH_SEPARATOR);
+      shortest = Math.min(shortest, folders[i].length);
     }
-    for (int j = 0; j < folders[0].length; j++) {
-      String thisFolder = folders[0][j];
-      boolean allMatched = true;
-      for (int i = 1; i < folders.length && allMatched; i++) {
-        if (folders[i].length < j) {
-          allMatched = false;
-          break;
+
+    int latest;
+    out:
+    for (latest = 0; latest < shortest; latest++) {
+      final String current = folders[0][latest];
+      for (int i = 1; i < folders.length; i++) {
+        if (!current.equals(folders[i][latest])) {
+          break out;
         }
-        allMatched &= folders[i][j].equals(thisFolder);
-      }
-      if (allMatched) {
-        commonPath += thisFolder + "/";
-      } else {
-        break;
       }
     }
-    URI oneURI = paths[0].getPath().toUri();
-    return new Path(oneURI.getScheme(), oneURI.getAuthority(), commonPath).toString();
+    final Path path = new Path(files.get(0));
+    final URI uri = path.toUri();
+    final String pathString = buildPath(folders[0], latest);
+    return new Path(uri.getScheme(), uri.getAuthority(), pathString).toString();
   }
 
-  public static FileSelection create(DrillFileSystem fs, String parent, String path) throws
IOException {
-    Path p = new Path(parent,removeLeadingSlash(path));
-    FileStatus[] status = fs.globStatus(p);
-    if (status == null || status.length == 0) {
+  private static String buildPath(final String[] path, final int folderIndex) {
+    final StringBuilder builder = new StringBuilder();
+    for (int i=0; i<folderIndex; i++) {
+      builder.append(path[i]).append(PATH_SEPARATOR);
+    }
+    builder.deleteCharAt(builder.length()-1);
+    return builder.toString();
+  }
+
+  public static FileSelection create(final DrillFileSystem fs, final String parent, final
String path) throws IOException {
+    final Path combined = new Path(parent, removeLeadingSlash(path));
+    final FileStatus[] statuses = fs.globStatus(combined);
+    if (statuses == null) {
+      return null;
+    }
+    return create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+  }
+
+  /**
+   * Creates a {@link FileSelection selection} with the given file statuses/files and selection
root.
+   *
+   * @param statuses  list of file statuses
+   * @param files  list of files
+   * @param root  root path for selections
+   *
+   * @return  null if creation of {@link FileSelection} fails with an {@link IllegalArgumentException}
+   *          otherwise a new selection.
+   *
+   * @see FileSelection#FileSelection(List, List, String)
+   */
+  public static FileSelection create(final List<FileStatus> statuses, final List<String>
files, final String root) {
+    final boolean bothNonEmptySelection = (statuses != null && statuses.size() >
0) && (files != null && files.size() == 0);
+    final boolean bothEmptySelection = (statuses == null || statuses.size() == 0) &&
(files == null || files.size() == 0);
+
+    if (bothNonEmptySelection || bothEmptySelection) {
       return null;
     }
-    if (status.length == 1) {
-      URI oneURI = status[0].getPath().toUri();
-      String selectionRoot = new Path(oneURI.getScheme(), oneURI.getAuthority(), p.toUri().getPath()).toString();
-      return new FileSelection(Collections.singletonList(status[0]), selectionRoot);
+
+    final String selectionRoot;
+    if (statuses == null || statuses.isEmpty()) {
+      selectionRoot = commonPathForFiles(files);
+    } else {
+      if (statuses.size() == 1 && !Strings.isNullOrEmpty(root)) {
+        final Path rootPath = new Path(root);
+        final URI uri = statuses.get(0).getPath().toUri();
+        final Path path = new Path(uri.getScheme(), uri.getAuthority(), rootPath.toUri().getPath());
+        selectionRoot = path.toString();
+      } else {
+        selectionRoot = commonPath(statuses);
+      }
     }
-    return new FileSelection(Lists.newArrayList(status), commonPath(status));
+    return new FileSelection(statuses, files, selectionRoot);
   }
 
   private static String removeLeadingSlash(String path) {

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
index eda9d4d..14f1441 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatPlugin.java
@@ -27,7 +27,6 @@ import org.apache.drill.common.logical.StoragePluginConfig;
 import org.apache.drill.exec.physical.base.AbstractGroupScan;
 import org.apache.drill.exec.physical.base.AbstractWriter;
 import org.apache.drill.exec.physical.base.PhysicalOperator;
-import org.apache.drill.exec.planner.logical.DrillTable;
 import org.apache.drill.exec.server.DrillbitContext;
 import org.apache.drill.exec.store.StoragePluginOptimizerRule;
 import org.apache.hadoop.conf.Configuration;

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
index 1ea7da7..4473c5c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FormatSelection.java
@@ -37,7 +37,7 @@ public class FormatSelection {
   @JsonCreator
   public FormatSelection(@JsonProperty("format") FormatPluginConfig format, @JsonProperty("files")
List<String> files){
     this.format = format;
-    this.selection = new FileSelection(files, true);
+    this.selection = FileSelection.create(null, files, null);
   }
 
   public FormatSelection(FormatPluginConfig format, FileSelection selection) {
@@ -53,7 +53,7 @@ public class FormatSelection {
 
   @JsonProperty("files")
   public List<String> getAsFiles(){
-    return selection.getAsFiles();
+    return selection.getFiles();
   }
 
   @JsonIgnore

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
index ac832da..37da606 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/WorkspaceSchemaFactory.java
@@ -316,16 +316,16 @@ public class WorkspaceSchemaFactory {
     @Override
     public DrillTable create(String key) {
       try {
-
-        FileSelection fileSelection = FileSelection.create(fs, config.getLocation(), key);
-        if (fileSelection == null) {
+        final FileSelection selection = FileSelection.create(fs, config.getLocation(), key);
+        if (selection == null) {
           return null;
         }
 
-        if (fileSelection.containsDirectories(fs)) {
-          for (FormatMatcher m : dirMatchers) {
+        final boolean hasDirectories = selection.containsDirectories(fs);
+        if (hasDirectories) {
+          for (final FormatMatcher matcher : dirMatchers) {
             try {
-              DrillTable table = m.isReadable(fs, fileSelection, plugin, storageEngineName,
schemaConfig.getUserName());
+              DrillTable table = matcher.isReadable(fs, selection, plugin, storageEngineName,
schemaConfig.getUserName());
               if (table != null) {
                 return table;
               }
@@ -333,11 +333,15 @@ public class WorkspaceSchemaFactory {
               logger.debug("File read failed.", e);
             }
           }
-          fileSelection = fileSelection.minusDirectories(fs);
         }
 
-        for (FormatMatcher m : fileMatchers) {
-          DrillTable table = m.isReadable(fs, fileSelection, plugin, storageEngineName, schemaConfig.getUserName());
+        final FileSelection newSelection = hasDirectories ? selection.minusDirectories(fs)
: selection;
+        if (newSelection == null) {
+          return null;
+        }
+
+        for (final FormatMatcher matcher : fileMatchers) {
+          DrillTable table = matcher.isReadable(fs, newSelection, plugin, storageEngineName,
schemaConfig.getUserName());
           if (table != null) {
             return table;
           }
@@ -396,7 +400,7 @@ public class WorkspaceSchemaFactory {
 
       FormatMatcher matcher = null;
       Queue<FileStatus> listOfFiles = new LinkedList<>();
-      listOfFiles.addAll(fileSelection.getFileStatusList(fs));
+      listOfFiles.addAll(fileSelection.getStatuses(fs));
 
       while (!listOfFiles.isEmpty()) {
         FileStatus currentFile = listOfFiles.poll();

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
index a559beb..d75b6f6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/easy/EasyGroupScan.java
@@ -78,7 +78,7 @@ public class EasyGroupScan extends AbstractFileGroupScan{
       @JsonProperty("selectionRoot") String selectionRoot
       ) throws IOException, ExecutionSetupException {
         this(ImpersonationUtil.resolveUserName(userName),
-            new FileSelection(files, true),
+            FileSelection.create(null, files, selectionRoot),
             (EasyFormatPlugin<?>)engineRegistry.getFormatPlugin(storageConfig, formatConfig),
             columns,
             selectionRoot);
@@ -130,7 +130,7 @@ public class EasyGroupScan extends AbstractFileGroupScan{
     final DrillFileSystem dfs = ImpersonationUtil.createFileSystem(getUserName(), formatPlugin.getFsConf());
     this.selection = selection;
     BlockMapBuilder b = new BlockMapBuilder(dfs, formatPlugin.getContext().getBits());
-    this.chunks = b.generateFileWork(selection.getFileStatusList(dfs), formatPlugin.isBlockSplittable());
+    this.chunks = b.generateFileWork(selection.getStatuses(dfs), formatPlugin.isBlockSplittable());
     this.maxWidth = chunks.size();
     this.endpointAffinities = AffinityCreator.getAffinityMap(chunks);
   }
@@ -152,7 +152,7 @@ public class EasyGroupScan extends AbstractFileGroupScan{
 
   @JsonProperty("files")
   public List<String> getFiles() {
-    return selection.getAsFiles();
+    return selection.getFiles();
   }
 
   @JsonProperty("columns")

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
index f5dcb7d..9393f4a 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/sequencefile/SequenceFileFormatPlugin.java
@@ -22,7 +22,6 @@ import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.common.logical.StoragePluginConfig;
 import org.apache.drill.exec.ops.FragmentContext;
 import org.apache.drill.exec.physical.base.AbstractGroupScan;
-import org.apache.drill.exec.proto.UserBitShared;
 import org.apache.drill.exec.server.DrillbitContext;
 import org.apache.drill.exec.store.RecordReader;
 import org.apache.drill.exec.store.RecordWriter;

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
new file mode 100644
index 0000000..26ebfc5
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFileSelection.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import com.google.common.base.Preconditions;
+import org.apache.drill.exec.store.dfs.FileSelection;
+import org.apache.drill.exec.store.parquet.Metadata.ParquetTableMetadata_v1;
+
+/**
+ * Parquet specific {@link FileSelection selection} that carries out {@link ParquetTableMetadata_v1
metadata} along.
+ */
+public class ParquetFileSelection extends FileSelection {
+//  private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetFileSelection.class);
+
+  private final ParquetTableMetadata_v1 metadata;
+
+  protected ParquetFileSelection(final FileSelection delegate, final ParquetTableMetadata_v1
metadata) {
+    super(delegate);
+    this.metadata = Preconditions.checkNotNull(metadata, "Parquet metadata cannot be null");
+  }
+
+  /**
+   * Return the parquet table metadata that may have been read
+   * from a metadata cache file during creation of this file selection.
+   * It will always be null for non-parquet files and null for cases
+   * where no metadata cache was created.
+   */
+  public ParquetTableMetadata_v1 getParquetMetadata() {
+    return metadata;
+  }
+
+  /**
+   * Creates a new Parquet specific selection wrapping the given {@link FileSelection selection}.
+   *
+   * @param selection  inner file selection
+   * @param metadata  parquet metadata
+   * @return  null if selection is null
+   *          otherwise a new selection
+   */
+  public static ParquetFileSelection create(final FileSelection selection, final ParquetTableMetadata_v1
metadata) {
+    if (selection == null) {
+      return null;
+    }
+    return new ParquetFileSelection(selection, metadata);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index 91cd112..4932aaf 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -234,9 +234,9 @@ public class ParquetFormatPlugin implements FormatPlugin{
         // file:/a/b.  The reason is that the file names above have been created in the form
         // /a/b/c.parquet and the format of the selection root must match that of the file
names
         // otherwise downstream operations such as partition pruning can break.
-        Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
-        return new FileSelection(fileNames, metaRootPath.toString(), metadata, /* save metadata
for future use */
-            selection.getFileStatusList(fs));
+        final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
+        final FileSelection newSelection = FileSelection.create(null, fileNames, metaRootPath.toString());
+        return ParquetFileSelection.create(newSelection, metadata);
       } else {
         // don't expand yet; ParquetGroupScan's metadata gathering operation
         // does that.

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
index ce2f845..3a9fc0d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetGroupScan.java
@@ -19,7 +19,6 @@ package org.apache.drill.exec.store.parquet;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -178,13 +177,16 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
     this.fs = ImpersonationUtil.createFileSystem(userName, formatPlugin.getFsConf());
 
     this.entries = Lists.newArrayList();
-    List<FileStatus> files = selection.getFileStatusList(fs);
+    final List<FileStatus> files = selection.getStatuses(fs);
     for (FileStatus file : files) {
       entries.add(new ReadEntryWithPath(file.getPath().toString()));
     }
 
     this.selectionRoot = selectionRoot;
-    this.parquetTableMetadata = selection.getParquetMetadata();
+    if (selection instanceof ParquetFileSelection) {
+      final ParquetFileSelection pfs = ParquetFileSelection.class.cast(selection);
+      this.parquetTableMetadata = pfs.getParquetMetadata();
+    }
 
     init();
   }
@@ -341,7 +343,7 @@ public class ParquetGroupScan extends AbstractFileGroupScan {
   public void modifyFileSelection(FileSelection selection) {
     entries.clear();
     fileSet = Sets.newHashSet();
-    for (String fileName : selection.getAsFiles()) {
+    for (String fileName : selection.getFiles()) {
       entries.add(new ReadEntryWithPath(fileName));
       fileSet.add(fileName);
     }

http://git-wip-us.apache.org/repos/asf/drill/blob/367d74a6/exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java
new file mode 100644
index 0000000..82f45ae
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/dfs/TestFileSelection.java
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.dfs;
+
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.List;
+
+import com.google.common.collect.ImmutableList;
+import org.apache.drill.BaseTestQuery;
+import org.apache.drill.common.util.TestTools;
+import org.apache.hadoop.fs.FileStatus;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+public class TestFileSelection extends BaseTestQuery {
+  private static final List<FileStatus> EMPTY_STATUSES = ImmutableList.of();
+  private static final List<String> EMPTY_FILES = ImmutableList.of();
+  private static final String EMPTY_ROOT = "";
+
+  @Test
+  public void testCreateReturnsNullWhenArgumentsAreIllegal() {
+    for (final Object statuses : new Object[] { null, EMPTY_STATUSES}) {
+      for (final Object files : new Object[]{null, EMPTY_FILES}) {
+        for (final Object root : new Object[]{null, EMPTY_ROOT}) {
+          final FileSelection selection = FileSelection.create((List<FileStatus>) statuses,
(List<String>) files,
+              (String)root);
+          assertNull(selection);
+        }
+      }
+    }
+  }
+
+
+  @Test(expected = Exception.class)
+  public void testEmptyFolderThrowsTableNotFound() throws Exception {
+    final String table = String.format("%s/empty", TestTools.getTestResourcesPath());
+    final String query = String.format("select * from dfs.`%s`", table);
+    try {
+      testNoResult(query);
+    } catch (Exception ex) {
+      final String pattern = String.format("%s' not found", table).toLowerCase();
+      final boolean isTableNotFound = ex.getMessage().toLowerCase().contains(pattern);
+      assertTrue(isTableNotFound);
+      throw ex;
+    }
+  }
+
+}


Mime
View raw message