drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From adeneche <...@git.apache.org>
Subject [GitHub] drill pull request: DRILL-2618: handle queries over empty folders ...
Date Thu, 19 Nov 2015 23:40:41 GMT
Github user adeneche commented on a diff in the pull request:

    https://github.com/apache/drill/pull/270#discussion_r45417976
  
    --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java ---
    @@ -102,77 +92,33 @@ public boolean containsDirectories(DrillFileSystem fs) throws IOException
{
       }
     
       public FileSelection minusDirectories(DrillFileSystem fs) throws IOException {
    -    Stopwatch timer = new Stopwatch();
    -    timer.start();
    -    init(fs);
    -    List<FileStatus> newList = Lists.newArrayList();
    -    for (FileStatus p : statuses) {
    -      if (p.isDirectory()) {
    -        List<FileStatus> statuses = fs.list(true, p.getPath());
    -        for (FileStatus s : statuses) {
    -          newList.add(s);
    -        }
    -      } else {
    -        newList.add(p);
    -      }
    -    }
    -    logger.info("FileSelection.minusDirectories() took {} ms, numFiles: {}",
    -        timer.elapsed(TimeUnit.MILLISECONDS), newList.size());
    -    return new FileSelection(newList, selectionRoot);
    -  }
    -
    -  public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
    -    init(fs);
    -    return statuses.get(0);
    -  }
    -
    -  public List<String> getAsFiles() {
    -    if (!files.isEmpty()) {
    -      return files;
    -    }
    -    if (statuses == null) {
    -      return Collections.emptyList();
    +    final List<FileStatus> statuses = getStatuses(fs);
    +    final int total = statuses.size();
    +    final Path[] paths = new Path[total];
    +    for (int i=0; i<total; i++) {
    +      paths[i] = statuses.get(i).getPath();
         }
    -    List<String> files = Lists.newArrayList();
    -    for (FileStatus s : statuses) {
    -      files.add(s.getPath().toString());
    -    }
    -    return files;
    -  }
    -
    -  private void init(DrillFileSystem fs) throws IOException {
    -    Stopwatch timer = new Stopwatch();
    -    timer.start();
    -    if (files != null && statuses == null) {
    -      statuses = Lists.newArrayList();
    -      for (String p : files) {
    -        statuses.add(fs.getFileStatus(new Path(p)));
    +    final List<FileStatus> allStats = fs.list(true, paths);
    +    final List<FileStatus> nonDirectories = Lists.newArrayList(Iterables.filter(allStats,
new Predicate<FileStatus>() {
    +      @Override
    +      public boolean apply(@Nullable FileStatus status) {
    +        return !status.isDirectory();
           }
    -    }
    -    logger.info("FileSelection.init() took {} ms, numFiles: {}",
    -        timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : statuses.size());
    -  }
    +    }));
     
    -  public List<FileStatus> getFileStatusList(DrillFileSystem fs) throws IOException
{
    -    init(fs);
    -    return statuses;
    +    return create(nonDirectories, null, selectionRoot);
       }
     
    -  /**
    -   * Return the parquet table metadata that may have been read
    -   * from a metadata cache file during creation of this file selection.
    -   * It will always be null for non-parquet files and null for cases
    -   * where no metadata cache was created.
    -   */
    -  public ParquetTableMetadata_v1 getParquetMetadata() {
    -    return parquetMeta;
    +  public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
    +    return getStatuses(fs).get(0);
       }
     
    -  private static String commonPath(FileStatus... paths) {
    +  private static String commonPath(List<FileStatus> statuses) {
    --- End diff --
    
    This method assumes that statuses is not null. That is not always the case; see my comment below.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message