drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j...@apache.org
Subject [8/9] drill git commit: DRILL-4380: Fix performance regression: in creation of FileSelection in ParquetFormatPlugin to not set files if metadata cache is available. This closes #369
Date Tue, 16 Feb 2016 21:23:26 GMT
DRILL-4380: Fix performance regression: in creation of FileSelection in ParquetFormatPlugin
to not set files if metadata cache is available. This closes #369

This change is slightly modified from the version in the master branch,
as the 1.5 release branch did not include the Guava version update. The
change simply uses the old constructor pattern for creating StopWatch
objects.

Also backported the change of the FileSelection object constructor to
public access. As stated in the DRILL-4380 discussion the usage patterns
of creating FileSelections and a clarification of the API will be covered
by DRILL-4381.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/921bb4f0
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/921bb4f0
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/921bb4f0

Branch: refs/heads/1.5.0
Commit: 921bb4f0293f01ecb021bbaeecc7ebe496a7e559
Parents: a9383ee
Author: Parth Chandra <parthc@apache.org>
Authored: Thu Dec 17 16:30:42 2015 -0800
Committer: Jason Altekruse <altekrusejason@gmail.com>
Committed: Tue Feb 9 15:19:25 2016 -0800

----------------------------------------------------------------------
 .../drill/exec/store/dfs/FileSelection.java     | 23 ++++++++++++++++----
 .../exec/store/parquet/ParquetFormatPlugin.java |  2 +-
 2 files changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/921bb4f0/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
index 6df3ffc..c384401 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
@@ -20,11 +20,13 @@ package org.apache.drill.exec.store.dfs;
 import java.io.IOException;
 import java.net.URI;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 import javax.annotation.Nullable;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
+import com.google.common.base.Stopwatch;
 import com.google.common.base.Strings;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
@@ -52,7 +54,7 @@ public class FileSelection {
    * @param files  list of files
    * @param selectionRoot  root path for selections
    */
-  protected FileSelection(final List<FileStatus> statuses, final List<String>
files, final String selectionRoot) {
+  public FileSelection(final List<FileStatus> statuses, final List<String> files,
final String selectionRoot) {
     this.statuses = statuses;
     this.files = files;
     this.selectionRoot = Preconditions.checkNotNull(selectionRoot);
@@ -73,13 +75,18 @@ public class FileSelection {
   }
 
   public List<FileStatus> getStatuses(final DrillFileSystem fs) throws IOException
{
-    if (statuses == null) {
+    Stopwatch timer = new Stopwatch().start();
+
+    if (statuses == null)  {
       final List<FileStatus> newStatuses = Lists.newArrayList();
       for (final String pathStr:files) {
         newStatuses.add(fs.getFileStatus(new Path(pathStr)));
       }
       statuses = newStatuses;
     }
+    logger.debug("FileSelection.getStatuses() took {} ms, numFiles: {}",
+        timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : statuses.size());
+
     return statuses;
   }
 
@@ -104,6 +111,7 @@ public class FileSelection {
   }
 
   public FileSelection minusDirectories(DrillFileSystem fs) throws IOException {
+    Stopwatch timer = new Stopwatch().start();
     final List<FileStatus> statuses = getStatuses(fs);
     final int total = statuses.size();
     final Path[] paths = new Path[total];
@@ -118,7 +126,10 @@ public class FileSelection {
       }
     }));
 
-    return create(nonDirectories, null, selectionRoot);
+    final FileSelection fileSel = create(nonDirectories, null, selectionRoot);
+    logger.debug("FileSelection.minusDirectories() took {} ms, numFiles: {}",
+        timer.elapsed(TimeUnit.MILLISECONDS), total);
+    return fileSel;
   }
 
   public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
@@ -183,12 +194,16 @@ public class FileSelection {
   }
 
   public static FileSelection create(final DrillFileSystem fs, final String parent, final
String path) throws IOException {
+    Stopwatch timer = new Stopwatch().start();
     final Path combined = new Path(parent, removeLeadingSlash(path));
     final FileStatus[] statuses = fs.globStatus(combined);
     if (statuses == null) {
       return null;
     }
-    return create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+    final FileSelection fileSel = create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+    logger.debug("FileSelection.create() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS));
+    return fileSel;
+
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/drill/blob/921bb4f0/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index e2cc670..a924bea 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -233,7 +233,7 @@ public class ParquetFormatPlugin implements FormatPlugin{
         // /a/b/c.parquet and the format of the selection root must match that of the file
names
         // otherwise downstream operations such as partition pruning can break.
         final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
-        final FileSelection newSelection = FileSelection.create(null, fileNames, metaRootPath.toString());
+        final FileSelection newSelection = new FileSelection(selection.getStatuses(fs), fileNames,
metaRootPath.toString());
         return ParquetFileSelection.create(newSelection, metadata);
       } else {
         // don't expand yet; ParquetGroupScan's metadata gathering operation


Mime
View raw message