drill-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From par...@apache.org
Subject drill git commit: DRILL-4380: Fix performance regression: in creation of FileSelection in ParquetFormatPlugin to not set files if metadata cache is available. This closes #369
Date Tue, 09 Feb 2016 22:24:26 GMT
Repository: drill
Updated Branches:
  refs/heads/master ed2f1ca8e -> 7bfcb40a0


DRILL-4380: Fix performance regression: in creation of FileSelection in ParquetFormatPlugin
to not set files if metadata cache is available. This closes #369


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/7bfcb40a
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/7bfcb40a
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/7bfcb40a

Branch: refs/heads/master
Commit: 7bfcb40a0ffa49a1ed27e1ff1f57378aa1136bbd
Parents: ed2f1ca
Author: Parth Chandra <parthc@apache.org>
Authored: Thu Dec 17 16:30:42 2015 -0800
Committer: Parth Chandra <parthc@apache.org>
Committed: Tue Feb 9 14:23:39 2016 -0800

----------------------------------------------------------------------
 .../drill/exec/store/dfs/FileSelection.java     | 21 +++++++++++++++++---
 .../exec/store/parquet/ParquetFormatPlugin.java |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/7bfcb40a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
index bc3cef3..1d79dfb 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/dfs/FileSelection.java
@@ -20,11 +20,13 @@ package org.apache.drill.exec.store.dfs;
 import java.io.IOException;
 import java.net.URI;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 import java.util.regex.Pattern;
 import javax.annotation.Nullable;
 
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
+import com.google.common.base.Stopwatch;
 import com.google.common.base.Strings;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
@@ -73,13 +75,18 @@ public class FileSelection {
   }
 
   public List<FileStatus> getStatuses(final DrillFileSystem fs) throws IOException {
-    if (statuses == null) {
+    Stopwatch timer = Stopwatch.createStarted();
+
+    if (statuses == null)  {
       final List<FileStatus> newStatuses = Lists.newArrayList();
       for (final String pathStr:files) {
         newStatuses.add(fs.getFileStatus(new Path(pathStr)));
       }
       statuses = newStatuses;
     }
+    logger.debug("FileSelection.getStatuses() took {} ms, numFiles: {}",
+        timer.elapsed(TimeUnit.MILLISECONDS), statuses == null ? 0 : statuses.size());
+
     return statuses;
   }
 
@@ -104,6 +111,7 @@ public class FileSelection {
   }
 
   public FileSelection minusDirectories(DrillFileSystem fs) throws IOException {
+    Stopwatch timer = Stopwatch.createStarted();
     final List<FileStatus> statuses = getStatuses(fs);
     final int total = statuses.size();
     final Path[] paths = new Path[total];
@@ -118,7 +126,10 @@ public class FileSelection {
       }
     }));
 
-    return create(nonDirectories, null, selectionRoot);
+    final FileSelection fileSel = create(nonDirectories, null, selectionRoot);
+    logger.debug("FileSelection.minusDirectories() took {} ms, numFiles: {}",
+        timer.elapsed(TimeUnit.MILLISECONDS), total);
+    return fileSel;
   }
 
   public FileStatus getFirstPath(DrillFileSystem fs) throws IOException {
@@ -183,12 +194,16 @@ public class FileSelection {
   }
 
   public static FileSelection create(final DrillFileSystem fs, final String parent, final String path) throws IOException {
+    Stopwatch timer = Stopwatch.createStarted();
     final Path combined = new Path(parent, removeLeadingSlash(path));
     final FileStatus[] statuses = fs.globStatus(combined);
     if (statuses == null) {
       return null;
     }
-    return create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+    final FileSelection fileSel = create(Lists.newArrayList(statuses), null, combined.toUri().toString());
+    logger.debug("FileSelection.create() took {} ms ", timer.elapsed(TimeUnit.MILLISECONDS));
+    return fileSel;
+
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/drill/blob/7bfcb40a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
index e2cc670..a924bea 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFormatPlugin.java
@@ -233,7 +233,7 @@ public class ParquetFormatPlugin implements FormatPlugin{
         // /a/b/c.parquet and the format of the selection root must match that of the file names
         // otherwise downstream operations such as partition pruning can break.
         final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(metaRootDir.getPath());
-        final FileSelection newSelection = FileSelection.create(null, fileNames, metaRootPath.toString());
+        final FileSelection newSelection = new FileSelection(selection.getStatuses(fs), fileNames, metaRootPath.toString());
         return ParquetFileSelection.create(newSelection, metadata);
       } else {
         // don't expand yet; ParquetGroupScan's metadata gathering operation


Mime
View raw message