impala-commits mailing list archives

From tarmstr...@apache.org
Subject [2/4] incubator-impala git commit: IMPALA-4943: Speed up block md loading for add/recover partition calls.
Date Fri, 21 Apr 2017 22:50:08 GMT
IMPALA-4943: Speed up block md loading for add/recover partition calls.

This change makes alter table add/recover partitions calls use the
per-directory block metadata loading routine instead of loading the
metadata per file. These calls always load the entire partition
directory from scratch, so there is no advantage to loading the
metadata incrementally on a per-file basis.
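
For context, a minimal sketch of the two listing strategies the message
contrasts, written against the stock Hadoop FileSystem API rather than the
actual HdfsTable code; the class and method names below are illustrative only.
The per-file variant pays one getFileBlockLocations() round trip per file on
top of the directory listing, while the per-directory variant gets block
locations back from a single listLocatedStatus() call.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class BlockMdLoadingSketch {
  // Per-file loading: one listing plus one getFileBlockLocations() RPC per file.
  static List<BlockLocation[]> loadPerFile(FileSystem fs, Path partitionDir)
      throws IOException {
    List<BlockLocation[]> result = new ArrayList<>();
    for (FileStatus status : fs.listStatus(partitionDir)) {
      if (status.isDirectory()) continue;
      result.add(fs.getFileBlockLocations(status, 0, status.getLen()));
    }
    return result;
  }

  // Per-directory loading: a single listLocatedStatus() call returns each
  // file's block locations along with its status, avoiding the per-file
  // round trips.
  static List<BlockLocation[]> loadPerDirectory(FileSystem fs, Path partitionDir)
      throws IOException {
    List<BlockLocation[]> result = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(partitionDir);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      if (status.isDirectory()) continue;
      result.add(status.getBlockLocations());
    }
    return result;
  }

  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    Path dir = new Path(args[0]);
    System.out.println("files listed: " + loadPerDirectory(fs, dir).size());
  }
}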

Tests: Ran core tests and the metadata benchmark tests.

(I) Improvement: METADATA-BENCHMARKS()
100K-PARTITIONS-1M-FILES-03-RECOVER [text / none / none] (718.62s ->
549.91s [-23.48%])

(I) Improvement: METADATA-BENCHMARKS()
100K-PARTITIONS-1M-FILES-08-ADD-PARTITION [text / none / none] (46.92s
-> 26.20s [-44.15%])

Change-Id: I331f1f090518f317bcd7df069e480edbd8f039f1
Reviewed-on: http://gerrit.cloudera.org:8080/6651
Reviewed-by: Bharath Vissapragada <bharathv@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/8bd854df
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/8bd854df
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/8bd854df

Branch: refs/heads/master
Commit: 8bd854dfa6f40bd32e8fcd6f284c15b045b4f1ee
Parents: 7555316
Author: Bharath Vissapragada <bharathv@cloudera.com>
Authored: Fri Apr 14 12:42:45 2017 -0700
Committer: Impala Public Jenkins <impala-public-jenkins@gerrit.cloudera.org>
Committed: Fri Apr 21 20:53:26 2017 +0000

----------------------------------------------------------------------
 .../org/apache/impala/catalog/HdfsTable.java    | 25 ++++++++++++++++----
 1 file changed, 20 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8bd854df/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
index 30241b0..143e2b1 100644
--- a/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
+++ b/fe/src/main/java/org/apache/impala/catalog/HdfsTable.java
@@ -835,6 +835,19 @@ public class HdfsTable extends Table {
   }
 
   /**
+   * Helper method to load the partition file metadata from scratch. This method is
+   * optimized for loading newly added partitions. For refreshing existing partitions
+   * use refreshFileMetadata(HdfsPartition).
+   */
+  private void loadFileMetadataFromScratch(HdfsPartition partition) {
+    Path partitionDirPath = partition.getLocationPath();
+    Set<Path> dirsToLoad = Sets.newHashSet(partitionDirPath);
+    HashMap<Path, List<HdfsPartition>> partsByPath = Maps.newHashMap();
+    partsByPath.put(partitionDirPath, Lists.newArrayList(partition));
+    loadMetadataAndDiskIds(dirsToLoad, partsByPath);
+  }
+
+  /**
    * Helper method to load the block locations from each directory in 'locations'
    * and filtering only the paths from 'partsByPath'. Also loads the disk IDs
    * corresponding to these block locations.
@@ -903,7 +916,7 @@ public class HdfsTable extends Table {
       org.apache.hadoop.hive.metastore.api.Partition msPartition)
       throws CatalogException {
     HdfsPartition hdfsPartition = createPartition(storageDescriptor, msPartition);
-    refreshFileMetadata(hdfsPartition);
+    loadFileMetadataFromScratch(hdfsPartition);
     return hdfsPartition;
   }
 
@@ -1513,7 +1526,9 @@ public class HdfsTable extends Table {
   }
 
   /**
-   * Loads the file descriptors and block metadata of a list of partitions.
+   * Loads the file descriptors and block metadata of a list of partitions. This function
+   * is optimized for incremental loading of the partition file metadata. To load it from
+   * scratch, use loadFileMetadataFromScratch(HdfsPartition).
    */
   private void loadPartitionFileMetadata(List<HdfsPartition> partitions)
       throws Exception {
@@ -1548,8 +1563,7 @@ public class HdfsTable extends Table {
   /**
    * Loads the file descriptors and block metadata of a partition from its
    * StorageDescriptor. If 'partition' does not have an entry in the Hive Metastore,
-   * 'storageDescriptor' is the StorageDescriptor of the associated table. Populates
-   * 'perFsFileBlocks' with file block info and updates table metadata.
+   * 'storageDescriptor' is the StorageDescriptor of the associated table.
    */
   private void loadPartitionFileMetadata(StorageDescriptor storageDescriptor,
       HdfsPartition partition) throws Exception {
@@ -1994,8 +2008,9 @@ public class HdfsTable extends Table {
    */
   public void reloadPartition(HdfsPartition oldPartition, Partition hmsPartition)
       throws CatalogException {
-    HdfsPartition refreshedPartition = createAndLoadPartition(
+    HdfsPartition refreshedPartition = createPartition(
         hmsPartition.getSd(), hmsPartition);
+    refreshFileMetadata(refreshedPartition);
     Preconditions.checkArgument(oldPartition == null
         || oldPartition.compareTo(refreshedPartition) == 0);
     dropPartition(oldPartition);
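
As a rough illustration of the shape of the new helper above (build a set of
directories plus a path-to-partitions map, then hand both to one bulk loader),
the following sketch lists each directory once and attaches every file it
finds to the partitions registered under that directory. This is not the real
loadMetadataAndDiskIds(); PartitionStub and the bookkeeping it does are
placeholders.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class PerDirectoryLoaderSketch {
  /** Placeholder for HdfsPartition; records only the file names it receives. */
  static class PartitionStub {
    final List<String> fileNames = new ArrayList<>();
  }

  // Mirrors the dirsToLoad/partsByPath shape used in the diff: one
  // listLocatedStatus() call per directory, and every file found is handed to
  // all partitions registered under that directory.
  static void loadMetadataAndDiskIds(FileSystem fs, Set<Path> dirsToLoad,
      Map<Path, List<PartitionStub>> partsByPath) throws IOException {
    for (Path dir : dirsToLoad) {
      List<PartitionStub> partitions = partsByPath.get(dir);
      if (partitions == null) continue;  // no partition registered for this path
      RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(dir);
      while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        if (status.isDirectory()) continue;
        // Block locations (and, in the real code, disk IDs derived from them)
        // come back with the listing; no per-file RPC is needed.
        for (PartitionStub partition : partitions) {
          partition.fileNames.add(status.getPath().getName());
        }
      }
    }
  }
}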

