hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject hive git commit: HIVE-18160: Jar localization during session initialization is slow (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Date Wed, 29 Nov 2017 02:41:49 GMT
Repository: hive
Updated Branches:
  refs/heads/master 8d39a0887 -> 07fe7e210


HIVE-18160: Jar localization during session initialization is slow (Prasanth Jayachandran
reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/07fe7e21
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/07fe7e21
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/07fe7e21

Branch: refs/heads/master
Commit: 07fe7e210cb444aec43cb5adda37f8f7cd26f243
Parents: 8d39a08
Author: Prasanth Jayachandran <prasanthj@apache.org>
Authored: Tue Nov 28 18:41:26 2017 -0800
Committer: Prasanth Jayachandran <prasanthj@apache.org>
Committed: Tue Nov 28 18:41:26 2017 -0800

----------------------------------------------------------------------
 .../hive/ql/exec/tez/TezSessionState.java       | 49 ++++++++++++++------
 1 file changed, 35 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/07fe7e21/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
index 7a7fe15..6fa3724 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
@@ -19,9 +19,7 @@ package org.apache.hadoop.hive.ql.exec.tez;
 
 import java.util.Collection;
 import java.io.File;
-import java.io.FileNotFoundException;
 import java.io.IOException;
-import java.io.InputStream;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -39,15 +37,19 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicReference;
 import javax.security.auth.login.LoginException;
+
+import org.apache.commons.codec.binary.Hex;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.llap.LlapUtil;
 import org.apache.hadoop.hive.llap.coordinator.LlapCoordinator;
 import org.apache.hadoop.hive.llap.impl.LlapProtocolClientImpl;
 import org.apache.hadoop.hive.llap.security.LlapTokenClient;
@@ -87,6 +89,9 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.tez.monitoring.TezJobMonitor;
 
+import com.google.common.cache.Cache;
+import com.google.common.cache.CacheBuilder;
+
 /**
  * Holds session state related to Tez
  */
@@ -121,6 +126,7 @@ public class TezSessionState {
   private TriggerContext triggerContext;
   private KillQuery killQuery;
 
+  private static final Cache<String, String> shaCache = CacheBuilder.newBuilder().maximumSize(100).build();
   /**
    * Constructor. We do not automatically connect, because we only want to
    * load tez classes when the user has tez installed.
@@ -698,8 +704,7 @@ public class TezSessionState {
    * @throws URISyntaxException when current jar location cannot be determined.
    */
   private LocalResource createJarLocalResource(String localJarPath)
-      throws IOException, LoginException, IllegalArgumentException,
-      FileNotFoundException {
+      throws IOException, LoginException, IllegalArgumentException {
     // TODO Reduce the number of lookups that happen here. This shouldn't go to HDFS for each call.
     // The hiveJarDir can be determined once per client.
     FileStatus destDirStatus = utils.getHiveJarDirectory(conf);
@@ -726,6 +731,10 @@ public class TezSessionState {
     return utils.localizeResource(localFile, destFile, LocalResourceType.FILE, conf);
   }
 
+  private String getKey(final FileStatus fileStatus) {
+    return fileStatus.getPath() + ":" + fileStatus.getLen() + ":" + fileStatus.getModificationTime();
+  }
+
   private void addJarLRByClassName(String className, final Map<String, LocalResource> lrMap) throws
       IOException, LoginException {
     Class<?> clazz;
@@ -741,22 +750,34 @@ public class TezSessionState {
       LoginException {
     final File jar =
         new File(Utilities.jarFinderGetJar(clazz));
+    final String localJarPath = jar.toURI().toURL().toExternalForm();
     final LocalResource jarLr =
-        createJarLocalResource(jar.toURI().toURL().toExternalForm());
+      createJarLocalResource(localJarPath);
     lrMap.put(utils.getBaseName(jarLr), jarLr);
   }
 
-  private String getSha(Path localFile) throws IOException, IllegalArgumentException {
-    InputStream is = null;
-    try {
-      FileSystem localFs = FileSystem.getLocal(conf);
-      is = localFs.open(localFile);
-      return DigestUtils.sha256Hex(is);
-    } finally {
-      if (is != null) {
-        is.close();
+  private String getSha(final Path localFile) throws IOException, IllegalArgumentException {
+    FileSystem localFs = FileSystem.getLocal(conf);
+    FileStatus fileStatus = localFs.getFileStatus(localFile);
+    String key = getKey(fileStatus);
+    String sha256 = shaCache.getIfPresent(key);
+    if (sha256 == null) {
+      FSDataInputStream is = null;
+      try {
+        is = localFs.open(localFile);
+        long start = System.currentTimeMillis();
+        sha256 = DigestUtils.sha256Hex(is);
+        long end = System.currentTimeMillis();
+        LOG.info("Computed sha: {} for file: {} of length: {} in {} ms", sha256, localFile,
+          LlapUtil.humanReadableByteCount(fileStatus.getLen()), end - start);
+        shaCache.put(key, sha256);
+      } finally {
+        if (is != null) {
+          is.close();
+        }
       }
     }
+    return sha256;
   }
   public void setQueueName(String queueName) {
     this.queueName = queueName;


Mime
View raw message