hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From xu...@apache.org
Subject svn commit: r1646510 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql: exec/Utilities.java io/CombineHiveInputFormat.java
Date Thu, 18 Dec 2014 18:54:04 GMT
Author: xuefu
Date: Thu Dec 18 18:54:04 2014
New Revision: 1646510

URL: http://svn.apache.org/r1646510
Log:
HIVE-9127: Improve CombineHiveInputFormat.getSplit performance (Brock via Xuefu)

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1646510&r1=1646509&r2=1646510&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Thu Dec 18 18:54:04 2014
@@ -419,8 +419,9 @@ public final class Utilities {
       LOG.info("No plan file found: "+path);
       return null;
     } catch (Exception e) {
-      LOG.error("Failed to load plan: "+path, e);
-      throw new RuntimeException(e);
+      String msg = "Failed to load plan: " + path + ": " + e;
+      LOG.error(msg, e);
+      throw new RuntimeException(msg, e);
     } finally {
       if (in != null) {
         try {
@@ -702,11 +703,11 @@ public final class Utilities {
 
       // Cache the plan in this process
       gWorkMap.put(planPath, w);
-
       return planPath;
     } catch (Exception e) {
-      e.printStackTrace();
-      throw new RuntimeException(e);
+      String msg = "Error caching " + name + ": " + e;
+      LOG.error(msg, e);
+      throw new RuntimeException(msg, e);
     }
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java?rev=1646510&r1=1646509&r2=1646510&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java Thu Dec 18 18:54:04 2014
@@ -82,8 +82,9 @@ public class CombineHiveInputFormat<K ex
    */
   public static class CombineHiveInputSplit extends InputSplitShim {
 
-    String inputFormatClassName;
-    CombineFileSplit inputSplitShim;
+    private String inputFormatClassName;
+    private CombineFileSplit inputSplitShim;
+    private Map<String, PartitionDesc> pathToPartitionInfo;
 
     public CombineHiveInputSplit() throws IOException {
       this(ShimLoader.getHadoopShims().getCombineFileInputFormat()
@@ -93,20 +94,25 @@ public class CombineHiveInputFormat<K ex
     public CombineHiveInputSplit(CombineFileSplit inputSplitShim) throws IOException {
       this(inputSplitShim.getJob(), inputSplitShim);
     }
-
     public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim)
         throws IOException {
+      this(job, inputSplitShim, null);
+    }
+    public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
+        Map<String, PartitionDesc> pathToPartitionInfo) throws IOException {
       this.inputSplitShim = inputSplitShim;
+      this.pathToPartitionInfo = pathToPartitionInfo;
       if (job != null) {
-        Map<String, PartitionDesc> pathToPartitionInfo = Utilities
-            .getMapWork(job).getPathToPartitionInfo();
+        if (this.pathToPartitionInfo == null) {
+          this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+        }
 
         // extract all the inputFormatClass names for each chunk in the
         // CombinedSplit.
         Path[] ipaths = inputSplitShim.getPaths();
         if (ipaths.length > 0) {
           PartitionDesc part = HiveFileFormatUtils
-              .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+              .getPartitionDescFromPathRecursively(this.pathToPartitionInfo,
                   ipaths[0], IOPrepareCache.get().getPartitionDescMap());
           inputFormatClassName = part.getInputFileFormatClass().getName();
         }
@@ -215,8 +221,9 @@ public class CombineHiveInputFormat<K ex
       inputSplitShim.write(out);
 
       if (inputFormatClassName == null) {
-        Map<String, PartitionDesc> pathToPartitionInfo = Utilities
-            .getMapWork(getJob()).getPathToPartitionInfo();
+        if (pathToPartitionInfo == null) {
+          pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
+        }
 
         // extract all the inputFormatClass names for each chunk in the
         // CombinedSplit.
@@ -268,8 +275,8 @@ public class CombineHiveInputFormat<K ex
   /**
    * Create Hive splits based on CombineFileSplit.
    */
-  private InputSplit[] getCombineSplits(JobConf job,
-                                        int numSplits) throws IOException {
+  private InputSplit[] getCombineSplits(JobConf job, int numSplits, Map<String, PartitionDesc> pathToPartitionInfo)
+      throws IOException {
     PerfLogger perfLogger = PerfLogger.getPerfLogger();
     perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.GET_SPLITS);
     init(job);
@@ -438,7 +445,7 @@ public class CombineHiveInputFormat<K ex
     }
 
     for (CombineFileSplit is : iss) {
-      CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is);
+      CombineHiveInputSplit csplit = new CombineHiveInputSplit(job, is, pathToPartitionInfo);
       result.add(csplit);
     }
 
@@ -505,7 +512,8 @@ public class CombineHiveInputFormat<K ex
     if (combinablePaths.size() > 0) {
       FileInputFormat.setInputPaths(job, combinablePaths.toArray
           (new Path[combinablePaths.size()]));
-      InputSplit[] splits = getCombineSplits(job, numSplits);
+      Map<String, PartitionDesc> pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
+      InputSplit[] splits = getCombineSplits(job, numSplits, pathToPartitionInfo);
       for (InputSplit split : splits) {
         result.add(split);
       }



Mime
View raw message