hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject hive git commit: HIVE-11541: ORC: Split Strategy should depend on global file count, not per-partition (Gopal V reviewed by Prasanth Jayachandran)
Date Thu, 13 Aug 2015 19:39:06 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 f389fc74e -> 3319d79ea


HIVE-11541: ORC: Split Strategy should depend on global file count, not per-partition (Gopal
V reviewed by Prasanth Jayachandran)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/3319d79e
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/3319d79e
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/3319d79e

Branch: refs/heads/branch-1
Commit: 3319d79eae2c7f1d63dac1c4c84fe6f6e86ddf81
Parents: f389fc7
Author: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Authored: Thu Aug 13 12:38:48 2015 -0700
Committer: Prasanth Jayachandran <j.prasanth.j@gmail.com>
Committed: Thu Aug 13 12:38:48 2015 -0700

----------------------------------------------------------------------
 ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/3319d79e/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 6fe0044..1426492 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -487,7 +487,6 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
     }
 
     private FileInfo verifyCachedFileInfo(FileStatus file) {
-      context.numFilesCounter.incrementAndGet();
       FileInfo fileInfo = Context.footerCache.getIfPresent(file.getPath());
       if (fileInfo != null) {
         if (LOG.isDebugEnabled()) {
@@ -675,6 +674,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
 
         int numFiles = children.size();
         long avgFileSize = totalFileSize / numFiles;
+        int totalFiles = context.numFilesCounter.addAndGet(numFiles);
         switch(context.splitStrategyKind) {
           case BI:
             // BI strategy requested through config
@@ -688,7 +688,7 @@ public class OrcInputFormat  implements InputFormat<NullWritable, OrcStruct>,
             break;
           default:
             // HYBRID strategy
-            if (avgFileSize > context.maxSize || numFiles <= context.minSplits) {
+            if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
               splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal,
deltas,
                   covered);
             } else {


Mime
View raw message