hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From prasan...@apache.org
Subject hive git commit: HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Date Wed, 15 Jun 2016 01:49:39 GMT
Repository: hive
Updated Branches:
  refs/heads/branch-1 370f5d799 -> 22a910b1f


HIVE-13841: Orc split generation returns different strategies with cache enabled vs disabled
(Prasanth Jayachandran reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/22a910b1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/22a910b1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/22a910b1

Branch: refs/heads/branch-1
Commit: 22a910b1fbf2539c3bfd35399dfa552d590a5f0b
Parents: 370f5d7
Author: Prasanth Jayachandran <prasanthj@apache.org>
Authored: Tue Jun 14 18:49:21 2016 -0700
Committer: Prasanth Jayachandran <prasanthj@apache.org>
Committed: Tue Jun 14 18:49:21 2016 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/io/orc/OrcInputFormat.java   |  7 ++++---
 .../hive/ql/io/orc/TestInputOutputFormat.java   | 21 ++++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/22a910b1/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 2d6ef9a..9ac34b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -126,6 +126,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
   private static final long DEFAULT_MIN_SPLIT_SIZE = 16 * 1024 * 1024;
   private static final long DEFAULT_MAX_SPLIT_SIZE = 256 * 1024 * 1024;
+  private static final int DEFAULT_ETL_FILE_THRESHOLD = 100;
 
   private static final PerfLogger perfLogger = PerfLogger.getPerfLogger();
   private static final String CLASS_NAME = ReaderImpl.class.getName();
@@ -434,7 +435,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
     private final int numBuckets;
     private final long maxSize;
     private final long minSize;
-    private final int minSplits;
+    private final int etlFileThreshold;
     private final boolean footerInSplits;
     private final boolean cacheStripeDetails;
     private final AtomicInteger cacheHitCounter = new AtomicInteger(0);
@@ -469,7 +470,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
 
       cacheStripeDetails = (cacheStripeDetailsSize > 0);
 
-      this.minSplits = Math.min(cacheStripeDetailsSize, minSplits);
+      this.etlFileThreshold = minSplits <= 0 ? DEFAULT_ETL_FILE_THRESHOLD : minSplits;
 
       synchronized (Context.class) {
         if (threadPool == null) {
@@ -748,7 +749,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
             break;
           default:
             // HYBRID strategy
-            if (avgFileSize > context.maxSize || totalFiles <= context.minSplits) {
+            if (avgFileSize > context.maxSize || totalFiles <= context.etlFileThreshold) {
               splitStrategy = new ETLSplitStrategy(context, fs, dir, children, isOriginal, deltas,
                   covered);
             } else {

http://git-wip-us.apache.org/repos/asf/hive/blob/22a910b1/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index c0d912d..fa32bf6 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -509,6 +509,27 @@ public class TestInputOutputFormat {
         }
       }
     }
+
+    k = 0;
+    conf.set("hive.orc.cache.stripe.details.size", "-1");
+    for (int c : counts) {
+      for (int s : sizes) {
+        final FileSystem fs = generateMockFiles(c, s);
+        for (int n : numSplits) {
+          final OrcInputFormat.Context context = new OrcInputFormat.Context(
+              conf, n);
+          OrcInputFormat.FileGenerator gen = new OrcInputFormat.FileGenerator(
+              context, fs, new MockPath(fs, "mock:/a/b"));
+          final SplitStrategy splitStrategy = gen.call();
+          assertTrue(
+              String.format(
+                  "Split strategy for %d files x %d size for %d splits", c, s,
+                  n),
+              splitStrategy.getClass().getSimpleName()
+                  .equals(strategyResults[k++]));
+        }
+      }
+    }
   }
 
   @Test


Mime
View raw message