hive-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From gop...@apache.org
Subject hive git commit: HIVE-11882: Fetch optimizer should stop source files traversal once it exceeds the hive.fetch.task.conversion.threshold (Illya Yalovyy, via Gopal V)
Date Tue, 13 Oct 2015 22:25:09 GMT
Repository: hive
Updated Branches:
  refs/heads/master 07eaab396 -> aebe5c6b9


HIVE-11882: Fetch optimizer should stop source files traversal once it exceeds the hive.fetch.task.conversion.threshold
(Illya Yalovyy, via Gopal V)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/aebe5c6b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/aebe5c6b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/aebe5c6b

Branch: refs/heads/master
Commit: aebe5c6b9a73e01761d6e2faa07865a0023a1c27
Parents: 07eaab3
Author: Gopal V <gopalv@apache.org>
Authored: Tue Oct 13 15:23:58 2015 -0700
Committer: Gopal V <gopalv@apache.org>
Committed: Tue Oct 13 15:24:55 2015 -0700

----------------------------------------------------------------------
 .../hive/ql/optimizer/SimpleFetchOptimizer.java |  8 ++-
 .../hadoop/hive/ql/parse/SplitSample.java       |  4 ++
 .../hadoop/hive/ql/parse/TestSplitSample.java   | 60 ++++++++++++++++++++
 3 files changed, 69 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/aebe5c6b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
index 3859177..af74fff 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -411,11 +411,10 @@ public class SimpleFetchOptimizer implements Transform {
       if (splitSample != null && splitSample.getTotalLength() != null) {
         return splitSample.getTotalLength();
       }
-      long length = calculateLength(pctx, remaining);
       if (splitSample != null) {
-        return splitSample.getTargetSize(length);
+        return splitSample.getTargetSize(calculateLength(pctx, splitSample.estimateSourceSize(remaining)));
       }
-      return length;
+      return calculateLength(pctx, remaining);
     }
 
     private long calculateLength(ParseContext pctx, long remaining) throws Exception {
@@ -440,6 +439,9 @@ public class SimpleFetchOptimizer implements Transform {
       for (Partition partition : partsList.getNotDeniedPartns()) {
         Path path = partition.getDataLocation();
         total += getFileLength(jobConf, path, partition.getInputFormatClass());
+        if (total > remaining) {
+          break;
+        }
       }
       return total;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/aebe5c6b/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java
index 551d209..2b07635 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SplitSample.java
@@ -102,4 +102,8 @@ public class SplitSample implements Serializable{
   public long getTargetSize(long totalSize) {
     return totalLength != null ? totalLength : (long) (totalSize * percent / 100D);
   }
+
+  public long estimateSourceSize(long targetSize) {
+    return percent != null ? Math.round(targetSize * 100D / percent) : targetSize;
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/aebe5c6b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSplitSample.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSplitSample.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSplitSample.java
new file mode 100644
index 0000000..121d9ec
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSplitSample.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2015 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.parse;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class TestSplitSample {
+
+  private static final int SEED_NUM = 123;
+  private static final double PERCENT = 2.0;
+  private static final long TOTAL_LENGTH = 1000L;
+  private static final int ROW_COUNT = 5;
+  private SplitSample splitSample;
+
+  @Test
+  public void testGetTargetSizeTotalLength() {
+    splitSample = new SplitSample(TOTAL_LENGTH, SEED_NUM);
+    assertEquals(TOTAL_LENGTH, splitSample.getTargetSize(1000));
+    assertEquals(TOTAL_LENGTH, splitSample.getTargetSize(100));
+  }
+
+  @Test
+  public void testGetTargetSizePercent() {
+    splitSample = new SplitSample(PERCENT, SEED_NUM);
+    assertEquals(20, splitSample.getTargetSize(1000));
+  }
+
+  @Test
+  public void testEstimateSourceSizeTotalLength() {
+    splitSample = new SplitSample(TOTAL_LENGTH, SEED_NUM);
+    assertEquals(10, splitSample.estimateSourceSize(10));
+  }
+
+  @Test
+  public void testEstimateSourceSizeRowCount() {
+    splitSample = new SplitSample(ROW_COUNT);
+    assertEquals(123, splitSample.estimateSourceSize(123));
+  }
+
+  @Test
+  public void testEstimateSourceSizePercent() {
+    splitSample = new SplitSample(PERCENT, SEED_NUM);
+    assertEquals(500, splitSample.estimateSourceSize(10));
+  }
+}


Mime
View raw message