Return-Path: X-Original-To: apmail-hive-commits-archive@www.apache.org Delivered-To: apmail-hive-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id D3E1E17C99 for ; Thu, 18 Jun 2015 20:27:56 +0000 (UTC) Received: (qmail 80944 invoked by uid 500); 18 Jun 2015 20:27:56 -0000 Delivered-To: apmail-hive-commits-archive@hive.apache.org Received: (qmail 80901 invoked by uid 500); 18 Jun 2015 20:27:56 -0000 Mailing-List: contact commits-help@hive.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: hive-dev@hive.apache.org Delivered-To: mailing list commits@hive.apache.org Received: (qmail 80890 invoked by uid 99); 18 Jun 2015 20:27:56 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 18 Jun 2015 20:27:56 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 6DDD1E3C9A; Thu, 18 Jun 2015 20:27:56 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: gunther@apache.org To: commits@hive.apache.org Message-Id: <2a716604453044f9adef82b8672e62d5@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hive git commit: HIVE-10746: Hive 1.2.0+Tez produces 1-byte FileSplits from mapred.TextInputFormat (Gopal V via Gunther H) Date: Thu, 18 Jun 2015 20:27:56 +0000 (UTC) Repository: hive Updated Branches: refs/heads/master ef6f313c6 -> b98a30b1b HIVE-10746: Hive 1.2.0+Tez produces 1-byte FileSplits from mapred.TextInputFormat (Gopal V via Gunther H) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b98a30b1 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b98a30b1 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b98a30b1 Branch: refs/heads/master Commit: b98a30b1b9172fdc5a74fc272393c5eec4344c74 Parents: ef6f313 Author: Gunther Hagleitner Authored: Thu Jun 18 13:27:29 2015 -0700 Committer: Gunther Hagleitner Committed: Thu Jun 18 13:28:02 2015 -0700 ---------------------------------------------------------------------- .../hive/ql/exec/tez/HiveSplitGenerator.java | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b98a30b1/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java index 52d0996..87881b6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/HiveSplitGenerator.java @@ -25,11 +25,13 @@ import com.google.common.base.Preconditions; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.hive.shims.HadoopShims; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; @@ -72,6 +74,15 @@ public class HiveSplitGenerator extends InputInitializer { private final MapWork work; private final SplitGrouper splitGrouper = new SplitGrouper(); + private static final String MIN_SPLIT_SIZE; + @SuppressWarnings("unused") + private static final String MAX_SPLIT_SIZE; + + static { + final HadoopShims SHIMS = ShimLoader.getHadoopShims(); + MIN_SPLIT_SIZE = SHIMS.getHadoopConfNames().get("MAPREDMINSPLITSIZE"); + MAX_SPLIT_SIZE = SHIMS.getHadoopConfNames().get("MAPREDMAXSPLITSIZE"); + } public HiveSplitGenerator(InputInitializerContext initializerContext) throws IOException, SerDeException { @@ -97,6 +108,7 @@ public class HiveSplitGenerator extends InputInitializer { } + @SuppressWarnings("unchecked") @Override public List initialize() throws Exception { // Setup the map work for this thread. Pruning modified the work instance to potentially remove @@ -123,6 +135,18 @@ public class HiveSplitGenerator extends InputInitializer { int taskResource = getContext().getVertexTaskResource().getMemory(); int availableSlots = totalResource / taskResource; + if (conf.getLong(MIN_SPLIT_SIZE, 1) <= 1) { + // broken configuration from mapred-default.xml + final long blockSize = conf.getLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, + DFSConfigKeys.DFS_BLOCK_SIZE_DEFAULT); + final long minGrouping = conf.getLong( + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE, + TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT); + final long preferredSplitSize = Math.min(blockSize / 2, minGrouping); + jobConf.setLong(MIN_SPLIT_SIZE, preferredSplitSize); + LOG.info("The preferred split size is " + preferredSplitSize); + } + // Create the un-grouped splits float waves = conf.getFloat(TezMapReduceSplitsGrouper.TEZ_GROUPING_SPLIT_WAVES,