Return-Path: X-Original-To: apmail-flink-commits-archive@minotaur.apache.org Delivered-To: apmail-flink-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id E436A186B2 for ; Wed, 8 Jul 2015 09:12:47 +0000 (UTC) Received: (qmail 91937 invoked by uid 500); 8 Jul 2015 09:12:47 -0000 Delivered-To: apmail-flink-commits-archive@flink.apache.org Received: (qmail 91903 invoked by uid 500); 8 Jul 2015 09:12:47 -0000 Mailing-List: contact commits-help@flink.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@flink.apache.org Delivered-To: mailing list commits@flink.apache.org Received: (qmail 91894 invoked by uid 99); 8 Jul 2015 09:12:47 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 08 Jul 2015 09:12:47 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 9E61FE00D5; Wed, 8 Jul 2015 09:12:47 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: sewen@apache.org To: commits@flink.apache.org Message-Id: <1594a1f8615c4d6da3415d67efec5b49@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: flink git commit: [FLINK-2293] [runtime] Fix estimation for the number of hash buckets on recursive builds Date: Wed, 8 Jul 2015 09:12:47 +0000 (UTC) Repository: flink Updated Branches: refs/heads/release-0.9 0789460d7 -> 7c2a704f2 [FLINK-2293] [runtime] Fix estimation for the number of hash buckets on recursive builds Project: http://git-wip-us.apache.org/repos/asf/flink/repo Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/7c2a704f Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/7c2a704f Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/7c2a704f Branch: refs/heads/release-0.9 Commit: 7c2a704f24646e77726a2cc944a65c2096d2f11a Parents: 0789460 Author: Stephan Ewen Authored: Tue Jul 7 17:01:44 2015 +0200 Committer: Stephan Ewen Committed: Wed Jul 8 11:12:07 2015 +0200 ---------------------------------------------------------------------- .../operators/hash/MutableHashTable.java | 23 ++++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink/blob/7c2a704f/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java ---------------------------------------------------------------------- diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java index 21d67a8..9416796 100644 --- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java +++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java @@ -678,9 +678,7 @@ public class MutableHashTable implements MemorySegmentSource { * @param input * @throws IOException */ - protected void buildInitialTable(final MutableObjectIterator input) - throws IOException - { + protected void buildInitialTable(final MutableObjectIterator input) throws IOException { // create the partitions final int partitionFanOut = getPartitioningFanOutNoEstimates(this.availableMemory.size()); if (partitionFanOut > MAX_NUM_PARTITIONS) { @@ -788,8 +786,8 @@ public class MutableHashTable implements MemorySegmentSource { final int avgRecordLenPartition = (int) (((long) p.getBuildSideBlockCount()) * this.segmentSize / p.getBuildSideRecordCount()); - final int bucketCount = (int) (((long) totalBuffersAvailable) * RECORD_TABLE_BYTES / - (avgRecordLenPartition + RECORD_OVERHEAD_BYTES)); + final int bucketCount = getInitialTableSize(totalBuffersAvailable, this.segmentSize, + getPartitioningFanOutNoEstimates(totalBuffersAvailable), avgRecordLenPartition); // compute in how many splits, we'd need to partition the result final int splits = (int) (totalBuffersNeeded / totalBuffersAvailable) + 1; @@ -1201,7 +1199,7 @@ public class MutableHashTable implements MemorySegmentSource { * @param numBuffers The number of available buffers. * @return The number */ - public static final int getNumWriteBehindBuffers(int numBuffers) { + public static int getNumWriteBehindBuffers(int numBuffers) { int numIOBufs = (int) (Math.log(numBuffers) / Math.log(4) - 1.5); return numIOBufs > 6 ? 6 : numIOBufs; } @@ -1216,11 +1214,12 @@ public class MutableHashTable implements MemorySegmentSource { * @param numBuffers The number of buffers available. * @return The number of partitions to use. */ - public static final int getPartitioningFanOutNoEstimates(int numBuffers) { + public static int getPartitioningFanOutNoEstimates(int numBuffers) { return Math.max(10, Math.min(numBuffers / 10, MAX_NUM_PARTITIONS)); } - public static final int getInitialTableSize(int numBuffers, int bufferSize, int numPartitions, int recordLenBytes) { + public static int getInitialTableSize(int numBuffers, int bufferSize, int numPartitions, int recordLenBytes) { + // ---------------------------------------------------------------------------------------- // the following observations hold: // 1) If the records are assumed to be very large, then many buffers need to go to the partitions @@ -1249,11 +1248,11 @@ public class MutableHashTable implements MemorySegmentSource { /** * Assigns a partition to a bucket. * - * @param bucket - * @param numPartitions - * @return The hash code for the integer. + * @param bucket The bucket to get the partition for. + * @param numPartitions The number of partitions. + * @return The partition for the bucket. */ - public static final byte assignPartition(int bucket, byte numPartitions) { + public static byte assignPartition(int bucket, byte numPartitions) { return (byte) (bucket % numPartitions); }