flink-commits mailing list archives

From se...@apache.org
Subject [1/3] flink git commit: [FLINK-2293] [runtime] Fix estimation for the number of hash buckets on recursive builds
Date Wed, 08 Jul 2015 09:05:42 GMT
Repository: flink
Updated Branches:
  refs/heads/master 0d2c49005 -> 9ea4be895


[FLINK-2293] [runtime] Fix estimation for the number of hash buckets on recursive builds


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/627f3cbc
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/627f3cbc
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/627f3cbc

Branch: refs/heads/master
Commit: 627f3cbcfdca8368eea6aa825cd9a45a9a0a841f
Parents: 0d2c490
Author: Stephan Ewen <sewen@apache.org>
Authored: Tue Jul 7 17:01:44 2015 +0200
Committer: Stephan Ewen <sewen@apache.org>
Committed: Tue Jul 7 17:01:44 2015 +0200

----------------------------------------------------------------------
 .../operators/hash/MutableHashTable.java        | 23 ++++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/627f3cbc/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java
----------------------------------------------------------------------
diff --git a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java
b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java
index 21d67a8..9416796 100644
--- a/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java
+++ b/flink-runtime/src/main/java/org/apache/flink/runtime/operators/hash/MutableHashTable.java
@@ -678,9 +678,7 @@ public class MutableHashTable<BT, PT> implements MemorySegmentSource {
 	 * @param input
 	 * @throws IOException
 	 */
-	protected void buildInitialTable(final MutableObjectIterator<BT> input)
-	throws IOException
-	{
+	protected void buildInitialTable(final MutableObjectIterator<BT> input) throws IOException {
 		// create the partitions
 		final int partitionFanOut = getPartitioningFanOutNoEstimates(this.availableMemory.size());
 		if (partitionFanOut > MAX_NUM_PARTITIONS) {
@@ -788,8 +786,8 @@ public class MutableHashTable<BT, PT> implements MemorySegmentSource {
 			final int avgRecordLenPartition = (int) (((long) p.getBuildSideBlockCount()) * 
 					this.segmentSize / p.getBuildSideRecordCount());
 			
-			final int bucketCount = (int) (((long) totalBuffersAvailable) * RECORD_TABLE_BYTES / 
-					(avgRecordLenPartition + RECORD_OVERHEAD_BYTES));
+			final int bucketCount = getInitialTableSize(totalBuffersAvailable, this.segmentSize,
+					getPartitioningFanOutNoEstimates(totalBuffersAvailable), avgRecordLenPartition);
 			
 			// compute in how many splits, we'd need to partition the result 
 			final int splits = (int) (totalBuffersNeeded / totalBuffersAvailable) + 1;
@@ -1201,7 +1199,7 @@ public class MutableHashTable<BT, PT> implements MemorySegmentSource {
 	 * @param numBuffers The number of available buffers.
 	 * @return The number 
 	 */
-	public static final int getNumWriteBehindBuffers(int numBuffers) {
+	public static int getNumWriteBehindBuffers(int numBuffers) {
 		int numIOBufs = (int) (Math.log(numBuffers) / Math.log(4) - 1.5);
 		return numIOBufs > 6 ? 6 : numIOBufs;
 	}
@@ -1216,11 +1214,12 @@ public class MutableHashTable<BT, PT> implements MemorySegmentSource {
 	 * @param numBuffers The number of buffers available.
 	 * @return The number of partitions to use.
 	 */
-	public static final int getPartitioningFanOutNoEstimates(int numBuffers) {
+	public static int getPartitioningFanOutNoEstimates(int numBuffers) {
 		return Math.max(10, Math.min(numBuffers / 10, MAX_NUM_PARTITIONS));
 	}
 	
-	public static final int getInitialTableSize(int numBuffers, int bufferSize, int numPartitions, int recordLenBytes) {
+	public static int getInitialTableSize(int numBuffers, int bufferSize, int numPartitions, int recordLenBytes) {
+		
 		// ----------------------------------------------------------------------------------------
 		// the following observations hold:
 	// 1) If the records are assumed to be very large, then many buffers need to go to the partitions
@@ -1249,11 +1248,11 @@ public class MutableHashTable<BT, PT> implements MemorySegmentSource {
 	/**
 	 * Assigns a partition to a bucket.
 	 * 
-	 * @param bucket
-	 * @param numPartitions
-	 * @return The hash code for the integer.
+	 * @param bucket The bucket to get the partition for.
+	 * @param numPartitions The number of partitions.
+	 * @return The partition for the bucket.
 	 */
-	public static final byte assignPartition(int bucket, byte numPartitions) {
+	public static byte assignPartition(int bucket, byte numPartitions) {
 		return (byte) (bucket % numPartitions);
 	}
 	


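----------------------------------------------------------------------

For readers skimming the diff, the substance of the change is the bucketCount computation in the
recursive-build (spilled-partition) path: instead of an inline formula over the raw buffer count, the
code now reuses getInitialTableSize(...) together with the partitioning fan-out, so the estimate takes
the segment size into account. The following is a rough, self-contained Java sketch of that before/after
estimation. The constants, BUCKET_SIZE_BYTES, and the body of getInitialTableSize below are illustrative
assumptions for demonstration, not the actual MutableHashTable implementation; only
getPartitioningFanOutNoEstimates mirrors the heuristic visible in the diff.

// Illustrative sketch only -- not the actual Flink code.
public class BucketEstimateSketch {

	// hypothetical stand-ins for the hash table's sizing constants
	static final int MAX_NUM_PARTITIONS = 32;      // assumed cap on the partition fan-out
	static final int RECORD_TABLE_BYTES = 8;       // assumed bytes per record in the bucket table
	static final int RECORD_OVERHEAD_BYTES = 8;    // assumed per-record overhead in a partition
	static final int BUCKET_SIZE_BYTES = 128;      // assumed size of one hash bucket

	// mirrors the fan-out heuristic visible in the diff: at least 10 partitions,
	// at most numBuffers / 10, capped by MAX_NUM_PARTITIONS
	static int getPartitioningFanOutNoEstimates(int numBuffers) {
		return Math.max(10, Math.min(numBuffers / 10, MAX_NUM_PARTITIONS));
	}

	// assumed shape of the shared sizing routine: reserve one buffer per partition,
	// then size the bucket table from the memory that remains
	static int getInitialTableSize(int numBuffers, int bufferSize, int numPartitions, int recordLenBytes) {
		final long bytesForTable = (long) Math.max(1, numBuffers - numPartitions) * bufferSize;
		final long storableRecords = bytesForTable / (recordLenBytes + RECORD_OVERHEAD_BYTES);
		final long buckets = Math.max(1, storableRecords * RECORD_TABLE_BYTES / BUCKET_SIZE_BYTES);
		return (int) Math.min(buckets, Integer.MAX_VALUE);
	}

	public static void main(String[] args) {
		final int totalBuffersAvailable = 512;
		final int segmentSize = 32 * 1024;       // assumed 32 KiB memory segments
		final int avgRecordLenPartition = 48;    // assumed average record length in the spilled partition

		// old behaviour (the removed lines in the second hunk): an inline formula over the
		// raw buffer count, independent of the segment size and the partition fan-out
		final int oldEstimate = (int) (((long) totalBuffersAvailable) * RECORD_TABLE_BYTES
				/ (avgRecordLenPartition + RECORD_OVERHEAD_BYTES));

		// new behaviour (the added lines): reuse the shared sizing routine
		final int newEstimate = getInitialTableSize(totalBuffersAvailable, segmentSize,
				getPartitioningFanOutNoEstimates(totalBuffersAvailable), avgRecordLenPartition);

		System.out.println("old bucket estimate: " + oldEstimate);
		System.out.println("new bucket estimate: " + newEstimate);
	}
}

Under these assumed numbers the old formula yields an estimate in the tens of buckets while the shared
routine, which scales with the segment size, yields tens of thousands, which is the kind of discrepancy
the commit title points at for recursive builds.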