drill-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] ilooner commented on a change in pull request #1408: DRILL-6453: Resolve deadlock when reading from build and probe sides simultaneously in HashJoin
Date Thu, 09 Aug 2018 20:24:13 GMT
ilooner commented on a change in pull request #1408: DRILL-6453: Resolve deadlock when reading
from build and probe sides simultaneously in HashJoin
URL: https://github.com/apache/drill/pull/1408#discussion_r209066765
 
 

 ##########
 File path: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/HashJoinMemoryCalculatorImpl.java
 ##########
 @@ -420,31 +357,32 @@ public long getMaxReservedMemory() {
     private void calculateMemoryUsage()
     {
       // Adjust based on number of records
-      maxBuildBatchSize = computeMaxBatchSizeNoHash(buildBatchSize, buildNumRecords,
-        maxBatchNumRecordsBuild, fragmentationFactor, safetyFactor);
-      maxProbeBatchSize = computeMaxBatchSizeNoHash(probeBatchSize, probeNumRecords,
-        maxBatchNumRecordsProbe, fragmentationFactor, safetyFactor);
-
-      // Safety factor can be multiplied at the end since these batches are coming from exchange
operators, so no excess value vector doubling
-      partitionBuildBatchSize = computeMaxBatchSize(buildBatchSize,
-        buildNumRecords,
-        recordsPerPartitionBatchBuild,
-        fragmentationFactor,
-        safetyFactor,
-        reserveHash);
+      maxBuildBatchSize = buildSizePredictor.predictBatchSize(maxBatchNumRecordsBuild, false);
 
-      // Safety factor can be multiplied at the end since these batches are coming from exchange
operators, so no excess value vector doubling
-      partitionProbeBatchSize = computeMaxBatchSize(
-        probeBatchSize,
-        probeNumRecords,
-        recordsPerPartitionBatchProbe,
-        fragmentationFactor,
-        safetyFactor,
-        reserveHash);
+      if (probeSizePredictor.hasData()) {
+        // We have probe data and we can compute the max incoming size.
+        maxProbeBatchSize = probeSizePredictor.predictBatchSize(maxBatchNumRecordsProbe,
false);
+      } else {
+        // We don't have probe data
+        if (probeEmpty) {
+          // We know the probe has no data, so we don't need to reserve any space for the
incoming probe
+          maxProbeBatchSize = 0;
+        } else {
+          // The probe side may have data, so assume it is the max incoming batch size. This
assumption
+          // can fail in some cases since the batch sizing project is incomplete.
+          maxProbeBatchSize = maxIncomingBatchSize;
+        }
+      }
+
+      partitionBuildBatchSize = buildSizePredictor.predictBatchSize(recordsPerPartitionBatchBuild,
reserveHash);
+
+      if (probeSizePredictor.hasData()) {
+        partitionProbeBatchSize = probeSizePredictor.predictBatchSize(recordsPerPartitionBatchProbe,
reserveHash);
+      }
 
       maxOutputBatchSize = (long) ((double)outputBatchSize * fragmentationFactor * safetyFactor);
 
-      long probeReservedMemory;
+      long probeReservedMemory = -1;
 
 Review comment:
   Removed -1

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message