pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From roh...@apache.org
Subject svn commit: r1778363 - in /pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
Date Thu, 12 Jan 2017 00:08:18 GMT
Author: rohini
Date: Thu Jan 12 00:08:17 2017
New Revision: 1778363

URL: http://svn.apache.org/viewvc?rev=1778363&view=rev
Log:
PIG-5088: HashValuePartitioner has skew when there is only map fields (rohini)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1778363&r1=1778362&r2=1778363&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Jan 12 00:08:17 2017
@@ -175,6 +175,8 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-5088: HashValuePartitioner has skew when there is only map fields (rohini)
+
 PIG-5043: Slowstart not applied in Tez with PARALLEL clause (rohini)
 
 PIG-4930: Skewed Join Breaks On Empty Sampled Input When Key is From Map (nkollar via rohini)

Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java?rev=1778363&r1=1778362&r2=1778363&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/tez/runtime/HashValuePartitioner.java Thu Jan 12 00:08:17 2017
@@ -17,8 +17,6 @@
  */
 package org.apache.pig.backend.hadoop.executionengine.tez.runtime;
 
-import java.util.Map;
-
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapreduce.Partitioner;
 import org.apache.pig.data.DataBag;
@@ -44,13 +42,10 @@ public class HashValuePartitioner extend
                 if (o != null) {
                     // Skip computing hashcode for bags.
                     // Order of elements in the map/bag may be different on each run
+                    // Can't even include size as some DataBag implementations
+                    // iterate through all elements in the bag to get the size.
                     if (o instanceof DataBag) {
                         hash = 31 * hash;
-                    } else if (o instanceof Map) {
-                        // Including size of map as it is easily available
-                        // Not doing for DataBag as some implementations actually
-                        // iterate through all elements in the bag to get the size.
-                        hash = 31 * hash + ((Map) o).size();
                     } else {
                         hash = 31 * hash + o.hashCode();
                     }



Mime
View raw message