pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From knogu...@apache.org
Subject svn commit: r1818985 - in /pig/trunk: ./ src/org/apache/pig/data/ test/org/apache/pig/test/
Date Thu, 21 Dec 2017 21:39:21 GMT
Author: knoguchi
Date: Thu Dec 21 21:39:21 2017
New Revision: 1818985

URL: http://svn.apache.org/viewvc?rev=1818985&view=rev
Log:
PIG-5300: hashCode for Bag needs to be order independent (knoguchi)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/data/DataBag.java
    pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
    pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java
    pig/trunk/src/org/apache/pig/data/SingleTupleBag.java
    pig/trunk/test/org/apache/pig/test/TestDataModel.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Dec 21 21:39:21 2017
@@ -59,6 +59,7 @@ PIG-5251: Bump joda-time to 2.9.9 (dbist
 OPTIMIZATIONS
  
 BUG FIXES
+PIG-5300: hashCode for Bag needs to be order independent (knoguchi)
 
 PIG-5318: Unit test failures on Pig on Spark with Spark 2.2 (nkollar via szita)
 

Modified: pig/trunk/src/org/apache/pig/data/DataBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/DataBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/DataBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/DataBag.java Thu Dec 21 21:39:21 2017
@@ -126,6 +126,15 @@ public interface DataBag extends Spillab
     void clear();
 
     /**
+     * Returns the hash code value for the bag.  The hash code of a bag is
+     * defined to be the sum of the hash codes of each tuple.
+     * This ensures that b1.equals(b2) implies b1.hashCode() == b2.hashCode()
+     *
+     * @return the hash code value for this bag
+     */
+    int hashCode();
+
+    /**
      * This is used by FuncEvalSpec.FakeDataBag.
      * @param stale Set stale state.
      */

Modified: pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java Thu Dec 21 21:39:21 2017
@@ -309,6 +309,9 @@ public abstract class DefaultAbstractBag
 
     @Override
     public boolean equals(Object other) {
+        if( other == null ) {
+            return false;
+        }
         return compareTo(other) == 0;
     }
 
@@ -359,11 +362,10 @@ public abstract class DefaultAbstractBag
 
     @Override
     public int hashCode() {
-        int hash = 1;
+        int hash = 0;
         Iterator<Tuple> i = iterator();
         while (i.hasNext()) {
-            // Use 37 because we want a prime, and tuple uses 31.
-            hash = 37 * hash + i.next().hashCode();
+            hash += i.next().hashCode();
         }
         return hash;
     }

Modified: pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java Thu Dec 21 21:39:21 2017
@@ -198,11 +198,18 @@ public class NonSpillableDataBag impleme
      */
     @Override
     public boolean equals(Object obj) {
+        if( obj == null) {
+            return false;
+        }
         return compareTo(obj) == 0;
     }
 
     public int hashCode() {
-        return mContents.hashCode();
+        int hash = 0;
+        for( Tuple t : mContents ) {
+            hash += t.hashCode();
+        }
+        return hash;
     }
 
     @SuppressWarnings("unchecked")

Modified: pig/trunk/src/org/apache/pig/data/SingleTupleBag.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/SingleTupleBag.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/SingleTupleBag.java (original)
+++ pig/trunk/src/org/apache/pig/data/SingleTupleBag.java Thu Dec 21 21:39:21 2017
@@ -156,22 +156,37 @@ public class SingleTupleBag implements D
         }    
     }
 
-    /* (non-Javadoc)
-     * @see java.lang.Comparable#compareTo(java.lang.Object)
-     */
     @Override
-    public int compareTo(Object o) {
-        // TODO Auto-generated method stub
-        return 0;
+    public int compareTo(Object other) {
+        if (this == other)
+            return 0;
+        if (other instanceof DataBag) {
+            DataBag bOther = (DataBag) other;
+            if (bOther.size() != 1) {
+                if ( 1 > bOther.size()) return 1;
+                else return -1;
+            }
+            Iterator<Tuple> iter = bOther.iterator();
+            return item.compareTo(iter.next());
+        } else {
+            return DataType.compare(this, other);
+        }
     }
 
-    public boolean equals(Object o){
-        // TODO: match to compareTo if it is updated
-        return true;
+    @Override
+    public boolean equals(Object other) {
+        if( other == null ) {
+            return false;
+        }
+        return compareTo(other) == 0;
     }
 
+    @Override
     public int hashCode() {
-        return 42; 
+        if( item != null ) {
+            return item.hashCode();
+        }
+        return 0;
     }
 
     class TBIterator implements Iterator<Tuple> {

Modified: pig/trunk/test/org/apache/pig/test/TestDataModel.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestDataModel.java?rev=1818985&r1=1818984&r2=1818985&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestDataModel.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestDataModel.java Thu Dec 21 21:39:21 2017
@@ -17,6 +17,7 @@ package org.apache.pig.test;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
@@ -42,7 +43,15 @@ import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.DataType;
+import org.apache.pig.data.DefaultDataBag;
+import org.apache.pig.data.DistinctDataBag;
+import org.apache.pig.data.InternalCachedBag;
+import org.apache.pig.data.InternalDistinctBag;
 import org.apache.pig.data.InternalMap;
+import org.apache.pig.data.InternalSortedBag;
+import org.apache.pig.data.NonSpillableDataBag;
+import org.apache.pig.data.SingleTupleBag;
+import org.apache.pig.data.SortedDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.junit.Test;
@@ -587,6 +596,77 @@ public class TestDataModel {
         }
     }
 
+    @Test
+    public void testBagHashCodeAndEquals() throws Exception {
+        DataBag [] ascBags = {new SortedDataBag(null), new DefaultDataBag(), new DistinctDataBag(),
new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(), new NonSpillableDataBag()};
+        DataBag [] dscBags = {new SortedDataBag(null), new DefaultDataBag(), new DistinctDataBag(),
new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(), new NonSpillableDataBag()};
+        Tuple input = giveMeOneOfEach();
+        TupleFactory tf = TupleFactory.getInstance();
+        for (int i = 0; i < ascBags.length; i++ ) {
+            for( int j = 0; j < input.size(); j++ ) {
+                ascBags[i].add(tf.newTuple(input.get(j)));
+                dscBags[i].add(tf.newTuple(input.get(input.size() - 1 - j)));
+            }
+        }
+        // making sure equals(null) does not throw NPE
+        for (DataBag bag : ascBags ) {
+            assertFalse(bag.equals(null));
+        }
+        // Comparing against each other including itself
+        for (DataBag bag1 : ascBags ) {
+            for (DataBag bag2 : ascBags ) {
+                assertEquals("Comparing hashcode for " + bag1.getClass() + " and " + bag2.getClass()
+ " failed", bag1.hashCode(), bag2.hashCode());
+                assertEquals("Comparing content of Bag for " + bag1.getClass() + " and "
+ bag2.getClass() + " failed", bag1, bag2);
+            }
+        }
+        // Comparing against each other again but now with different insertion
+        // order.  (including from the same type of Bag class)
+        for (DataBag ascBag : ascBags) {
+            for (DataBag dscBag : dscBags) {
+                assertEquals("Comparing hashcode for " + ascBag.getClass() + " and " + dscBag.getClass()
+ " failed", ascBag.hashCode(), dscBag.hashCode());
+                assertEquals("Comparing content of Bag for " + ascBag.getClass() + " and
" + dscBag.getClass() + " failed", ascBag, dscBag);
+            }
+        }
+    }
+
+    @Test
+    public void testBagHashCodeAndEqualsForSingleTuple() throws Exception {
+        TupleFactory tf = TupleFactory.getInstance();
+
+        // For each datatype, making sure bag hashcode and content match
+        for( Object input: giveMeOneOfEach() ) {
+            DataBag [] singleItemBags = {new SortedDataBag(null), new DefaultDataBag(), new
DistinctDataBag(), new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(),
new NonSpillableDataBag(), new SingleTupleBag(null)};
+            DataBag [] twoItemBags = {new SortedDataBag(null), new DefaultDataBag(), new
DistinctDataBag(), new InternalCachedBag(), new InternalDistinctBag(), new InternalSortedBag(),
new NonSpillableDataBag()};
+
+            for( DataBag singleItemBag : singleItemBags ) {
+                singleItemBag.add(tf.newTuple(input));
+            }
+
+            for( DataBag singleItemBag1 : singleItemBags ) {
+                for( DataBag singleItemBag2 : singleItemBags ) {
+                    assertEquals("Comparing hashcode for " + singleItemBag1.getClass()
+                                 + " and " + singleItemBag2.getClass() + " failed",
+                                 singleItemBag1.hashCode(), singleItemBag2.hashCode());
+                    assertEquals("Comparing content of Bag for " + singleItemBag1.getClass()
+                                 + " and " + singleItemBag2.getClass() + " failed",
+                                 singleItemBag1, singleItemBag2);
+                }
+            }
+
+            // making sure no two-items-bag would match with single item bag
+            for( DataBag twoItemBag : twoItemBags ) {
+                twoItemBag.add(tf.newTuple(input));
+                twoItemBag.add(tf.newTuple(new String("Unique string not to overlap with
a tuple above")));
+            }
+
+            for( DataBag twoItemBag : twoItemBags ) {
+                for( DataBag singleItemBag : singleItemBags ) {
+                    assertNotEquals(twoItemBag, singleItemBag);
+                }
+            }
+        }
+    }
+
     private Tuple giveMeOneOfEach() throws Exception {
         TupleFactory tf = TupleFactory.getInstance();
 



Mime
View raw message