Return-Path: X-Original-To: apmail-pig-commits-archive@www.apache.org Delivered-To: apmail-pig-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id B394ADCBF for ; Mon, 22 Oct 2012 22:29:28 +0000 (UTC) Received: (qmail 16411 invoked by uid 500); 22 Oct 2012 22:29:28 -0000 Delivered-To: apmail-pig-commits-archive@pig.apache.org Received: (qmail 16380 invoked by uid 500); 22 Oct 2012 22:29:28 -0000 Mailing-List: contact commits-help@pig.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pig.apache.org Delivered-To: mailing list commits@pig.apache.org Received: (qmail 16373 invoked by uid 99); 22 Oct 2012 22:29:28 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 22 Oct 2012 22:29:28 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 22 Oct 2012 22:29:26 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 7EE7823888E4 for ; Mon, 22 Oct 2012 22:28:43 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1401112 - in /pig/branches/branch-0.11: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/data/ src/org/apache/pig/impl/io/ test/ test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ Date: Mon, 22 Oct 2012 22:28:43 -0000 To: commits@pig.apache.org From: jcoveney@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20121022222843.7EE7823888E4@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: jcoveney Date: Mon Oct 22 22:28:42 2012 New Revision: 1401112 URL: http://svn.apache.org/viewvc?rev=1401112&view=rev Log: PIG-2975: TestTypedMap.testOrderBy failing with incorrect result (knoguchi via jcoveney) Added: pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java Modified: pig/branches/branch-0.11/CHANGES.txt pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java pig/branches/branch-0.11/test/unit-tests Modified: pig/branches/branch-0.11/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/CHANGES.txt?rev=1401112&r1=1401111&r2=1401112&view=diff ============================================================================== --- pig/branches/branch-0.11/CHANGES.txt (original) +++ pig/branches/branch-0.11/CHANGES.txt Mon Oct 22 22:28:42 2012 @@ -308,9 +308,7 @@ OPTIMIZATIONS BUG FIXES -PIG-2950: Fix tiny documentation error in BagToString builtin (initialcontext via daijy) - -PIG-2967: Fix Glob_local test failure for Pig E2E Test Framework (sushantj via daijy) +PIG-2975: TestTypedMap.testOrderBy failing with incorrect result (knoguchi via jcoveney) PIG-1283: COUNT on null bag causes failure (analog.sony via jcoveney) Modified: pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java?rev=1401112&r1=1401111&r2=1401112&view=diff ============================================================================== --- pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java (original) +++ pig/branches/branch-0.11/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigBytesRawComparator.java Mon Oct 22 22:28:42 2012 @@ -21,14 +21,11 @@ import java.io.IOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.mapred.JobConf; - -import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.BinInterSedes; import org.apache.pig.data.DataType; import org.apache.pig.impl.io.NullableBytesWritable; import org.apache.pig.impl.util.ObjectSerializer; @@ -37,11 +34,11 @@ public class PigBytesRawComparator exten private final Log mLog = LogFactory.getLog(getClass()); private boolean[] mAsc; - private BytesWritable.Comparator mWrappedComp; + private WritableComparator mWrappedComp; public PigBytesRawComparator() { super(NullableBytesWritable.class); - mWrappedComp = new BytesWritable.Comparator(); + mWrappedComp = new BinInterSedes.BinInterSedesTupleRawComparator(); } public void setConf(Configuration conf) { @@ -63,6 +60,7 @@ public class PigBytesRawComparator exten mAsc = new boolean[1]; mAsc[0] = true; } + ((BinInterSedes.BinInterSedesTupleRawComparator)mWrappedComp).setConf(conf); } public Configuration getConf() { @@ -70,17 +68,69 @@ public class PigBytesRawComparator exten } /** - * Compare two NullableBytesWritables as raw bytes. If neither are null, - * then IntWritable.compare() is used. If both are null then the indices - * are compared. Otherwise the null one is defined to be less. + * Compare two NullableBytesWritables as raw bytes. + * If both are null, then the indices are compared. + * If neither are null + * and both are bytearrays, then direct Writable.compareBytes is used. + * For non-bytearrays, we use BinInterSedesTupleRawComparator. + * If either is null, null one is defined to be less. */ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { int rc = 0; // If either are null, handle differently. if (b1[s1] == 0 && b2[s2] == 0) { - // Subtract 2, one for null byte and one for index byte - rc = mWrappedComp.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2); + // Checking if both of them are ByteArrays. + // b1[s1+1] is always TUPLE_1 so skipping. + // b1[s1+2] contains the datatype followed by a datasize. + // No need to read the actual size since it is given from l1&l2. + // We just need to skip those bytes here. + // This shortcut is placed here for performances purposes.(PIG-2975) + boolean dataByteArraysCompare=true; + int offset1,offset2,length1,length2; + switch(b1[s1 + 2]) { + case BinInterSedes.TINYBYTEARRAY: + // skipping mNull, TUPLE_1, TINYBYTEARRAY(1byte), size(1byte) + offset1 = s1 + 4; + length1 = l1 - 4; + break; + case BinInterSedes.SMALLBYTEARRAY: + // skipping mNull, TUPLE_1, SMALLBYTEARRAY(1byte), size(2byte) + offset1 = s1 + 5; + length1 = l1 - 5; + break; + case BinInterSedes.BYTEARRAY: + // skipping mNull, TUPLE_1, BYTEARRAY(1byte), size(4byte) + offset1 = s1 + 7; + length1 = l1 - 7; + break; + default: + offset1 = length1 = 0; + dataByteArraysCompare = false; + } + switch(b2[s2 + 2]) { + case BinInterSedes.TINYBYTEARRAY: + offset2 = s2 + 4; + length2 = l2 - 4; + break; + case BinInterSedes.SMALLBYTEARRAY: + offset2 = s2 + 5; + length2 = l2 - 5; + break; + case BinInterSedes.BYTEARRAY: + offset2 = s2 + 7; + length2 = l2 - 7; + break; + default: + offset2 = length2 = 0; + dataByteArraysCompare=false; + } + if( dataByteArraysCompare ) { + rc = WritableComparator.compareBytes(b1, offset1, length1, b2, offset2, length2); + } else { + // Subtract 2, one for null byte and one for index byte + rc = mWrappedComp.compare(b1, s1 + 1, l1 - 2, b2, s2 + 1, l2 - 2); + } } else { // For sorting purposes two nulls are equal. if (b1[s1] != 0 && b2[s2] != 0) rc = 0; Modified: pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java?rev=1401112&r1=1401111&r2=1401112&view=diff ============================================================================== --- pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java (original) +++ pig/branches/branch-0.11/src/org/apache/pig/data/BinInterSedes.java Mon Oct 22 22:28:42 2012 @@ -862,11 +862,9 @@ public class BinInterSedes implements In if (type1 == type2) { int basz1 = readSize(bb1, dt1); int basz2 = readSize(bb2, dt2); - byte[] ba1 = new byte[basz1]; - byte[] ba2 = new byte[basz2]; - bb1.get(ba1); - bb2.get(ba2); - rc = DataByteArray.compare(ba1, ba2); + rc = org.apache.hadoop.io.WritableComparator.compareBytes( + bb1.array(), bb1.position(), basz1, + bb2.array(), bb2.position(), basz2); } break; } Modified: pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java?rev=1401112&r1=1401111&r2=1401112&view=diff ============================================================================== --- pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java (original) +++ pig/branches/branch-0.11/src/org/apache/pig/impl/io/NullableBytesWritable.java Mon Oct 22 22:28:42 2012 @@ -25,28 +25,32 @@ import org.apache.pig.data.TupleFactory; * */ public class NullableBytesWritable extends PigNullableWritable { + private static final TupleFactory mTupleFactory = TupleFactory.getInstance(); public NullableBytesWritable() { - mValue = TupleFactory.getInstance().newTuple(); + mValue = mTupleFactory.newTuple(); } /** * @param obj */ public NullableBytesWritable(Object obj) { - mValue = TupleFactory.getInstance().newTuple(); - ((Tuple)mValue).append(obj); + mValue = mTupleFactory.newTuple(1); + try { + ((Tuple)mValue).set(0, obj); + } catch (ExecException e) { + throw new RuntimeException(e); + } } public Object getValueAsPigType() { - if (isNull()) + if (isNull()) { return null; - Object obj = null; + } try { - obj = ((Tuple)mValue).get(0); + return ((Tuple)mValue).get(0); } catch (ExecException e) { throw new RuntimeException(e); } - return obj; } } Added: pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java?rev=1401112&view=auto ============================================================================== --- pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java (added) +++ pig/branches/branch-0.11/test/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/TestPigBytesRawComparator.java Mon Oct 22 22:28:42 2012 @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.backend.hadoop.executionengine.mapReduceLayer; + +import static junit.framework.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.DataOutput; +import java.io.DataOutputStream; +import java.util.Arrays; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.JobConf; +import org.apache.pig.data.BinInterSedes; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.impl.io.NullableBytesWritable; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class TestPigBytesRawComparator { + private static PigBytesRawComparator bytesRawComparator ; + private static ByteArrayOutputStream bas; + + + @BeforeClass + public static void setupBeforeClass() throws Exception { + bytesRawComparator = new PigBytesRawComparator(); + bytesRawComparator.setConf(new JobConf()); + + //this bytesarray is used for holding serialized NullableBytesWritable + bas = new ByteArrayOutputStream(BinInterSedes.UNSIGNED_SHORT_MAX + 10); + } + @AfterClass + public static void setupAfterClass() throws Exception { + bytesRawComparator = null; + bas = null; + } + + @Test + public void testBoolean() throws Exception { + assertTrue("boolean True and False considered equal", + compareTwoObjectsAsNullableBytesWritables( + new Boolean(true), new Boolean(false)) != 0); + } + + + @Test + public void testSingleInt0and1() throws Exception { + assertTrue("Integer 0 and Integer 1 considered equal", + compareTwoObjectsAsNullableBytesWritables( + new Integer(0), new Integer(1)) != 0); + } + + @Test + public void testSingleInt() throws Exception { + assertTrue("Integer 5 and Integer 7 considered equal", + compareTwoObjectsAsNullableBytesWritables( + new Integer(5), new Integer(7)) != 0); + } + + @Test + public void testSingleLong0and1() throws Exception { + assertTrue("Long 0 and Long 1 considered equal", + compareTwoObjectsAsNullableBytesWritables( + new Long(0), new Long(1)) != 0); + } + + @Test + public void testSingleLong() throws Exception { + assertTrue("Long 5 and Long 7 considered equal", + compareTwoObjectsAsNullableBytesWritables( + new Long(5), new Long(7)) != 0); + } + + @Test + public void testSingleByte() throws Exception { + assertTrue("Byte 5 and Byte 7 considered equal", + compareTwoObjectsAsNullableBytesWritables( + new Byte((byte)5), new Byte((byte)7)) != 0); + } + + @Test + public void testSingleByteDiffByteArray() throws Exception { + assertTrue("'ab' and 'ac' considered equal", + compareTwoObjectsAsNullableBytesWritables( + new DataByteArray(new byte[] {'a','b'}), + new DataByteArray(new byte[] {'a','c'})) != 0); + assertTrue("'ab' and 'abc' considered equal", + compareTwoObjectsAsNullableBytesWritables( + new DataByteArray(new byte[] {'a','b'}), + new DataByteArray(new byte[] {'a','b','c'})) != 0); + assertTrue("'ab' and 'cb' considered equal", + compareTwoObjectsAsNullableBytesWritables( + new DataByteArray(new byte[] {'a','b'}), + new DataByteArray(new byte[] {'c','b'})) != 0); + assertTrue("'a' and 'b' considered equal", + compareTwoObjectsAsNullableBytesWritables( + new DataByteArray(new byte[] {'a'}), + new DataByteArray(new byte[] {'b'})) != 0); + } + + @Test + public void testDifferentType() throws Exception { + assertTrue("Integer 9999 and Long 9999 considered equal", + compareTwoObjectsAsNullableBytesWritables(new Integer(9999), new Long(9999)) != 0 ); + } + + @Test + public void testByteArrayAlphabeticalOrderingByLength() throws Exception { + // making sure we order as + // '1' + // '2' + // '22' + // '222' and repeats... + // If compare includes the header, order suddenly changes when header + // changes from INTEGER_INSHORT to INTEGER. + // Here, we're making sure that doesn't happen. + // + for(int i = BinInterSedes.UNSIGNED_BYTE_MAX - 3 ; + i < BinInterSedes.UNSIGNED_BYTE_MAX + 3; i++){ + DataByteArray dba1 = createRepeatedByteArray((byte)'2', i); + DataByteArray dba2 = createRepeatedByteArray((byte)'2', i + 1); + assertTrue("ByteArray order changed at UNSIGNED_BYTE_MAX boundary", + compareTwoObjectsAsNullableBytesWritables(dba1, dba2) < 0); + } + for(int i = BinInterSedes.UNSIGNED_SHORT_MAX -3 ; + i < BinInterSedes.UNSIGNED_SHORT_MAX + 3; i++){ + DataByteArray dba1 = createRepeatedByteArray((byte)'2', i); + DataByteArray dba2 = createRepeatedByteArray((byte)'2', i + 1); + assertTrue("ByteArray order changed at UNSIGNED_SHORT_MAX boundary", + compareTwoObjectsAsNullableBytesWritables(dba1, dba2) < 0); + } + } + + @Test + public void testLongByteArrays() throws Exception { + for(int length: new int [] {BinInterSedes.UNSIGNED_BYTE_MAX + 50, + BinInterSedes.UNSIGNED_SHORT_MAX + 50} ) { + // To make sure that offset & length are set correctly for + // BinInterSedes.SMALLBYTEARRAY and BinInterSedes.BYTEARRAY, + // create a long bytearray and compare with first or last byte changed + + byte [] ba1 = new byte[length]; + byte [] ba2 = new byte[length]; + Arrays.fill(ba1, 0, length, (byte)'a'); + Arrays.fill(ba2, 0, length, (byte)'a'); + //changing only the last byte + ba2[length-1] = 'b'; + assertTrue("ByteArray with length: " + length + + " compare failed with the last byte", + compareTwoObjectsAsNullableBytesWritables( + new DataByteArray(ba1), new DataByteArray(ba2)) != 0); + //setting back + ba2[length-1] = 'a'; + + //now changing the first byte + ba2[0] = 'b'; + assertTrue("ByteArray with length: " + length + + " compare failed with the first byte", + compareTwoObjectsAsNullableBytesWritables( + new DataByteArray(ba1), new DataByteArray(ba2)) != 0); + } + } + + private DataByteArray createRepeatedByteArray(byte c, int num) { + byte [] ba = new byte[num]; + Arrays.fill(ba, 0, num, c); + return new DataByteArray(ba); + } + + // Wrap the passed object with NullableBytesWritable and return the serialized + // form. + private byte [] serializeAsNullableBytesWritable(Object obj) throws Exception { + NullableBytesWritable nbw = new NullableBytesWritable(obj); + bas.reset(); + DataOutput dout = new DataOutputStream(bas); + nbw.write(dout); + return bas.toByteArray(); + } + + // Take two objects wrapped by NullableBytesWritable and comare using + // PigBytesRawComparator + private int compareTwoObjectsAsNullableBytesWritables(Object obj1, Object obj2) throws Exception { + byte [] ba1 = serializeAsNullableBytesWritable(obj1); + byte [] ba2 = serializeAsNullableBytesWritable(obj2); + return bytesRawComparator.compare(ba1, 0, ba1.length, ba2, 0, ba2.length); + } +} Modified: pig/branches/branch-0.11/test/unit-tests URL: http://svn.apache.org/viewvc/pig/branches/branch-0.11/test/unit-tests?rev=1401112&r1=1401111&r2=1401112&view=diff ============================================================================== --- pig/branches/branch-0.11/test/unit-tests (original) +++ pig/branches/branch-0.11/test/unit-tests Mon Oct 22 22:28:42 2012 @@ -44,6 +44,7 @@ **/TestNull.java **/TestPackage.java **/TestParamSubPreproc.java +**/TestPigBytesRawComparator.java **/TestPhyOp.java **/TestPigScriptParser.java **/TestPigSplit.java