pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From roh...@apache.org
Subject svn commit: r1746359 - in /pig/branches/branch-0.16: CHANGES.txt src/org/apache/pig/data/utils/SedesHelper.java
Date Tue, 31 May 2016 22:59:59 GMT
Author: rohini
Date: Tue May 31 22:59:59 2016
New Revision: 1746359

URL: http://svn.apache.org/viewvc?rev=1746359&view=rev
Log:
PIG-4821: Pig chararray field with special UTF-8 chars as part of tuple join key produces
wrong results in Tez (rohini)

Modified:
    pig/branches/branch-0.16/CHANGES.txt
    pig/branches/branch-0.16/src/org/apache/pig/data/utils/SedesHelper.java

Modified: pig/branches/branch-0.16/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/CHANGES.txt?rev=1746359&r1=1746358&r2=1746359&view=diff
==============================================================================
--- pig/branches/branch-0.16/CHANGES.txt (original)
+++ pig/branches/branch-0.16/CHANGES.txt Tue May 31 22:59:59 2016
@@ -131,6 +131,8 @@ PIG-4639: Add better parser for Apache H
 
 BUG FIXES
 
+PIG-4821: Pig chararray field with special UTF-8 chars as part of tuple join key produces wrong results in Tez (rohini)
+
 PIG-4734: TOMAP schema inferring breaks some scripts in type checking for bincond (daijy)
 
 PIG-4786: CROSS will not work correctly with Grace Parallelism (daijy)

Modified: pig/branches/branch-0.16/src/org/apache/pig/data/utils/SedesHelper.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/data/utils/SedesHelper.java?rev=1746359&r1=1746358&r2=1746359&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/data/utils/SedesHelper.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/data/utils/SedesHelper.java Tue May 31 22:59:59 2016
@@ -61,25 +61,25 @@ public class SedesHelper {
     public static void writeChararray(DataOutput out, String s) throws IOException {
         // a char can take up to 3 bytes in the modified utf8 encoding
         // used by DataOutput.writeUTF, so use UNSIGNED_SHORT_MAX/3
-        if (s.length() < BinInterSedes.UNSIGNED_SHORT_MAX / 3) {
+        byte[] utfBytes = s.getBytes(BinInterSedes.UTF8);
+        int length = utfBytes.length;
+        if (length < BinInterSedes.UNSIGNED_SHORT_MAX) {
             out.writeByte(BinInterSedes.SMALLCHARARRAY);
-            out.writeUTF(s);
+            out.writeShort(length);
         } else {
-            byte[] utfBytes = s.getBytes(BinInterSedes.UTF8);
-            int length = utfBytes.length;
-
             out.writeByte(BinInterSedes.CHARARRAY);
             out.writeInt(length);
-            out.write(utfBytes);
         }
+        out.write(utfBytes);
     }
 
     public static String readChararray(DataInput in, byte type) throws IOException {
+        int size;
         if (type == BinInterSedes.SMALLCHARARRAY) {
-            return in.readUTF();
+            size = in.readUnsignedShort();
+        } else {
+            size = in.readInt();
         }
-
-        int size = in.readInt();
         byte[] buf = new byte[size];
         in.readFully(buf);
         return new String(buf, BinInterSedes.UTF8);



Mime
View raw message