From common-commits-return-10519-apmail-hadoop-common-commits-archive=hadoop.apache.org@hadoop.apache.org Wed Feb 10 22:15:59 2010 Return-Path: Delivered-To: apmail-hadoop-common-commits-archive@www.apache.org Received: (qmail 24547 invoked from network); 10 Feb 2010 22:15:59 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 10 Feb 2010 22:15:59 -0000 Received: (qmail 12532 invoked by uid 500); 10 Feb 2010 22:15:58 -0000 Delivered-To: apmail-hadoop-common-commits-archive@hadoop.apache.org Received: (qmail 12475 invoked by uid 500); 10 Feb 2010 22:15:58 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: common-dev@hadoop.apache.org Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 12466 invoked by uid 99); 10 Feb 2010 22:15:58 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 10 Feb 2010 22:15:58 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=10.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 10 Feb 2010 22:15:56 +0000 Received: by eris.apache.org (Postfix, from userid 65534) id 356F2238889B; Wed, 10 Feb 2010 22:15:35 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r908680 - in /hadoop/common/branches/branch-0.21: CHANGES.txt src/java/org/apache/hadoop/io/UTF8.java src/test/core/org/apache/hadoop/io/TestUTF8.java Date: Wed, 10 Feb 2010 22:15:14 -0000 To: common-commits@hadoop.apache.org From: cutting@apache.org X-Mailer: svnmailer-1.0.8 Message-Id: <20100210221535.356F2238889B@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: cutting Date: Wed Feb 10 22:14:39 2010 New Revision: 908680 URL: http://svn.apache.org/viewvc?rev=908680&view=rev Log: HADOOP-6522. Fix decoding of codepoint zero in UTF8. Modified: hadoop/common/branches/branch-0.21/CHANGES.txt hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/UTF8.java hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/TestUTF8.java Modified: hadoop/common/branches/branch-0.21/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.21/CHANGES.txt?rev=908680&r1=908679&r2=908680&view=diff ============================================================================== --- hadoop/common/branches/branch-0.21/CHANGES.txt (original) +++ hadoop/common/branches/branch-0.21/CHANGES.txt Wed Feb 10 22:14:39 2010 @@ -1147,6 +1147,8 @@ HADOOP-6290. Prevent duplicate slf4j-simple jar via Avro's classpath. (Owen O'Malley via cdouglas) + HADOOP-6522. Fix decoding of codepoint zero in UTF8. (cutting) + Release 0.20.2 - Unreleased NEW FEATURES Modified: hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/UTF8.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/UTF8.java?rev=908680&r1=908679&r2=908680&view=diff ============================================================================== --- hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/UTF8.java (original) +++ hadoop/common/branches/branch-0.21/src/java/org/apache/hadoop/io/UTF8.java Wed Feb 10 22:14:39 2010 @@ -253,7 +253,7 @@ int utf8Length = 0; for (int i = 0; i < stringLength; i++) { int c = string.charAt(i); - if ((c >= 0x0001) && (c <= 0x007F)) { + if (c <= 0x007F) { utf8Length++; } else if (c > 0x07FF) { utf8Length += 3; @@ -270,7 +270,7 @@ final int end = start + length; for (int i = start; i < end; i++) { int code = s.charAt(i); - if (code >= 0x01 && code <= 0x7F) { + if (code <= 0x7F) { out.writeByte((byte)code); } else if (code <= 0x07FF) { out.writeByte((byte)(0xC0 | ((code >> 6) & 0x1F))); Modified: hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/TestUTF8.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/TestUTF8.java?rev=908680&r1=908679&r2=908680&view=diff ============================================================================== --- hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/TestUTF8.java (original) +++ hadoop/common/branches/branch-0.21/src/test/core/org/apache/hadoop/io/TestUTF8.java Wed Feb 10 22:14:39 2010 @@ -22,6 +22,7 @@ import java.util.Random; /** Unit tests for UTF8. */ +@SuppressWarnings("deprecation") public class TestUTF8 extends TestCase { public TestUTF8(String name) { super(name); } @@ -37,13 +38,13 @@ } public void testWritable() throws Exception { - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 10000; i++) { TestWritable.testWritable(new UTF8(getTestString())); } } public void testGetBytes() throws Exception { - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 10000; i++) { // generate a random string String before = getTestString(); @@ -57,7 +58,7 @@ DataOutputBuffer out = new DataOutputBuffer(); DataInputBuffer in = new DataInputBuffer(); - for (int i = 0; i < 10; i++) { + for (int i = 0; i < 10000; i++) { // generate a random string String before = getTestString(); @@ -82,5 +83,14 @@ } } + + public void testNullEncoding() throws Exception { + String s = new String(new char[] { 0 }); + + DataOutputBuffer dob = new DataOutputBuffer(); + new UTF8(s).write(dob); + + assertEquals(s, new String(dob.getData(), 2, dob.getLength()-2, "UTF-8")); + } }