Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id F081E200D16 for ; Tue, 10 Oct 2017 17:44:14 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id EEEFC160BE0; Tue, 10 Oct 2017 15:44:14 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 403B11609E5 for ; Tue, 10 Oct 2017 17:44:14 +0200 (CEST) Received: (qmail 30604 invoked by uid 500); 10 Oct 2017 15:44:13 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 30595 invoked by uid 99); 10 Oct 2017 15:44:13 -0000 Received: from Unknown (HELO svn01-us-west.apache.org) (209.188.14.144) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 10 Oct 2017 15:44:13 +0000 Received: from svn01-us-west.apache.org (localhost [127.0.0.1]) by svn01-us-west.apache.org (ASF Mail Server at svn01-us-west.apache.org) with ESMTP id 93F673A000C for ; Tue, 10 Oct 2017 15:44:12 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r1811718 - in /pdfbox/branches/2.0/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java Date: Tue, 10 Oct 2017 15:44:12 -0000 To: commits@pdfbox.apache.org From: tilman@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20171010154412.93F673A000C@svn01-us-west.apache.org> archived-at: Tue, 10 Oct 2017 15:44:15 -0000 Author: tilman Date: Tue Oct 10 15:44:12 2017 New Revision: 1811718 URL: http://svn.apache.org/viewvc?rev=1811718&view=rev Log: 
PDFBOX-3958: support UTF-16 Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java?rev=1811718&r1=1811717&r2=1811718&view=diff ============================================================================== --- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java (original) +++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURI.java Tue Oct 10 15:44:12 2017 @@ -82,7 +82,7 @@ public class PDActionURI extends PDActio /** * This will get the uniform resource identifier to resolve. It should be encoded in 7-bit - * ASCII, but UTF-8 is supported too. + * ASCII, but UTF-8 and UTF-16 are supported too. * * @return The URI entry of the specific URI action dictionary or null if there isn't any. 
*/ @@ -91,7 +91,21 @@ public class PDActionURI extends PDActio COSBase base = action.getDictionaryObject(COSName.URI); if (base instanceof COSString) { - return new String(((COSString) base).getBytes(), Charsets.UTF_8); + byte[] bytes = ((COSString) base).getBytes(); + if (bytes.length >= 2) + { + // UTF-16 (BE) + if ((bytes[0] & 0xFF) == 0xFE && (bytes[1] & 0xFF) == 0xFF) + { + return action.getString(COSName.URI); + } + // UTF-16 (LE) + if ((bytes[0] & 0xFF) == 0xFF && (bytes[1] & 0xFF) == 0xFE) + { + return action.getString(COSName.URI); + } + } + return new String(bytes, Charsets.UTF_8); } return null; } Modified: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java?rev=1811718&r1=1811717&r2=1811718&view=diff ============================================================================== --- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java (original) +++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/interactive/action/PDActionURITest.java Tue Oct 10 15:44:12 2017 @@ -15,6 +15,9 @@ */ package org.apache.pdfbox.pdmodel.interactive.action; +import java.io.IOException; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.cos.COSString; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import org.junit.Test; @@ -37,4 +40,45 @@ public class PDActionURITest actionURI.setURI("http://経営承継.com/"); assertEquals("http://経営承継.com/", actionURI.getURI()); } + + /** + * PDFBOX-3913: Check that URIs encoded in UTF16 (BE) are also supported. 
+ * + * @throws IOException + */ + @Test + public void testUTF16BEURI() throws IOException + { + PDActionURI actionURI = new PDActionURI(); + + // found in govdocs file 534948.pdf + COSString utf16URI = COSString.parseHex("FEFF0068007400740070003A002F002F00770077" + + "0077002E006E00610070002E006500640075002F0063006100740061006C006F006700" + + "2F00310031003100340030002E00680074006D006C"); + actionURI.getCOSObject().setItem(COSName.URI, utf16URI); + assertEquals("http://www.nap.edu/catalog/11140.html", actionURI.getURI()); + } + + /** + * PDFBOX-3913: Check that URIs encoded in UTF16 (LE) are also supported. + * + * @throws IOException + */ + @Test + public void testUTF16LEURI() throws IOException + { + PDActionURI actionURI = new PDActionURI(); + + COSString utf16URI = COSString.parseHex("FFFE68007400740070003A00"); + actionURI.getCOSObject().setItem(COSName.URI, utf16URI); + assertEquals("http:", actionURI.getURI()); + } + + @Test + public void testUTF7URI() + { + PDActionURI actionURI = new PDActionURI(); + actionURI.setURI("http://pdfbox.apache.org/"); + assertEquals("http://pdfbox.apache.org/", actionURI.getURI()); + } }