Return-Path: X-Original-To: apmail-commons-commits-archive@minotaur.apache.org Delivered-To: apmail-commons-commits-archive@minotaur.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 1E3A291F9 for ; Sat, 24 Mar 2012 05:33:05 +0000 (UTC) Received: (qmail 31793 invoked by uid 500); 24 Mar 2012 05:33:03 -0000 Delivered-To: apmail-commons-commits-archive@commons.apache.org Received: (qmail 31470 invoked by uid 500); 24 Mar 2012 05:32:57 -0000 Mailing-List: contact commits-help@commons.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@commons.apache.org Delivered-To: mailing list commits@commons.apache.org Received: (qmail 31446 invoked by uid 99); 24 Mar 2012 05:32:56 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 24 Mar 2012 05:32:56 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 24 Mar 2012 05:32:53 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id CD3B223889BB for ; Sat, 24 Mar 2012 05:32:31 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1304709 - in /commons/proper/compress/trunk/src: changes/changes.xml main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Date: Sat, 24 Mar 2012 05:32:31 -0000 To: commits@commons.apache.org From: bodewig@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20120324053231.CD3B223889BB@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: bodewig Date: Sat Mar 24 05:32:31 2012 New Revision: 1304709 URL: http://svn.apache.org/viewvc?rev=1304709&view=rev Log: optionally use PAX headers when writing non-ASCII file names. COMPRESS-183 Modified: commons/proper/compress/trunk/src/changes/changes.xml commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Modified: commons/proper/compress/trunk/src/changes/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1304709&r1=1304708&r2=1304709&view=diff ============================================================================== --- commons/proper/compress/trunk/src/changes/changes.xml (original) +++ commons/proper/compress/trunk/src/changes/changes.xml Sat Mar 24 05:32:31 2012 @@ -46,6 +46,17 @@ The type attribute can be add,u + + The tar package now allows the encoding of file names to be + specified and can optionally use PAX extension headers to + write non-ASCII file names. + The stream classes now write (or expect to read) archives that + use the platform's native encoding for file names. Apache + Commons Compress 1.3 used to strip everything but the lower + eight bits of each character which effectively only worked for + ASCII and ISO-8859-1 file names. + This new default behavior is a breaking change. + TarArchiveInputStream failed to parse PAX headers that contained non-ASCII characters. Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java?rev=1304709&r1=1304708&r2=1304709&view=diff ============================================================================== --- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java (original) +++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStream.java Sat Mar 24 05:32:31 2012 @@ -81,6 +81,10 @@ public class TarArchiveOutputStream exte private final ZipEncoding encoding; + private boolean addPaxHeadersForNonAsciiNames = false; + private static final ZipEncoding ASCII = + ZipEncodingHelper.getZipEncoding("ASCII"); + /** * Constructor for TarInputStream. * @param os the output stream to use @@ -172,6 +176,13 @@ public class TarArchiveOutputStream exte this.bigNumberMode = bigNumberMode; } + /** + * Whether to add a PAX extension header for non-ASCII file names. + * @since Apache Commons Compress 1.4 + */ + public void setAddPaxHeadersForNonAsciiNames(boolean b) { + addPaxHeadersForNonAsciiNames = b; + } @Deprecated @Override @@ -254,11 +265,14 @@ public class TarArchiveOutputStream exte } TarArchiveEntry entry = (TarArchiveEntry) archiveEntry; Map paxHeaders = new HashMap(); - final byte[] nameBytes = encoding.encode(entry.getName()).array(); + final String entryName = entry.getName(); + final byte[] nameBytes = encoding.encode(entryName).array(); + boolean paxHeaderContainsPath = false; if (nameBytes.length >= TarConstants.NAMELEN) { if (longFileMode == LONGFILE_POSIX) { - paxHeaders.put("path", entry.getName()); + paxHeaders.put("path", entryName); + paxHeaderContainsPath = true; } else if (longFileMode == LONGFILE_GNU) { // create a TarEntry for the LongLink, the contents // of which are the entry's name @@ -271,7 +285,7 @@ public class TarArchiveOutputStream exte write(0); // NUL terminator closeArchiveEntry(); } else if (longFileMode != LONGFILE_TRUNCATE) { - throw new RuntimeException("file name '" + entry.getName() + throw new RuntimeException("file name '" + entryName + "' is too long ( > " + TarConstants.NAMELEN + " bytes)"); } @@ -283,8 +297,13 @@ public class TarArchiveOutputStream exte failForBigNumbers(entry); } + if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath + && !ASCII.canEncode(entryName)) { + paxHeaders.put("path", entryName); + } + if (paxHeaders.size() > 0) { - writePaxHeaders(entry.getName(), paxHeaders); + writePaxHeaders(entryName, paxHeaders); } entry.writeEntryHeader(recordBuf, encoding, @@ -298,7 +317,7 @@ public class TarArchiveOutputStream exte } else { currSize = entry.getSize(); } - currName = entry.getName(); + currName = entryName; haveUnclosedEntry = true; } @@ -426,7 +445,7 @@ public class TarArchiveOutputStream exte */ void writePaxHeaders(String entryName, Map headers) throws IOException { - String name = "./PaxHeaders.X/" + entryName; + String name = "./PaxHeaders.X/" + stripTo7Bits(entryName); if (name.length() >= TarConstants.NAMELEN) { name = name.substring(0, TarConstants.NAMELEN - 1); } @@ -461,6 +480,18 @@ public class TarArchiveOutputStream exte closeArchiveEntry(); } + private String stripTo7Bits(String name) { + final int length = name.length(); + StringBuffer result = new StringBuffer(length); + for (int i = 0; i < length; i++) { + char stripped = (char) (name.charAt(i) & 0x7F); + if (stripped != 0) { // would be read as Trailing null + result.append(stripped); + } + } + return result.toString(); + } + /** * Write an EOF (end of archive) record to the tar archive. * An EOF record consists of a record of all zeros. Modified: commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java?rev=1304709&r1=1304708&r2=1304709&view=diff ============================================================================== --- commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java (original) +++ commons/proper/compress/trunk/src/test/java/org/apache/commons/compress/archivers/tar/TarArchiveOutputStreamTest.java Sat Mar 24 05:32:31 2012 @@ -274,4 +274,27 @@ public class TarArchiveOutputStreamTest } } + public void testWriteNonAsciiPathNamePaxHeader() throws Exception { + String n = "\u00e4"; + TarArchiveEntry t = new TarArchiveEntry(n); + t.setSize(10 * 1024); + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + TarArchiveOutputStream tos = new TarArchiveOutputStream(bos); + tos.setAddPaxHeadersForNonAsciiNames(true); + tos.putArchiveEntry(t); + tos.write(new byte[10 * 1024]); + tos.closeArchiveEntry(); + tos.close(); + byte[] data = bos.toByteArray(); + assertEquals("11 path=" + n + "\n", + new String(data, 512, 11, "UTF-8")); + FileOutputStream fos = new FileOutputStream("/tmp/x"); + fos.write(data); + fos.close(); + TarArchiveInputStream tin = + new TarArchiveInputStream(new ByteArrayInputStream(data)); + TarArchiveEntry e = tin.getNextTarEntry(); + assertEquals(n, e.getName()); + } + } \ No newline at end of file