ant-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "aborisevich" <aborisev...@beldts.de>
Subject Including national characters in Tar Entry names in an archive.
Date Tue, 19 May 2009 15:31:57 GMT
Hello everyone. I have found the following bug in the current org.apache.tools.tar package. A tar
archive was created on one system (for example Windows XP, default charset CP-1251). This
tar archive contains TarEntries whose names use national characters such as German umlauts.
The archive file was then copied to a Linux system (default charset UTF-8); after unpacking
the archive there, information was lost (the TarEntry names were lost). Below is a possible
solution for this problem.


Index: ant-core/src/main/org/apache/tools/tar/TarInputStream.java
===================================================================
--- ant-core/src/main/org/apache/tools/tar/TarInputStream.java (revision 776302)
+++ ant-core/src/main/org/apache/tools/tar/TarInputStream.java (working copy)
@@ -264,10 +264,10 @@
         if (currEntry != null && currEntry.isGNULongNameEntry()) {
             // read in the name
             StringBuffer longName = new StringBuffer();
-            byte[] buf = new byte[SMALL_BUFFER_SIZE];
+            byte[] buf = new byte[(int)currEntry.getSize()];
             int length = 0;
             while ((length = read(buf)) >= 0) {
-                longName.append(new String(buf, 0, length));
+                longName.append(new String(buf, 0, length, "UTF-8"));
             }
             getNextEntry();
             if (currEntry == null) {
Index: ant-core/src/main/org/apache/tools/tar/TarOutputStream.java
===================================================================
--- ant-core/src/main/org/apache/tools/tar/TarOutputStream.java (revision 776302)
+++ ant-core/src/main/org/apache/tools/tar/TarOutputStream.java (working copy)
@@ -179,9 +179,10 @@
                 TarEntry longLinkEntry = new TarEntry(TarConstants.GNU_LONGLINK,
                                                       TarConstants.LF_GNUTYPE_LONGNAME);
 
-                longLinkEntry.setSize(entry.getName().length() + 1);
+                byte[] nameBytes = entry.getName().getBytes("UTF-8");
+                longLinkEntry.setSize(nameBytes.length + 1);
                 putNextEntry(longLinkEntry);
-                write(entry.getName().getBytes());
+               write(nameBytes);
                 write(0);
                 closeEntry();
             } else if (longFileMode != LONGFILE_TRUNCATE) {
Index: ant-core/src/main/org/apache/tools/tar/TarUtils.java
===================================================================
--- ant-core/src/main/org/apache/tools/tar/TarUtils.java (revision 776302)
+++ ant-core/src/main/org/apache/tools/tar/TarUtils.java (working copy)
@@ -23,6 +23,8 @@
 
 package org.apache.tools.tar;
 
+import java.io.UnsupportedEncodingException;
+
 /**
  * This class provides static utility methods to work with byte streams.
  *
@@ -79,15 +81,21 @@
      * @return The header's entry name.
      */
     public static StringBuffer parseName(byte[] header, int offset, int length) {
-        StringBuffer result = new StringBuffer(length);
+        StringBuffer result = null;
+        int nameLen = length;
+
         int          end = offset + length;
-
         for (int i = offset; i < end; ++i) {
-            if (header[i] == 0) {
+            if(header[i] == 0) {
+                nameLen = i - offset;
                 break;
             }
+        }
 
-            result.append((char) header[i]);
+        try {
+            result = new StringBuffer(new String(header, offset, nameLen, "UTF-8"));
+        } catch(UnsupportedEncodingException e) {
+           e.printStackTrace();
         }
 
         return result;
@@ -103,18 +111,23 @@
      * @return The number of bytes in a header's entry name.
      */
     public static int getNameBytes(StringBuffer name, byte[] buf, int offset, int length)
{
-        int i;
+         int nameLength = -1;
+         try
+         {
+             byte nameBytes[] = name.toString().getBytes("UTF-8");
+             nameLength = nameBytes.length ;
+             System.arraycopy(nameBytes, 0, buf, offset, nameLength);
+         } catch(UnsupportedEncodingException e) {
+             e.printStackTrace();
+         }
 
-        for (i = 0; i < length && i < name.length(); ++i) {
-            buf[offset + i] = (byte) name.charAt(i);
-        }
 
-        for (; i < length; ++i) {
-            buf[offset + i] = 0;
-        }
+         for (; nameLength < length; ++nameLength) {
+             buf[offset + nameLength] = 0;
+         }
 
-        return offset + length;
-    }
+         return offset + length;
+     }
 
     /**
      * Parse an octal integer from a header buffer.


Best Regards.
Alexander Borisevich  
BelDTS Minsk Belarus
Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message