pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From til...@apache.org
Subject svn commit: r1752336 - in /pdfbox/branches/2.0/pdfbox/src: main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java main/java/org/apache/pdfbox/util/Hex.java test/java/org/apache/pdfbox/util/TestHexUtil.java
Date Tue, 12 Jul 2016 19:24:20 GMT
Author: tilman
Date: Tue Jul 12 19:24:20 2016
New Revision: 1752336

URL: http://svn.apache.org/viewvc?rev=1752336&view=rev
Log:
PDFBOX-3418: optimize string to hex conversion, as suggested by Michael Doswald

Added:
    pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java
      - copied unchanged from r1752335, pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestHexUtil.java
Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java?rev=1752336&r1=1752335&r2=1752336&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/ToUnicodeWriter.java Tue Jul 12 19:24:20 2016
@@ -26,6 +26,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
 import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
 
 /**
  * Writes ToUnicode Mapping Files.
@@ -154,15 +155,15 @@ final class ToUnicodeWriter
             {
                 int index = batch * 100 + j;
                 writer.write('<');
-                writer.write(toHex(srcFrom.get(index)));
+                writer.write(Hex.getChars(srcFrom.get(index).shortValue()));
                 writer.write("> ");
 
                 writer.write('<');
-                writer.write(toHex(srcTo.get(index)));
+                writer.write(Hex.getChars(srcTo.get(index).shortValue()));
                 writer.write("> ");
 
-                writer.write("<");
-                writer.write(stringToHex(dstString.get(index)));
+                writer.write('<');
+                writer.write(Hex.getCharsUTF16BE(dstString.get(index)));
                 writer.write(">\n");
             }
             writeLine(writer, "endbfrange\n");
@@ -182,20 +183,4 @@ final class ToUnicodeWriter
         writer.write(text);
         writer.write('\n');
     }
-
-    private String toHex(int num)
-    {
-        return String.format("%04X", num);
-    }
-
-    private String stringToHex(String text)
-    {
-        // use of non-BMP code points requires PDF 1.5 or later, otherwise we're limited to UCS-2
-        StringBuilder sb = new StringBuilder();
-        for (byte b : text.getBytes(Charsets.UTF_16BE))
-        {
-            sb.append(String.format("%02X", b));
-        }
-        return sb.toString();
-    }
 }

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java?rev=1752336&r1=1752335&r2=1752336&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java Tue Jul 12 19:24:20 2016
@@ -33,9 +33,8 @@ public final class Hex
      * https://stackoverflow.com/questions/2817752/java-code-to-convert-byte-to-hexadecimal
      *
      */
-    private static final String HEXES_STRING = "0123456789ABCDEF";
-
-    private static final byte[] HEXES = HEXES_STRING.getBytes(Charsets.US_ASCII);
+    private static final byte[] HEX_BYTES = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
+    private static final char[] HEX_CHARS = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
 
     private Hex() {}
 
@@ -44,7 +43,7 @@ public final class Hex
      */
     public static String getString(byte b)
     {
-        char[] chars = new char[]{HEXES_STRING.charAt(getHighNibble(b)), HEXES_STRING.charAt(getLowNibble(b))};
+        char[] chars = new char[]{HEX_CHARS[getHighNibble(b)], HEX_CHARS[getLowNibble(b)]};
         return new String(chars);
     }
 
@@ -56,7 +55,7 @@ public final class Hex
         StringBuilder string = new StringBuilder(bytes.length * 2);
         for (byte b : bytes)
         {
-            string.append(HEXES_STRING.charAt(getHighNibble(b))).append(HEXES_STRING.charAt(getLowNibble(b)));
+            string.append(HEX_CHARS[getHighNibble(b)]).append(HEX_CHARS[getLowNibble(b)]);
         }
         return string.toString();
     }
@@ -66,7 +65,7 @@ public final class Hex
      */
     public static byte[] getBytes(byte b)
     {
-        return new byte[]{HEXES[getHighNibble(b)], HEXES[getLowNibble(b)]};
+        return new byte[]{HEX_BYTES[getHighNibble(b)], HEX_BYTES[getLowNibble(b)]};
     }
     
     /**
@@ -77,13 +76,57 @@ public final class Hex
         byte[] asciiBytes = new byte[bytes.length*2];
         for(int i=0; i< bytes.length; i++)
         {
-            asciiBytes[i*2] = HEXES[getHighNibble(bytes[i])];
-            asciiBytes[i*2+1] = HEXES[getLowNibble(bytes[i])];
+            asciiBytes[i*2] = HEX_BYTES[getHighNibble(bytes[i])];
+            asciiBytes[i*2+1] = HEX_BYTES[getLowNibble(bytes[i])];
         }
         return asciiBytes;
     }
 
     /** 
+     * Returns the characters corresponding to the ASCII hex encoding of the given short.
+     */
+    public static char[] getChars(short num)
+    {
+        char[] hex = new char[4];
+        hex[0] = HEX_CHARS[(num >> 12) & 0x0F];
+        hex[1] = HEX_CHARS[(num >> 8) & 0x0F];
+        hex[2] = HEX_CHARS[(num >> 4) & 0x0F];
+        hex[3] = HEX_CHARS[num & 0x0F];
+        return hex;
+    }
+
+    /**
+     * Takes the characters in the given string, convert it to bytes in UTF16-BE format
+     * and build a char array that corresponds to the ASCII hex encoding of the resulting
+     * bytes.
+     *
+     * Example:
+     * <pre>
+     *   getCharsUTF16BE("ab") == new char[]{'0','0','6','1','0','0','6','2'}
+     * </pre>
+     *
+     * @param text The string to convert
+     * @return The string converted to hex
+     */
+    public static char[] getCharsUTF16BE(String text)
+    {
+        // Note that the internal representation of string in Java is already UTF-16. Therefore
+        // we do not need to use an encoder to convert the string to its byte representation.
+        char[] hex = new char[text.length()*4];
+
+        for (int stringIdx = 0, charIdx = 0; stringIdx < text.length(); stringIdx++)
+        {
+            char c = text.charAt(stringIdx);
+            hex[charIdx++] = HEX_CHARS[(c >> 12) & 0x0F];
+            hex[charIdx++] = HEX_CHARS[(c >> 8) & 0x0F];
+            hex[charIdx++] = HEX_CHARS[(c >> 4) & 0x0F];
+            hex[charIdx++] = HEX_CHARS[c & 0x0F];
+        }
+
+        return hex;
+    }
+
+    /**
      * Writes the given byte as hex value to the given output stream.
      * @param b the byte to be written
      * @param output the output stream to be written to
@@ -91,8 +134,8 @@ public final class Hex
      */
     public static void writeHexByte(byte b, OutputStream output) throws IOException
     {
-        output.write(HEXES[getHighNibble(b)]);
-        output.write(HEXES[getLowNibble(b)]);
+        output.write(HEX_BYTES[getHighNibble(b)]);
+        output.write(HEX_BYTES[getLowNibble(b)]);
     }
 
     /** 



Mime
View raw message