pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jahew...@apache.org
Subject svn commit: r1644828 [1/2] - in /pdfbox/trunk: pdfbox/src/main/java/org/apache/pdfbox/contentstream/ pdfbox/src/main/java/org/apache/pdfbox/cos/ pdfbox/src/main/java/org/apache/pdfbox/encoding/ pdfbox/src/main/java/org/apache/pdfbox/filter/ pdfbox/src/...
Date Fri, 12 Dec 2014 04:39:38 GMT
Author: jahewson
Date: Fri Dec 12 04:39:37 2014
New Revision: 1644828

URL: http://svn.apache.org/r1644828
Log:
PDFBOX-1242: Remove usage of COSString with content streams

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java   (with props)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Charsets.java   (with props)
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Hex.java   (with props)
Removed:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/persistence/util/COSHEXTable.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/StringUtil.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/encoding/
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/StandardSecurityHandler.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/fdf/FDFDictionary.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/digitalsignature/PDSignature.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/documentnavigation/destination/PDNamedDestination.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDAppearanceString.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSString.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
    pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractText.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/contentstream/PDFStreamEngine.java Fri Dec 12 04:39:37 2014
@@ -79,9 +79,6 @@ public class PDFStreamEngine
     private boolean isProcessingPage;
     private Matrix initialMatrix;
 
-    // skip malformed or otherwise unparseable input where possible
-    private boolean forceParsing;
-
     /**
      * Creates a new PDFStreamEngine.
      */
@@ -90,26 +87,6 @@ public class PDFStreamEngine
     }
 
     /**
-     * Indicates if force parsing is activated.
-     * 
-     * @return true if force parsing is active
-     */
-    public boolean isForceParsing()
-    {
-        return forceParsing;
-    }
-
-    /**
-     * Enable/Disable force parsing.
-     * 
-     * @param forceParsingValue true activates force parsing
-     */
-    public void setForceParsing(boolean forceParsingValue)
-    {
-        forceParsing = forceParsingValue;
-    }
-
-    /**
      * Register a custom operator processor with the engine.
      * 
      * @param operator The operator as a string.
@@ -473,7 +450,7 @@ public class PDFStreamEngine
     private void processStreamOperators(PDContentStream contentStream) throws IOException
     {
         List<COSBase> arguments = new ArrayList<COSBase>();
-        PDFStreamParser parser = new PDFStreamParser(contentStream.getContentStream(), forceParsing);
+        PDFStreamParser parser = new PDFStreamParser(contentStream.getContentStream());
         try
         {
             Iterator<Object> iter = parser.getTokenIterator();

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Fri Dec 12 04:39:37 2014
@@ -91,34 +91,16 @@ public class COSDocument extends COSBase
     private final File scratchDirectory;
     
     private final boolean useScratchFile;
-    
-    /**
-     * Flag to skip malformed or otherwise unparseable input where possible.
-     */
-    private final boolean forceParsing;
-
-    /**
-     * Constructor.
-     *
-     * @param forceParsingValue flag to skip malformed or otherwise unparseable
-     *                     document content where possible
-     */
-    public COSDocument(boolean forceParsingValue) 
-    {
-        this(null, forceParsingValue, false);
-    }
 
     /**
      * Constructor.
      *
-     * @param forceParsingValue flag to skip malformed or otherwise unparseable
-     *                     document content where possible
      * @param useScratchFiles enables the usage of a scratch file if set to true
      *                     
      */
-    public COSDocument(boolean forceParsingValue, boolean useScratchFiles) 
+    public COSDocument(boolean useScratchFiles)
     {
-        this(null, forceParsingValue, useScratchFiles);
+        this(null, useScratchFiles);
     }
 
     /**
@@ -128,14 +110,11 @@ public class COSDocument extends COSBase
      *
      * @param scratchDir directory for the temporary file,
      *                   or <code>null</code> to use the system default
-     * @param forceParsingValue flag to skip malformed or otherwise unparseable
-     *                     document content where possible
      * @param useScratchFiles enables the usage of a scratch file if set to true
      * 
      */
-    public COSDocument(File scratchDir, boolean forceParsingValue, boolean useScratchFiles) 
+    public COSDocument(File scratchDir, boolean useScratchFiles)
     {
-        forceParsing = forceParsingValue;
         scratchDirectory = scratchDir;
         useScratchFile = useScratchFiles;
     }
@@ -145,7 +124,7 @@ public class COSDocument extends COSBase
      */
     public COSDocument()
     {
-        this(false, false);
+        this(false);
     }
 
     /**
@@ -620,8 +599,7 @@ public class COSDocument extends COSBase
         for( COSObject objStream : getObjectsByType( COSName.OBJ_STM ) )
         {
             COSStream stream = (COSStream)objStream.getObject();
-            PDFObjectStreamParser parser =
-                new PDFObjectStreamParser(stream, this, forceParsing);
+            PDFObjectStreamParser parser = new PDFObjectStreamParser(stream, this);
             parser.parse();
             for( COSObject next : parser.getObjects() )
             {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java Fri Dec 12 04:39:37 2014
@@ -21,11 +21,12 @@ import java.io.OutputStream;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
+import org.apache.pdfbox.util.Charsets;
 
-import org.apache.pdfbox.persistence.util.COSHEXTable;
 
 /**
- * A PDF named object.
+ * A PDF Name object.
+ *
  * @author Ben Litchfield
  */
 public final class COSName extends COSBase implements Comparable<COSName>
@@ -37,12 +38,6 @@ public final class COSName extends COSBa
     // they are already defined as static constants and don't need to be synchronized
     private static Map<String, COSName> commonNameMap = new HashMap<String, COSName>();
 
-    /** The prefix to a PDF name. */
-    public static final byte[] NAME_PREFIX = new byte[] { 47 }; // The / character
-
-    /** The escape character for a name. */
-    public static final byte[] NAME_ESCAPE = new byte[] { 35 }; // The # character
-
     //
     // IMPORTANT: this list is *alphabetized* and does not need any JavaDoc
     //
@@ -578,15 +573,9 @@ public final class COSName extends COSBa
     }
 
     @Override
-    public boolean equals(Object o)
+    public boolean equals(Object object)
     {
-        boolean retval = this == o;
-        if (!retval && o instanceof COSName)
-        {
-            COSName other = (COSName) o;
-            retval = name == other.name || name.equals(other.name);
-        }
-        return retval;
+        return object instanceof COSName && name.equals(((COSName) object).name);
     }
 
     @Override
@@ -624,39 +613,37 @@ public final class COSName extends COSBa
      */
     public void writePDF(OutputStream output) throws IOException
     {
-        output.write(NAME_PREFIX);
-        byte[] bytes = getName().getBytes("ISO-8859-1");
-        for (int i = 0; i < bytes.length; i++)
+        output.write('/');
+        byte[] bytes = getName().getBytes(Charsets.US_ASCII);
+        for (byte b : bytes)
         {
-            int current = (bytes[i] + 256) % 256;
+            int current = (b + 256) % 256;
 
-            // Be more restrictive than the PDF spec, "Name Objects"
-            // see PDFBOX-2073
-            if ((current >= 'A' && current <= 'Z')
-                    || (current >= 'a' && current <= 'z')
-                    || (current >= '0' && current <= '9')
-                    || current == '+'
-                    || current == '-'
-                    || current == '_'
-                    || current == '@'
-                    || current == '*'
-                    || current == '$'
-                    || current == ';'
-                    || current == '.')
+            // be more restrictive than the PDF spec, "Name Objects", see PDFBOX-2073
+            if (current >= 'A' && current <= 'Z' ||
+                    current >= 'a' && current <= 'z' ||
+                    current >= '0' && current <= '9' ||
+                    current == '+' ||
+                    current == '-' ||
+                    current == '_' ||
+                    current == '@' ||
+                    current == '*' ||
+                    current == '$' ||
+                    current == ';' ||
+                    current == '.')
             {
                 output.write(current);
             }
             else
             {
-                output.write(NAME_ESCAPE);
-                output.write(COSHEXTable.TABLE[current]);
+                output.write('#');
+                output.write(String.format("%02X", current).getBytes(Charsets.US_ASCII));
             }
         }
     }
 
     /**
-     * Not usually needed except if resources need to be reclaimed in a long running process. Patch provided by
-     * flester@GMail.com incorporated 5/23/08, Danielwilson@users.SourceForge.net
+     * Not usually needed except if resources need to be reclaimed in a long running process.
      */
     public static synchronized void clearResources()
     {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSString.java Fri Dec 12 04:39:37 2014
@@ -18,221 +18,70 @@ package org.apache.pdfbox.cos;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.Charset;
 
+import java.util.Arrays;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.pdfbox.encoding.PDFDocEncodingCharset;
-import org.apache.pdfbox.persistence.util.COSHEXTable;
+import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
 
 /**
- * This represents a string object in a PDF document.
+ * A string object, which may be a text string, a PDFDocEncoded string, ASCII string, or byte string.
+ *
+ * <p>Text strings are used for character strings that contain information intended to be
+ * human-readable, such as text annotations, bookmark names, article names, document information,
+ * and so forth.
+ *
+ * <p>PDFDocEncoded strings are used for characters that are represented in a single byte.
+ *
+ * <p>ASCII strings are used for characters that are represented in a single byte using ASCII
+ * encoding.
+ *
+ * <p>Byte strings are used for binary data represented as a series of bytes, but the encoding is
+ * not known. The bytes of the string need not represent characters.
  * 
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * @version $Revision: 1.30 $
+ * @author Ben Litchfield
+ * @author John Hewson
  */
-public class COSString extends COSBase
+public final class COSString extends COSBase
 {
-
-    /**
-     * Log instance.
-     */
     private static final Log LOG = LogFactory.getLog(COSString.class);
 
-    /**
-     * One of the open string tokens.
-     */
-    public static final byte[] STRING_OPEN = new byte[] { 40 }; // "(".getBytes();
-    /**
-     * One of the close string tokens.
-     */
-    public static final byte[] STRING_CLOSE = new byte[] { 41 }; // ")".getBytes( "ISO-8859-1" );
-    /**
-     * One of the open string tokens.
-     */
-    public static final byte[] HEX_STRING_OPEN = new byte[] { 60 }; // "<".getBytes( "ISO-8859-1" );
-    /**
-     * One of the close string tokens.
-     */
-    public static final byte[] HEX_STRING_CLOSE = new byte[] { 62 }; // ">".getBytes( "ISO-8859-1" );
-    /**
-     * the escape character in strings.
-     */
-    public static final byte[] ESCAPE = new byte[] { 92 }; // "\\".getBytes( "ISO-8859-1" );
-
-    /**
-     * CR escape characters.
-     */
-    public static final byte[] CR_ESCAPE = new byte[] { 92, 114 }; // "\\r".getBytes( "ISO-8859-1" );
-    /**
-     * LF escape characters.
-     */
-    public static final byte[] LF_ESCAPE = new byte[] { 92, 110 }; // "\\n".getBytes( "ISO-8859-1" );
-    /**
-     * HT escape characters.
-     */
-    public static final byte[] HT_ESCAPE = new byte[] { 92, 116 }; // "\\t".getBytes( "ISO-8859-1" );
-    /**
-     * BS escape characters.
-     */
-    public static final byte[] BS_ESCAPE = new byte[] { 92, 98 }; // "\\b".getBytes( "ISO-8859-1" );
-    /**
-     * FF escape characters.
-     */
-    public static final byte[] FF_ESCAPE = new byte[] { 92, 102 }; // "\\f".getBytes( "ISO-8859-1" );
-
-    private ByteArrayOutputStream out = null;
-    private String str = null;
-
-    /**
-     * Forces the string to be serialized in hex form but not literal form, the default is to stream in literal form.
-     */
-    private boolean forceHexForm = false;
-
-    /**
-     * Constructor.
-     */
-    public COSString()
-    {
-        out = new ByteArrayOutputStream();
-    }
-
-    /**
-     * Explicit constructor for ease of manual PDF construction.
-     * 
-     * @param value
-     *            The string value of the object.
-     */
-    public COSString(String value)
-    {
-        try
-        {
-            boolean unicode16 = false;
-            char[] chars = value.toCharArray();
-            int length = chars.length;
-            for (int i = 0; i < length; i++)
-            {
-                if (chars[i] > 255)
-                {
-                    unicode16 = true;
-                    break;
-                }
-            }
-            if (unicode16)
-            {
-                byte[] data = value.getBytes("UTF-16BE");
-                out = new ByteArrayOutputStream(data.length + 2);
-                out.write(0xFE);
-                out.write(0xFF);
-                out.write(data);
-            }
-            else
-            {
-                byte[] data = value.getBytes("ISO-8859-1");
-                out = new ByteArrayOutputStream(data.length);
-                out.write(data);
-            }
-        }
-        catch (IOException ignore)
-        {
-            LOG.error(ignore,ignore);
-            // should never happen
-        }
-    }
-
-    /**
-     * Explicit constructor for ease of manual PDF construction.
-     * 
-     * @param value
-     *            The string value of the object.
-     */
-    public COSString(byte[] value)
-    {
-        try
-        {
-            out = new ByteArrayOutputStream(value.length);
-            out.write(value);
-        }
-        catch (IOException ignore)
-        {
-            LOG.error(ignore,ignore);
-            // should never happen
-        }
-    }
-
-    /**
-     * Forces the string to be written in literal form instead of hexadecimal form.
-     * 
-     * @param v
-     *            if v is true the string will be written in literal form, otherwise it will be written in hexa if
-     *            necessary.
-     */
-
-    public void setForceLiteralForm(boolean v)
-    {
-        forceHexForm = !v;
-    }
-
-    /**
-     * Forces the string to be written in hexadecimal form instead of literal form.
-     * 
-     * @param v
-     *            if v is true the string will be written in hexadecimal form otherwise it will be written in literal if
-     *            necessary.
-     */
-
-    public void setForceHexForm(boolean v)
-    {
-        forceHexForm = v;
-    }
+    // legacy behaviour for old PDFParser
+    public static final boolean FORCE_PARSING =
+            Boolean.getBoolean("org.apache.pdfbox.forceParsing");
 
     /**
      * This will create a COS string from a string of hex characters.
-     * 
-     * @param hex
-     *            A hex string.
+     *
+     * @param hex A hex string.
      * @return A cos string with the hex characters converted to their actual bytes.
-     * @throws IOException
-     *             If there is an error with the hex string.
+     * @throws IOException If there is an error with the hex string.
      */
-    public static COSString createFromHexString(String hex) throws IOException
+    public static COSString parseHex(String hex) throws IOException
     {
-        return createFromHexString(hex, false);
-    }
-
-    /**
-     * Creates a COS string from a string of hex characters, optionally ignoring malformed input.
-     * 
-     * @param hex
-     *            A hex string.
-     * @param force
-     *            flag to ignore malformed input
-     * @return A cos string with the hex characters converted to their actual bytes.
-     * @throws IOException
-     *             If there is an error with the hex string.
-     */
-    public static COSString createFromHexString(String hex, boolean force) throws IOException
-    {
-        COSString retval = new COSString();
+        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
         StringBuilder hexBuffer = new StringBuilder(hex.trim());
+
         // if odd number then the last hex digit is assumed to be 0
         if (hexBuffer.length() % 2 != 0)
         {
             hexBuffer.append('0');
         }
+
         int length = hexBuffer.length();
         for (int i = 0; i < length; i += 2)
         {
             try
             {
-                retval.append(Integer.parseInt(hexBuffer.substring(i, i + 2), 16));
+                bytes.write(Integer.parseInt(hexBuffer.substring(i, i + 2), 16));
             }
             catch (NumberFormatException e)
             {
-                if (force)
+                if (FORCE_PARSING)
                 {
-                    retval.append('?');
+                    LOG.warn("Encountered a malformed hex string");
+                    bytes.write('?'); // todo: what does Acrobat do? Any example PDFs?
                 }
                 else
                 {
@@ -240,201 +89,154 @@ public class COSString extends COSBase
                 }
             }
         }
-        return retval;
+
+        return new COSString(bytes.toByteArray());
     }
 
+    private byte[] bytes;
+    private boolean forceHexForm;
+
     /**
-     * This will take this string and create a hex representation of the bytes that make the string.
-     * 
-     * @return A hex string representing the bytes in this string.
+     * Creates a new PDF string from a byte array. This method can be used to read a string from
+     * an existing PDF file, or to create a new byte string.
+     *
+     * @param bytes The raw bytes of the PDF text string or byte string.
      */
-    public String getHexString()
+    public COSString(byte[] bytes)
     {
-        StringBuilder retval = new StringBuilder(out.size() * 2);
-        byte[] data = getBytes();
-        int length = data.length;
-        for (int i = 0; i < length; i++)
-        {
-            retval.append(COSHEXTable.HEX_TABLE[(data[i] + 256) % 256]);
-        }
-
-        return retval.toString();
+        setValue(bytes);
     }
 
     /**
-     * This will get the string that this object wraps.
-     * 
-     * @return The wrapped string.
+     * Creates a new <i>text string</i> from a Java String.
+     *
+     * @param text The string value of the object.
      */
-    public String getString()
+    public COSString(String text)
     {
-        if (this.str != null)
+        // check whether the string uses only characters available in PDFDocEncoding
+        boolean isOnlyPDFDocEncoding = true;
+        for (char c : text.toCharArray())
+        {
+            if (!PDFDocEncoding.containsChar(c))
+            {
+                isOnlyPDFDocEncoding = false;
+                break;
+            }
+        }
+
+        if (isOnlyPDFDocEncoding)
         {
-            return this.str;
+            // PDFDocEncoded string
+            bytes = PDFDocEncoding.getBytes(text);
         }
-        String retval;
-        Charset charset = PDFDocEncodingCharset.INSTANCE;
-        byte[] data = getBytes();
-        int start = 0;
-        if (data.length > 2)
+        else
         {
-            if (data[0] == (byte) 0xFF && data[1] == (byte) 0xFE)
+            // UTF-16BE encoded string with a leading byte order marker
+            byte[] data = text.getBytes(Charsets.UTF_16BE);
+            ByteArrayOutputStream out = new ByteArrayOutputStream(data.length + 2);
+            out.write(0xFE); // BOM
+            out.write(0xFF); // BOM
+            try
             {
-                charset = Charset.forName("UTF-16LE");
-                start = 2;
+                out.write(data);
             }
-            else if (data[0] == (byte) 0xFE && data[1] == (byte) 0xFF)
+            catch (IOException e)
             {
-                charset = Charset.forName("UTF-16BE");
-                start = 2;
+                // should never happen
+                throw new RuntimeException(e);
             }
+            bytes = out.toByteArray();
         }
+    }
 
-        retval = new String(data, start, data.length - start, charset);
-        str = retval;
-        return retval;
+    /**
+     * Sets the raw value of this string.
+     *
+     * @param value The raw bytes of the PDF text string or byte string.
+     */
+    public void setValue(byte[] value)
+    {
+        bytes = value.clone();
     }
 
     /**
-     * This will append a byte[] to the string.
-     * 
-     * @param data
-     *            The byte[] to add to this string.
-     * 
-     * @throws IOException
-     *             If an IO error occurs while writing the byte.
+     * Sets whether or not the string is forced to be written in hex form.
+     * This is needed when signing PDF files.
+     *
+     * @param value True to force hex.
      */
-    public void append(byte[] data) throws IOException
+    public void setForceHexForm(boolean value)
     {
-        out.write(data);
-        this.str = null;
+        this.forceHexForm = value;
     }
 
     /**
-     * This will append a byte to the string.
-     * 
-     * @param in
-     *            The byte to add to this string.
-     * 
-     * @throws IOException
-     *             If an IO error occurs while writing the byte.
+     * Returns true if the string is to be written in hex form.
      */
-    public void append(int in) throws IOException
+    public boolean getForceHexForm()
     {
-        out.write(in);
-        this.str = null;
+        return forceHexForm;
     }
 
     /**
-     * This will reset the internal buffer.
+     * Returns the content of this string as a PDF <i>text string</i>.
      */
-    public void reset()
+    public String getString()
     {
-        out.reset();
-        this.str = null;
+        // text string - BOM indicates Unicode
+        if (bytes.length > 2)
+        {
+            if ((bytes[0] & 0xff) == 0xFE && (bytes[1] & 0xff) == 0xFF)
+            {
+                // UTF-16BE
+                return new String(bytes, 2, bytes.length - 2, Charsets.UTF_16BE);
+            }
+            else if ((bytes[0] & 0xff) == 0xFF && (bytes[1] & 0xff) == 0xFE)
+            {
+                // UTF-16LE - not in the PDF spec!
+                return new String(bytes, 2, bytes.length - 2, Charsets.UTF_16LE);
+            }
+        }
+
+        // otherwise use PDFDocEncoding
+        return PDFDocEncoding.toString(bytes);
     }
 
     /**
-     * This will get the bytes of the string.
-     * 
-     * @return A byte array that represents the string.
+     * Returns the content of this string as a PDF <i>ASCII string</i>.
      */
-    public byte[] getBytes()
+    public String getASCII()
     {
-        return out.toByteArray();
+        // ASCII string
+        return new String(bytes, Charsets.US_ASCII);
     }
 
     /**
-     * {@inheritDoc}
+     * Returns the raw bytes of the string. Best used with a PDF <i>byte string</i>.
      */
-    @Override
-    public String toString()
+    public byte[] getBytes()
     {
-        return "COSString{" + this.getString() + "}";
+        return bytes;
     }
 
     /**
-     * This will output this string as a PDF object.
-     * 
-     * @param output
-     *            The stream to write to.
-     * @throws IOException
-     *             If there is an error writing to the stream.
-     */
-    public void writePDF(OutputStream output) throws IOException
-    {
-        boolean outsideASCII = false;
-        // Lets first check if we need to escape this string.
-        byte[] bytes = getBytes();
-        int length = bytes.length;
-        for (int i = 0; i < length && !outsideASCII; i++)
-        {
-            // if the byte is negative then it is an eight bit byte and is
-            // outside the ASCII range.
-            outsideASCII = bytes[i] < 0;
-        }
-        if (!outsideASCII && !forceHexForm)
-        {
-            output.write(STRING_OPEN);
-            for (int i = 0; i < length; i++)
-            {
-                int b = (bytes[i] + 256) % 256;
-                switch (b)
-                {
-                case '(':
-                case ')':
-                case '\\':
-                {
-                    output.write(ESCAPE);
-                    output.write((byte) b);
-                    break;
-                }
-                case 10: // LF
-                {
-                    output.write(LF_ESCAPE);
-                    break;
-                }
-                case 13: // CR
-                {
-                    output.write(CR_ESCAPE);
-                    break;
-                }
-                case '\t':
-                {
-                    output.write(HT_ESCAPE);
-                    break;
-                }
-                case '\b':
-                {
-                    output.write(BS_ESCAPE);
-                    break;
-                }
-                case '\f':
-                {
-                    output.write(FF_ESCAPE);
-                    break;
-                }
-                default:
-                {
-                    output.write((byte) b);
-                }
-                }
-            }
-            output.write(STRING_CLOSE);
-        }
-        else
+     * This will take this string and create a hex representation of the bytes that make the string.
+     *
+     * @return A hex string representing the bytes in this string.
+     */
+    public String toHexString()
+    {
+        StringBuilder sb = new StringBuilder(bytes.length * 2);
+        for (byte b : bytes)
         {
-            output.write(HEX_STRING_OPEN);
-            for (int i = 0; i < length; i++)
-            {
-                output.write(COSHEXTable.TABLE[(bytes[i] + 256) % 256]);
-            }
-            output.write(HEX_STRING_CLOSE);
+            sb.append(Hex.getString(b));
         }
+        return sb.toString();
     }
 
     /**
-     * visitor pattern double dispatch method.
+     * Visitor pattern double dispatch method.
      * 
      * @param visitor The object to notify when visiting this object.
      * @return any object, depending on the visitor implementation, or null
@@ -446,27 +248,22 @@ public class COSString extends COSBase
         return visitor.visitFromString(this);
     }
 
-    /**
-     * {@inheritDoc}
-     */
     @Override
     public boolean equals(Object obj)
     {
         if (obj instanceof COSString)
         {
             COSString strObj = (COSString) obj;
-            return this.getString().equals(strObj.getString()) && this.forceHexForm == strObj.forceHexForm;
+            return getString().equals(strObj.getString()) &&
+                   forceHexForm == strObj.forceHexForm;
         }
         return false;
     }
 
-    /**
-     * {@inheritDoc}
-     */
     @Override
     public int hashCode()
     {
-        int result = getString().hashCode();
-        return result += forceHexForm ? 17 : 0;
+        int result = Arrays.hashCode(bytes);
+        return result + (forceHexForm ? 17 : 0);
     }
 }

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java?rev=1644828&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java Fri Dec 12 04:39:37 2014
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import java.io.ByteArrayOutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The "PDFDocEncoding" encoding. Note that this is *not* a Type 1 font encoding, it is used only
+ * within PDF "text strings".
+ */
+class PDFDocEncoding
+{
+    private static final char REPLACEMENT_CHARACTER = '\uFFFD';
+
+    private static final int[] codeToUni;
+    private static final Map<Character, Integer> uniToCode;
+
+    static
+    {
+        codeToUni = new int[256];
+        uniToCode = new HashMap<Character, Integer>(256);
+
+        // initialize with basically ISO-8859-1
+        for (int i = 0; i < 256; i++)
+        {
+            set(i, (char)i);
+        }
+
+        // then do all deviations (based on the table in ISO 32000-1:2008)
+        // block 1
+        set(0x18, '\u02D8'); // BREVE
+        set(0x19, '\u02C7'); // CARON
+        set(0x1A, '\u02C6'); // MODIFIER LETTER CIRCUMFLEX ACCENT
+        set(0x1B, '\u02D9'); // DOT ABOVE
+        set(0x1C, '\u02DD'); // DOUBLE ACUTE ACCENT
+        set(0x1D, '\u02DB'); // OGONEK
+        set(0x1E, '\u02DA'); // RING ABOVE
+        set(0x1F, '\u02DC'); // SMALL TILDE
+        // block 2
+        set(0x7F, REPLACEMENT_CHARACTER); // undefined
+        set(0x80, '\u2022'); // BULLET
+        set(0x81, '\u2020'); // DAGGER
+        set(0x82, '\u2021'); // DOUBLE DAGGER
+        set(0x83, '\u2026'); // HORIZONTAL ELLIPSIS
+        set(0x84, '\u2014'); // EM DASH
+        set(0x85, '\u2013'); // EN DASH
+        set(0x86, '\u0192'); // LATIN SMALL LETTER SCRIPT F
+        set(0x87, '\u2044'); // FRACTION SLASH (solidus)
+        set(0x88, '\u2039'); // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+        set(0x89, '\u203A'); // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+        set(0x8A, '\u2212'); // MINUS SIGN
+        set(0x8B, '\u2030'); // PER MILLE SIGN
+        set(0x8C, '\u201E'); // DOUBLE LOW-9 QUOTATION MARK (quotedblbase)
+        set(0x8D, '\u201C'); // LEFT DOUBLE QUOTATION MARK (quotedblleft)
+        set(0x8E, '\u201D'); // RIGHT DOUBLE QUOTATION MARK (quotedblright)
+        set(0x8F, '\u2018'); // LEFT SINGLE QUOTATION MARK (quoteleft)
+        set(0x90, '\u2019'); // RIGHT SINGLE QUOTATION MARK (quoteright)
+        set(0x91, '\u201A'); // SINGLE LOW-9 QUOTATION MARK (quotesinglbase)
+        set(0x92, '\u2122'); // TRADE MARK SIGN
+        set(0x93, '\uFB01'); // LATIN SMALL LIGATURE FI
+        set(0x94, '\uFB02'); // LATIN SMALL LIGATURE FL
+        set(0x95, '\u0141'); // LATIN CAPITAL LETTER L WITH STROKE
+        set(0x96, '\u0152'); // LATIN CAPITAL LIGATURE OE
+        set(0x97, '\u0160'); // LATIN CAPITAL LETTER S WITH CARON
+        set(0x98, '\u0178'); // LATIN CAPITAL LETTER Y WITH DIAERESIS
+        set(0x99, '\u017D'); // LATIN CAPITAL LETTER Z WITH CARON
+        set(0x9A, '\u0131'); // LATIN SMALL LETTER DOTLESS I
+        set(0x9B, '\u0142'); // LATIN SMALL LETTER L WITH STROKE
+        set(0x9C, '\u0153'); // LATIN SMALL LIGATURE OE
+        set(0x9D, '\u0161'); // LATIN SMALL LETTER S WITH CARON
+        set(0x9E, '\u017E'); // LATIN SMALL LETTER Z WITH CARON
+        set(0x9F, REPLACEMENT_CHARACTER); // undefined
+        set(0xA0, '\u20AC'); // EURO SIGN
+        // end of deviations
+    }
+
+    /**
+     * Maps the given PDFDocEncoding code to the given Unicode character, and vice versa.
+     */
+    private static void set(int code, char unicode)
+    {
+        codeToUni[code] = unicode;
+        uniToCode.put(unicode, code);
+    }
+
+    /**
+     * Returns the string representation of the given PDFDocEncoded bytes.
+     *
+     * @param bytes PDFDocEncoded bytes, each one is decoded to exactly one character
+     */
+    public static String toString(byte[] bytes)
+    {
+        StringBuilder sb = new StringBuilder(bytes.length);
+        for (byte b : bytes)
+        {
+            sb.append((char)codeToUni[b & 0xff]); // unsigned value is always a valid index
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Returns the given string encoded with PDFDocEncoding, one byte per character.
+     * Characters which are not available in PDFDocEncoding are encoded as 0.
+     */
+    public static byte[] getBytes(String text)
+    {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        for (char c : text.toCharArray())
+        {
+            Integer code = uniToCode.get(c);
+            if (code == null)
+            {
+                out.write(0);
+            }
+            else
+            {
+                // write the PDFDocEncoding code, not the UTF-16 code unit
+                out.write(code);
+            }
+        }
+        return out.toByteArray();
+    }
+
+    /**
+     * Returns true if the given character is available in PDFDocEncoding.
+     *
+     * @param character UTF-16 character
+     */
+    public static boolean containsChar(char character)
+    {
+        return uniToCode.containsKey(character);
+    }
+}

Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/PDFDocEncoding.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/ASCIIHexFilter.java Fri Dec 12 04:39:37 2014
@@ -23,11 +23,11 @@ import java.io.OutputStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.pdfbox.cos.COSDictionary;
-
-import org.apache.pdfbox.persistence.util.COSHEXTable;
+import org.apache.pdfbox.util.Hex;
 
 /**
  * Decodes data encoded in an ASCII hexadecimal form, reproducing the original binary data.
+ *
  * @author Ben Litchfield
  */
 final class ASCIIHexFilter extends Filter
@@ -106,7 +106,7 @@ final class ASCIIHexFilter extends Filte
 
     private boolean isEOD(int c)
     {
-        return (c == 62); // '>' - EOD
+        return c == '>';
     }
 
     @Override
@@ -116,8 +116,7 @@ final class ASCIIHexFilter extends Filte
         int byteRead;
         while ((byteRead = input.read()) != -1)
         {
-            int value = (byteRead + 256) % 256;
-            encoded.write(COSHEXTable.TABLE[value]);
+            encoded.write(Hex.getBytes((byte)byteRead));
         }
         encoded.flush();
     }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Fri Dec 12 04:39:37 2014
@@ -124,12 +124,6 @@ public abstract class BaseParser
     private static final String NULL = "null";
 
     /**
-     * Default value of the {@link #forceParsing} flag.
-     */
-    public static final boolean FORCE_PARSING =
-        Boolean.getBoolean("org.apache.pdfbox.forceParsing");
-
-    /**
      * This is the stream that will be read from.
      */
     protected PushBackInputStream pdfSource;
@@ -140,29 +134,19 @@ public abstract class BaseParser
     protected COSDocument document;
 
     /**
-     * Flag to skip malformed or otherwise unparseable input where possible.
-     */
-    protected final boolean forceParsing;
-
-    /**
      * Default constructor.
      */
     public BaseParser()
     {
-        this.forceParsing = FORCE_PARSING;
     }
 
     /**
      * Constructor.
      *
-     * @since Apache PDFBox 1.3.0
      * @param input The input stream to read the data from.
-     * @param forceParsingValue flag to skip malformed or otherwise unparseable
-     *                     input where possible
      * @throws IOException If there is an error reading the input stream.
      */
-    public BaseParser(InputStream input, boolean forceParsingValue)
-            throws IOException
+    public BaseParser(InputStream input) throws IOException
     {
         int pushbacksize = 65536;
         try
@@ -177,18 +161,6 @@ public abstract class BaseParser
         }
         this.pdfSource = new PushBackInputStream(
                 new BufferedInputStream(input, 16384), pushbacksize);
-        this.forceParsing = forceParsingValue;
-    }
-
-    /**
-     * Constructor.
-     *
-     * @param input The input stream to read the data from.
-     * @throws IOException If there is an error reading the input stream.
-     */
-    public BaseParser(InputStream input) throws IOException 
-    {
-        this(input, FORCE_PARSING);
     }
 
     /**
@@ -471,19 +443,19 @@ public abstract class BaseParser
             {
                 length = ( (COSNumber) streamLength).intValue();
             }
-// commented out next chunk since for the sequentially working PDFParser
-// we do not know if length object is redefined later on and the currently
-// read indirect object might be obsolete (e.g. not referenced in xref table);
-// this would result in reading wrong number of bytes;
-// Thus the only reliable information is a direct length. 
-// This exclusion shouldn't harm much since in case of indirect objects they will
-// typically be defined after the stream object, thus keeping the directly
-// provided length will fix most cases
-//            else if ( ( streamLength instanceof COSObject ) &&
-//                      ( ( (COSObject) streamLength ).getObject() instanceof COSNumber ) )
-//            {
-//                length = ( (COSNumber) ( (COSObject) streamLength ).getObject() ).intValue();
-//            } 
+            // commented out next chunk since for the sequentially working PDFParser
+            // we do not know if length object is redefined later on and the currently
+            // read indirect object might be obsolete (e.g. not referenced in xref table);
+            // this would result in reading wrong number of bytes;
+            // Thus the only reliable information is a direct length.
+            // This exclusion shouldn't harm much since in case of indirect objects they will
+            // typically be defined after the stream object, thus keeping the directly
+            // provided length will fix most cases
+            // else if ( ( streamLength instanceof COSObject ) &&
+            //           ( ( (COSObject) streamLength ).getObject() instanceof COSNumber ) )
+            // {
+            //     length = ( (COSNumber) ( (COSObject) streamLength ).getObject() ).intValue();
+            // }
             
             if ( length == -1 )
             {
@@ -820,7 +792,7 @@ public abstract class BaseParser
     protected COSString parseCOSString() throws IOException
     {
         char nextChar = (char)pdfSource.read();
-        COSString retval = new COSString();
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
         char openBrace;
         char closeBrace;
         if( nextChar == '(' )
@@ -854,13 +826,13 @@ public abstract class BaseParser
                 braces = checkForMissingCloseParen(braces);
                 if( braces != 0 )
                 {
-                    retval.append( ch );
+                    out.write(ch);
                 }
             }
             else if( ch == openBrace )
             {
                 braces++;
-                retval.append( ch );
+                out.write(ch);
             }
             else if( ch == '\\' )
             {
@@ -869,35 +841,35 @@ public abstract class BaseParser
                 switch(next)
                 {
                     case 'n':
-                        retval.append( '\n' );
+                        out.write('\n');
                         break;
                     case 'r':
-                        retval.append( '\r' );
+                        out.write('\r');
                         break;
                     case 't':
-                        retval.append( '\t' );
+                        out.write('\t');
                         break;
                     case 'b':
-                        retval.append( '\b' );
+                        out.write('\b');
                         break;
                     case 'f':
-                        retval.append( '\f' );
+                        out.write('\f');
                         break;
                     case ')':
                         // PDFBox 276 /Title (c:\)
                         braces = checkForMissingCloseParen(braces);
                         if( braces != 0 )
                         {
-                            retval.append( next );
+                            out.write(next);
                         }
                         else
                         {
-                            retval.append('\\');
+                            out.write('\\');
                         }
                         break;
                     case '(':
                     case '\\':
-                        retval.append( next );
+                        out.write(next);
                         break;
                     case 10:
                     case 13:
@@ -950,20 +922,20 @@ public abstract class BaseParser
                         {
                             throw new IOException( "Error: Expected octal character, actual='" + octal + "'" );
                         }
-                        retval.append( character );
+                        out.write(character);
                         break;
                     }
                     default:
                     {
                         // dropping the backslash
                         // see 7.3.4.2 Literal Strings for further information
-                        retval.append( next );
+                        out.write(next);
                     }
                 }
             }
             else
             {
-                retval.append( ch );
+                out.write(ch);
             }
             if (nextc != -2)
             {
@@ -978,7 +950,7 @@ public abstract class BaseParser
         {
             pdfSource.unread(c);
         }
-        return retval;
+        return new COSString(out.toByteArray());
     }
 
     /**
@@ -1044,7 +1016,7 @@ public abstract class BaseParser
                 break;
             }
         }
-        return COSString.createFromHexString( sBuf.toString(), forceParsing );
+        return COSString.parseHex(sBuf.toString());
     }
     
     /**

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java Fri Dec 12 04:39:37 2014
@@ -32,7 +32,6 @@ import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.cos.COSUnread;
 import org.apache.pdfbox.io.RandomAccess;
@@ -297,7 +296,7 @@ public class ConformingPDFParser extends
     protected COSBase processCosObject(String string) throws IOException {
         if(string != null && string.endsWith(">")) {
             // string of hex codes
-            return COSString.createFromHexString(string.replaceAll("^<", "").replaceAll(">$", ""));
+            return COSString.parseHex(string.replaceAll("^<", "").replaceAll(">$", ""));
         }
         return null;
     }
@@ -322,12 +321,12 @@ public class ConformingPDFParser extends
             lastSection = lastSection.replaceAll("]$", "");
             while(!lastSection.startsWith("[")) {
                 if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
-                    array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
+                    array.add(COSString.parseHex(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
                 lastSection = readBackwardUntilWhitespace();
             }
             lastSection = lastSection.replaceAll("^\\[", "");
             if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
-                array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
+                array.add(COSString.parseHex(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
             obj = array;
         } else if(lastSection != null && lastSection.endsWith(">")) {
             // string of hex codes
@@ -516,7 +515,7 @@ public class ConformingPDFParser extends
                 sb.append((char)singleByte);
                 singleByte = readByte();
             }
-            return new COSString(sb.toString());
+            return new COSString(sb.toString()); // fixme: should be calling COSString(byte[])
         } else {
             throw new RuntimeException("Not yet implemented: " + string
                     + " loation=" + this.currentOffset);

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Fri Dec 12 04:39:37 2014
@@ -311,7 +311,7 @@ public class NonSequentialPDFParser exte
                         + " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
             }
         }
-        setDocument(new COSDocument(false, useScratchFiles));
+        setDocument(new COSDocument(useScratchFiles));
         pdfSource = new PushBackInputStream(raStream, 4096);
     }
 
@@ -1582,7 +1582,7 @@ public class NonSequentialPDFParser exte
                 {
                     // parse object stream
                     PDFObjectStreamParser parser = new PDFObjectStreamParser(
-                            (COSStream) objstmBaseObj, document, forceParsing);
+                            (COSStream) objstmBaseObj, document);
                     parser.parse();
 
                     // get set of object numbers referenced for this object
@@ -1611,7 +1611,7 @@ public class NonSequentialPDFParser exte
     /**
      * 
      * @param dict the dictionary to be decrypted
-     * @param the object number
+     * @param objNr the object number
      * @param objGenNr the object generation number
      * @throws IOException ff something went wrong
      */

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java Fri Dec 12 04:39:37 2014
@@ -50,37 +50,18 @@ public class PDFObjectStreamParser exten
     /**
      * Constructor.
      *
-     * @since Apache PDFBox 1.3.0
      * @param strm The stream to parse.
      * @param doc The document for the current parsing.
-     * @param forceParsing flag to skip malformed or otherwise unparseable
-     *                     input where possible
      * @throws IOException If there is an error initializing the stream.
      */
-    public PDFObjectStreamParser(
-            COSStream strm, COSDocument doc, boolean forceParsing)
-            throws IOException 
+    public PDFObjectStreamParser(COSStream strm, COSDocument doc) throws IOException
     {
-        super(strm.getUnfilteredStream(), forceParsing);
+        super(strm.getUnfilteredStream());
         setDocument(doc);
         stream = strm;
     }
 
     /**
-     * Constructor.
-     *
-     * @param strm The stream to parse.
-     * @param doc The document for the current parsing.
-     *
-     * @throws IOException If there is an error initializing the stream.
-     */
-    public PDFObjectStreamParser(COSStream strm, COSDocument doc)
-            throws IOException 
-    {
-        this(strm, doc, FORCE_PARSING);
-    }
-
-    /**
      * This will parse the tokens in the stream.  This will close the
      * stream when it is finished parsing.
      *

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Fri Dec 12 04:39:37 2014
@@ -63,7 +63,14 @@ public class PDFParser extends BaseParse
     
     private static final String PDF_DEFAULT_VERSION = "1.4";
     private static final String FDF_DEFAULT_VERSION = "1.0";
-    
+
+    // deprecated functionality from BaseParser:
+    /**
+     * Default value of the {@link #forceParsing} flag.
+     */
+    public static final boolean FORCE_PARSING =
+            Boolean.getBoolean("org.apache.pdfbox.forceParsing");
+
     /**
      * A list of duplicate objects found when Parsing the PDF
      * File.
@@ -84,17 +91,21 @@ public class PDFParser extends BaseParse
      * Temp file directory.
      */
     private File tempDirectory = null;
-
     private final boolean useScratchFile;
 
     /**
+     * Flag to skip malformed or otherwise unparseable input where possible.
+     */
+    protected final boolean forceParsing;
+
+    /**
      * Constructor.
      *
      * @param input The input stream that contains the PDF document.
      *
      * @throws IOException If there is an error initializing the stream.
      */
-    public PDFParser( InputStream input ) throws IOException 
+    public PDFParser( InputStream input ) throws IOException
     {
         this(input, FORCE_PARSING);
     }
@@ -108,7 +119,7 @@ public class PDFParser extends BaseParse
      *
      * @throws IOException If there is an error initializing the stream.
      */
-    public PDFParser(InputStream input, boolean force) throws IOException 
+    public PDFParser(InputStream input, boolean force) throws IOException
     {
         this(input, force, false);
     }
@@ -123,9 +134,10 @@ public class PDFParser extends BaseParse
      *
      * @throws IOException If there is an error initializing the stream.
      */
-    public PDFParser(InputStream input, boolean force, boolean useScratchFiles) throws IOException 
+    public PDFParser(InputStream input, boolean force, boolean useScratchFiles) throws IOException
     {
-        super(input, force);
+        super(input);
+        forceParsing = force;
         useScratchFile = useScratchFiles;
     }
 
@@ -168,11 +180,11 @@ public class PDFParser extends BaseParse
         {
             if( tempDirectory != null )
             {
-                document = new COSDocument( tempDirectory, forceParsing, true );
+                document = new COSDocument( tempDirectory, true );
             }
             else if(useScratchFile)
             {
-                document = new COSDocument( null, forceParsing, true );
+                document = new COSDocument( null, true );
             }
             else
             {
@@ -369,7 +381,7 @@ public class PDFParser extends BaseParse
             throw new IOException( "Error: Header doesn't contain versioninfo" );
         }
 
-        //sometimes there are some garbage bytes in the header before the header
+        //sometimes there are some garbage bytes in the header before the header
         //actually starts, so lets try to find the header first.
         int headerStart = header.indexOf( PDF_HEADER );
         if (headerStart == -1)
@@ -965,7 +977,7 @@ public class PDFParser extends BaseParse
             xrefTrailerResolver.setTrailer( stream );
         }        
         PDFXrefStreamParser parser =
-                new PDFXrefStreamParser( stream, document, forceParsing, xrefTrailerResolver );
+                new PDFXrefStreamParser( stream, document, xrefTrailerResolver );
         parser.parse();
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Fri Dec 12 04:39:37 2014
@@ -57,29 +57,12 @@ public class PDFStreamParser extends Bas
     /**
      * Constructor that takes a stream to parse.
      *
-     * @since Apache PDFBox 1.3.0
      * @param stream The stream to read data from.
-     * @param forceParsing flag to skip malformed or otherwise unparseable
-     *                     input where possible
      * @throws IOException If there is an error reading from the stream.
      */
-    public PDFStreamParser(InputStream stream, boolean forceParsing)
-            throws IOException 
+    public PDFStreamParser(InputStream stream) throws IOException
     {
-        super(stream, forceParsing);
-    }
-
-    /**
-     * Constructor that takes a stream to parse.
-     *
-     * @param stream The stream to read data from.
-     *
-     * @throws IOException If there is an error reading from the stream.
-     */
-    public PDFStreamParser(InputStream stream)
-            throws IOException 
-    {
-        this(stream, FORCE_PARSING);
+        super(stream);
     }
 
     /**
@@ -95,21 +78,6 @@ public class PDFStreamParser extends Bas
     }
 
     /**
-     * Constructor.
-     *
-     * @since Apache PDFBox 1.3.0
-     * @param stream The stream to parse.
-     * @param forceParsing flag to skip malformed or otherwise unparseable
-     *                     input where possible
-     * @throws IOException If there is an error initializing the stream.
-     */
-    public PDFStreamParser(COSStream stream, boolean forceParsing)
-            throws IOException 
-    {
-       this(stream.getUnfilteredStream(), forceParsing);
-    }
-
-    /**
      * Constructor.
      *
      * @param stream The stream to parse.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Fri Dec 12 04:39:37 2014
@@ -44,21 +44,16 @@ public class PDFXrefStreamParser extends
     /**
      * Constructor.
      *
-     * @since 1.3.0
      * @param strm The stream to parse.
      * @param doc The document for the current parsing.
-     * @param forceParsing flag to skip malformed or otherwise unparseable
-     *                     input where possible
      * @param resolver resolver to read the xref/trailer information
      *
      * @throws IOException If there is an error initializing the stream.
      */
-    public PDFXrefStreamParser(
-            COSStream strm, COSDocument doc, boolean forceParsing,
-            XrefTrailerResolver resolver )
+    public PDFXrefStreamParser(COSStream strm, COSDocument doc, XrefTrailerResolver resolver )
             throws IOException
     {
-        super(strm.getUnfilteredStream(), forceParsing);
+        super(strm.getUnfilteredStream());
         setDocument(doc);
         stream = strm;
         this.xrefTrailerResolver = resolver;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSStandardOutputStream.java Fri Dec 12 04:39:37 2014
@@ -20,8 +20,6 @@ import java.io.FilterOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 
-import org.apache.pdfbox.util.StringUtil;
-
 /**
  * simple output stream with some minor features for generating "pretty" PDF files.
  *
@@ -32,17 +30,17 @@ public class COSStandardOutputStream ext
     /**
      * To be used when 2 byte sequence is enforced.
      */
-    public static final byte[] CRLF = StringUtil.getBytes("\r\n");
+    public static final byte[] CRLF = { '\r', '\n' };
 
     /**
      * Line feed character.
      */
-    public static final byte[] LF = StringUtil.getBytes("\n");
+    public static final byte[] LF = { '\n' };
 
     /**
      * standard line separator.
      */
-    public static final byte[] EOL = StringUtil.getBytes("\n");
+    public static final byte[] EOL = { '\n' };
 
     // current byte position in the output stream
     private long position = 0;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/COSWriter.java Fri Dec 12 04:39:37 2014
@@ -60,48 +60,40 @@ import org.apache.pdfbox.pdmodel.PDDocum
 import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
 import org.apache.pdfbox.pdmodel.interactive.digitalsignature.SignatureInterface;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
-import org.apache.pdfbox.util.StringUtil;
+import org.apache.pdfbox.util.Charsets;
+import org.apache.pdfbox.util.Hex;
 
 /**
- * this class acts on a in-memory representation of a pdf document.
- *
- * todo no support for incremental updates
- * todo single xref section only
- * todo no linearization
+ * This class acts on an in-memory representation of a PDF document.
  *
  * @author Michael Traut
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
- * 
+ * @author Ben Litchfield
  */
 public class COSWriter implements ICOSVisitor, Closeable
 {
-
-    /**
-     * Log instance.
-     */
     private static final Log LOG = LogFactory.getLog(COSWriter.class);
 
     /**
      * The dictionary open token.
      */
-    public static final byte[] DICT_OPEN = StringUtil.getBytes("<<");
+    public static final byte[] DICT_OPEN = "<<".getBytes(Charsets.US_ASCII);
     /**
      * The dictionary close token.
      */
-    public static final byte[] DICT_CLOSE = StringUtil.getBytes(">>");
+    public static final byte[] DICT_CLOSE = ">>".getBytes(Charsets.US_ASCII);
     /**
      * space character.
      */
-    public static final byte[] SPACE = StringUtil.getBytes(" ");
+    public static final byte[] SPACE = { ' ' };
     /**
      * The start to a PDF comment.
      */
-    public static final byte[] COMMENT = StringUtil.getBytes("%");
+    public static final byte[] COMMENT = { '%' };
 
     /**
      * The output version of the PDF.
      */
-    public static final byte[] VERSION = StringUtil.getBytes("PDF-1.4");
+    public static final byte[] VERSION = "PDF-1.4".getBytes(Charsets.US_ASCII);
     /**
      * Garbage bytes used to create the PDF header.
      */
@@ -109,57 +101,57 @@ public class COSWriter implements ICOSVi
     /**
      * The EOF constant.
      */
-    public static final byte[] EOF = StringUtil.getBytes("%%EOF");
+    public static final byte[] EOF = "%%EOF".getBytes(Charsets.US_ASCII);
     // pdf tokens
 
     /**
      * The reference token.
      */
-    public static final byte[] REFERENCE = StringUtil.getBytes("R");
+    public static final byte[] REFERENCE = "R".getBytes(Charsets.US_ASCII);
     /**
      * The XREF token.
      */
-    public static final byte[] XREF = StringUtil.getBytes("xref");
+    public static final byte[] XREF = "xref".getBytes(Charsets.US_ASCII);
     /**
      * The xref free token.
      */
-    public static final byte[] XREF_FREE = StringUtil.getBytes("f");
+    public static final byte[] XREF_FREE = "f".getBytes(Charsets.US_ASCII);
     /**
      * The xref used token.
      */
-    public static final byte[] XREF_USED = StringUtil.getBytes("n");
+    public static final byte[] XREF_USED = "n".getBytes(Charsets.US_ASCII);
     /**
      * The trailer token.
      */
-    public static final byte[] TRAILER = StringUtil.getBytes("trailer");
+    public static final byte[] TRAILER = "trailer".getBytes(Charsets.US_ASCII);
     /**
      * The start xref token.
      */
-    public static final byte[] STARTXREF = StringUtil.getBytes("startxref");
+    public static final byte[] STARTXREF = "startxref".getBytes(Charsets.US_ASCII);
     /**
      * The starting object token.
      */
-    public static final byte[] OBJ = StringUtil.getBytes("obj");
+    public static final byte[] OBJ = "obj".getBytes(Charsets.US_ASCII);
     /**
      * The end object token.
      */
-    public static final byte[] ENDOBJ = StringUtil.getBytes("endobj");
+    public static final byte[] ENDOBJ = "endobj".getBytes(Charsets.US_ASCII);
     /**
      * The array open token.
      */
-    public static final byte[] ARRAY_OPEN = StringUtil.getBytes("[");
+    public static final byte[] ARRAY_OPEN = "[".getBytes(Charsets.US_ASCII);
     /**
      * The array close token.
      */
-    public static final byte[] ARRAY_CLOSE = StringUtil.getBytes("]");
+    public static final byte[] ARRAY_CLOSE = "]".getBytes(Charsets.US_ASCII);
     /**
      * The open stream token.
      */
-    public static final byte[] STREAM = StringUtil.getBytes("stream");
+    public static final byte[] STREAM = "stream".getBytes(Charsets.US_ASCII);
     /**
      * The close stream token.
      */
-    public static final byte[] ENDSTREAM = StringUtil.getBytes("endstream");
+    public static final byte[] ENDSTREAM = "endstream".getBytes(Charsets.US_ASCII);
 
     private NumberFormat formatXrefOffset = new DecimalFormat("0000000000");
 
@@ -594,11 +586,8 @@ public class COSWriter implements ICOSVi
     }
 
     /**
-     * write the x ref section for the pdf file
-     *
-     * currently, the pdf is reconstructed from the scratch, so we write a single section
-     *
-     * todo support for incremental writing?
+     * Write the xref section for the PDF file.
+     * Currently, the PDF is reconstructed from scratch, so we write a single section.
      *
      * @param doc The document to write the xref from.
      *
@@ -798,7 +787,7 @@ public class COSWriter implements ICOSVi
         // sign the bytes
         SignatureInterface signatureInterface = doc.getSignatureInterface();
         byte[] sign = signatureInterface.sign(signStream);
-        String signature = new COSString(sign).getHexString();
+        String signature = new COSString(sign).toHexString();
         // substract 2 bytes because of the enclosing "<>"
         if (signature.length() > signatureLength - 2)
         {
@@ -961,12 +950,6 @@ public class COSWriter implements ICOSVi
             {
                 COSNull.NULL.accept( this );
             }
-            else if( current instanceof COSString )
-            {
-                COSString copy = new COSString();
-                copy.append(((COSString)current).getBytes());
-                copy.accept(this);
-            }
             else
             {
                 current.accept(this);
@@ -1260,7 +1243,7 @@ public class COSWriter implements ICOSVi
     @Override
     public Object visitFromString(COSString obj) throws IOException
     {
-        if(willEncrypt)
+        if (willEncrypt)
         {
             document.getEncryption().getSecurityHandler().encryptString(
                     obj,
@@ -1268,7 +1251,7 @@ public class COSWriter implements ICOSVi
                     currentObjectKey.getGeneration());
         }
 
-        obj.writePDF( getStandardOutput() );
+        COSWriter.writeString(obj, getStandardOutput());
         return null;
     }
 
@@ -1378,4 +1361,99 @@ public class COSWriter implements ICOSVi
         }
         cosDoc.accept(this);
     }
+
+    /**
+     * This will output the given COSString as a PDF string object.
+     *
+     * @param output The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    public static void writeString(COSString string, OutputStream output) throws IOException
+    {
+        writeString(string.getBytes(), string.getForceHexForm(), output);
+    }
+
+    /**
+     * This will output the given text/byte string as a PDF object.
+     *
+     * @param output The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    public static void writeString(byte[] bytes, OutputStream output) throws IOException
+    {
+        writeString(bytes, false, output);
+    }
+
+    /**
+     * Writes the given bytes as a PDF string object: as a literal string if every byte is 7-bit
+     * ASCII and the hex form is not forced, otherwise as a hexadecimal string.
+     *
+     * @param bytes The raw bytes of the string.
+     * @param forceHex If true, the hexadecimal form is always written.
+     * @param output The stream to write to.
+     * @throws IOException If there is an error writing to the stream.
+     */
+    private static void writeString(byte[] bytes, boolean forceHex, OutputStream output)
+            throws IOException
+    {
+        // the literal form requires that hex is not forced and that no byte is negative (8-bit)
+        boolean isLiteral = !forceHex;
+        for (int i = 0; isLiteral && i < bytes.length; i++)
+        {
+            isLiteral = bytes[i] >= 0;
+        }
+
+        if (isLiteral)
+        {
+            // write literal string; control characters use their escape letter, because a
+            // backslash followed by a raw CR or LF is a line continuation that readers discard
+            output.write('(');
+            for (byte b : bytes)
+            {
+                switch (b)
+                {
+                    case '(':
+                    case ')':
+                    case '\\':
+                        output.write('\\');
+                        output.write(b);
+                        break;
+                    case '\r':
+                        output.write('\\');
+                        output.write('r');
+                        break;
+                    case '\n':
+                        output.write('\\');
+                        output.write('n');
+                        break;
+                    case '\t':
+                        output.write('\\');
+                        output.write('t');
+                        break;
+                    case '\b':
+                        output.write('\\');
+                        output.write('b');
+                        break;
+                    case '\f':
+                        output.write('\\');
+                        output.write('f');
+                        break;
+                    default:
+                        output.write(b);
+                        break;
+                }
+            }
+            output.write(')');
+        }
+        else
+        {
+            // write hex string
+            output.write('<');
+            for (byte b : bytes)
+            {
+                output.write(Hex.getBytes(b));
+            }
+            output.write('>');
+        }
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/ContentStreamWriter.java Fri Dec 12 04:39:37 2014
@@ -86,7 +86,7 @@ public class ContentStreamWriter
     {
         if( o instanceof COSString )
         {
-            ((COSString)o).writePDF( output );
+            COSWriter.writeString((COSString)o, output);
         }
         else if( o instanceof COSFloat )
         {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/edit/PDPageContentStream.java Fri Dec 12 04:39:37 2014
@@ -20,11 +20,10 @@ import java.awt.Color;
 import java.awt.color.ColorSpace;
 import java.awt.geom.AffineTransform;
 import java.awt.geom.PathIterator;
-import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.io.UnsupportedEncodingException;
+import java.nio.charset.Charset;
 import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.List;
@@ -37,7 +36,7 @@ import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.pdfwriter.COSWriter;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.PDResources;
@@ -54,100 +53,91 @@ import org.apache.pdfbox.pdmodel.graphic
 import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
 import org.apache.pdfbox.pdmodel.graphics.image.PDInlineImage;
+import org.apache.pdfbox.util.Charsets;
 
 /**
- * This class is a convenience for creating page content streams.  You MUST
- * call close() when you are finished with this object.
+ * This class is a convenience for creating page content streams. You MUST call close() when you
+ * are finished with this object.
  *
- * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @author Ben Litchfield
  * 
  */
 public class PDPageContentStream implements Closeable
 {
-    /**
-     * Log instance.
-     */
     private static final Log LOG = LogFactory.getLog(PDPageContentStream.class);
 
+    private static byte[] toAscii(final String text)
+    {
+        return text.getBytes(Charsets.US_ASCII);
+    }
+
+    private static final byte[] BEGIN_TEXT = toAscii("BT\n");
+    private static final byte[] END_TEXT = toAscii("ET\n");
+    private static final byte[] SET_FONT = toAscii("Tf\n");
+    private static final byte[] MOVE_TEXT_POSITION = toAscii("Td\n");
+    private static final byte[] SET_TEXT_MATRIX = toAscii("Tm\n");
+    private static final byte[] SHOW_TEXT = toAscii("Tj\n");
+
+    private static final byte[] SAVE_GRAPHICS_STATE = toAscii("q\n");
+    private static final byte[] RESTORE_GRAPHICS_STATE = toAscii("Q\n");
+    private static final byte[] CONCATENATE_MATRIX = toAscii("cm\n");
+    private static final byte[] XOBJECT_DO = toAscii("Do\n");
+    private static final byte[] RG_STROKING = toAscii("RG\n");
+    private static final byte[] RG_NON_STROKING = toAscii("rg\n");
+    private static final byte[] K_STROKING = toAscii("K\n");
+    private static final byte[] K_NON_STROKING = toAscii("k\n");
+    private static final byte[] G_STROKING = toAscii("G\n");
+    private static final byte[] G_NON_STROKING = toAscii("g\n");
+    private static final byte[] RECTANGLE = toAscii("re\n");
+    private static final byte[] FILL_NON_ZERO = toAscii("f\n");
+    private static final byte[] FILL_EVEN_ODD = toAscii("f*\n");
+    private static final byte[] LINE_TO = toAscii("l\n");
+    private static final byte[] MOVE_TO = toAscii("m\n");
+    private static final byte[] CLOSE_STROKE = toAscii("s\n");
+    private static final byte[] STROKE = toAscii("S\n");
+    private static final byte[] LINE_WIDTH = toAscii("w\n");
+    private static final byte[] LINE_JOIN_STYLE = toAscii("j\n");
+    private static final byte[] LINE_CAP_STYLE = toAscii("J\n");
+    private static final byte[] LINE_DASH_PATTERN = toAscii("d\n");
+    private static final byte[] CLOSE_SUBPATH = toAscii("h\n");
+    private static final byte[] CLIP_PATH_NON_ZERO = toAscii("W\n");
+    private static final byte[] CLIP_PATH_EVEN_ODD = toAscii("W*\n");
+    private static final byte[] NOP = toAscii("n\n");
+    private static final byte[] BEZIER_312 = toAscii("c\n");
+    private static final byte[] BEZIER_32 = toAscii("v\n");
+    private static final byte[] BEZIER_313 = toAscii("y\n");
+
+    private static final byte[] BMC = toAscii("BMC\n");
+    private static final byte[] BDC = toAscii("BDC\n");
+    private static final byte[] EMC = toAscii("EMC\n");
+
+    private static final byte[] SET_STROKING_COLORSPACE = toAscii("CS\n");
+    private static final byte[] SET_NON_STROKING_COLORSPACE = toAscii("cs\n");
+
+    private static final byte[] SET_STROKING_COLOR_SIMPLE = toAscii("SC\n");
+    private static final byte[] SET_STROKING_COLOR_COMPLEX = toAscii("SCN\n");
+    private static final byte[] SET_NON_STROKING_COLOR_SIMPLE = toAscii("sc\n");
+    private static final byte[] SET_NON_STROKING_COLOR_COMPLEX = toAscii("scn\n");
+
+    private static final byte[] OPENING_BRACKET = toAscii("[");
+    private static final byte[] CLOSING_BRACKET = toAscii("]");
+    private static final byte[] NEWLINE = toAscii("\n");
+
+    private static final int SPACE = 32;
+
+    // instance variables
     private OutputStream output;
-    private boolean inTextMode = false;
     private PDResources resources;
+    private boolean inTextMode = false;
 
     private PDColorSpace currentStrokingColorSpace = PDDeviceGray.INSTANCE;
     private PDColorSpace currentNonStrokingColorSpace = PDDeviceGray.INSTANCE;
 
     // cached storage component for getting color values
-    private float[] colorComponents = new float[4];
-
-    private NumberFormat formatDecimal = NumberFormat.getNumberInstance(Locale.US);
-
-    private static final String ISO8859 = "ISO-8859-1";
+    private final float[] colorComponents = new float[4];
 
-    private static byte[] getISOBytes(final String s)
-    {
-        try
-        {
-            return s.getBytes(ISO8859);
-        }
-        catch (final UnsupportedEncodingException ex)
-        {
-            throw new IllegalStateException(ex);
-        }
-    }
-
-    private static final byte[] BEGIN_TEXT = getISOBytes("BT\n");
-    private static final byte[] END_TEXT = getISOBytes("ET\n");
-    private static final byte[] SET_FONT = getISOBytes("Tf\n");
-    private static final byte[] MOVE_TEXT_POSITION = getISOBytes("Td\n");
-    private static final byte[] SET_TEXT_MATRIX = getISOBytes("Tm\n");
-    private static final byte[] SHOW_TEXT = getISOBytes("Tj\n");
-
-    private static final byte[] SAVE_GRAPHICS_STATE = getISOBytes("q\n");
-    private static final byte[] RESTORE_GRAPHICS_STATE = getISOBytes("Q\n");
-    private static final byte[] CONCATENATE_MATRIX = getISOBytes("cm\n");
-    private static final byte[] XOBJECT_DO = getISOBytes("Do\n");
-    private static final byte[] RG_STROKING = getISOBytes("RG\n");
-    private static final byte[] RG_NON_STROKING = getISOBytes("rg\n");
-    private static final byte[] K_STROKING = getISOBytes("K\n");
-    private static final byte[] K_NON_STROKING = getISOBytes("k\n");
-    private static final byte[] G_STROKING = getISOBytes("G\n");
-    private static final byte[] G_NON_STROKING = getISOBytes("g\n");
-    private static final byte[] RECTANGLE = getISOBytes("re\n");
-    private static final byte[] FILL_NON_ZERO = getISOBytes("f\n");
-    private static final byte[] FILL_EVEN_ODD = getISOBytes("f*\n");
-    private static final byte[] LINE_TO = getISOBytes("l\n");
-    private static final byte[] MOVE_TO = getISOBytes("m\n");
-    private static final byte[] CLOSE_STROKE = getISOBytes("s\n");
-    private static final byte[] STROKE = getISOBytes("S\n");
-    private static final byte[] LINE_WIDTH = getISOBytes("w\n");
-    private static final byte[] LINE_JOIN_STYLE = getISOBytes("j\n");
-    private static final byte[] LINE_CAP_STYLE = getISOBytes("J\n");
-    private static final byte[] LINE_DASH_PATTERN = getISOBytes("d\n");
-    private static final byte[] CLOSE_SUBPATH = getISOBytes("h\n");
-    private static final byte[] CLIP_PATH_NON_ZERO = getISOBytes("W\n");
-    private static final byte[] CLIP_PATH_EVEN_ODD = getISOBytes("W*\n");
-    private static final byte[] NOP = getISOBytes("n\n");
-    private static final byte[] BEZIER_312 = getISOBytes("c\n");
-    private static final byte[] BEZIER_32 = getISOBytes("v\n");
-    private static final byte[] BEZIER_313 = getISOBytes("y\n");
-
-    private static final byte[] BMC = getISOBytes("BMC\n");
-    private static final byte[] BDC = getISOBytes("BDC\n");
-    private static final byte[] EMC = getISOBytes("EMC\n");
-
-    private static final byte[] SET_STROKING_COLORSPACE = getISOBytes("CS\n");
-    private static final byte[] SET_NON_STROKING_COLORSPACE = getISOBytes("cs\n");
-
-    private static final byte[] SET_STROKING_COLOR_SIMPLE = getISOBytes("SC\n");
-    private static final byte[] SET_STROKING_COLOR_COMPLEX = getISOBytes("SCN\n");
-    private static final byte[] SET_NON_STROKING_COLOR_SIMPLE = getISOBytes("sc\n");
-    private static final byte[] SET_NON_STROKING_COLOR_COMPLEX = getISOBytes("scn\n");
-
-    private static final byte[] OPENING_BRACKET = getISOBytes("[");
-    private static final byte[] CLOSING_BRACKET = getISOBytes("]");
-    private static final byte[] NEWLINE = getISOBytes("\n");
-
-    private static final int SPACE = 32;
+    // number format
+    private final NumberFormat formatDecimal = NumberFormat.getNumberInstance(Locale.US);
 
     /**
      * Create a new PDPage content stream.
@@ -631,10 +621,7 @@ public class PDPageContentStream impleme
         {
             throw new IOException("Error: must call beginText() before drawString");
         }
-        COSString string = new COSString(text);
-        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-        string.writePDF(buffer);
-        appendRawCommands(buffer.toByteArray());
+        COSWriter.writeString(text.getBytes(Charset.forName("ISO-8859-1")), output); // todo: use font's encoding
         appendRawCommands(SPACE);
         appendRawCommands(SHOW_TEXT);
     }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/PDEncryption.java Fri Dec 12 04:39:37 2014
@@ -258,9 +258,7 @@ public class PDEncryption
      */
     public void setOwnerKey(byte[] o) throws IOException
     {
-        COSString owner = new COSString();
-        owner.append( o );
-        dictionary.setItem(COSName.O, owner);
+        dictionary.setItem(COSName.O, new COSString(o));
     }
 
     /**
@@ -290,9 +288,7 @@ public class PDEncryption
      */
     public void setUserKey(byte[] u) throws IOException
     {
-        COSString user = new COSString();
-        user.append( u );
-        dictionary.setItem(COSName.U, user);
+        dictionary.setItem(COSName.U, new COSString(u));
     }
 
     /**
@@ -322,9 +318,7 @@ public class PDEncryption
      */
     public void setOwnerEncryptionKey(byte[] oe) throws IOException
     {
-        COSString ownerEncryptionKey = new COSString();
-        ownerEncryptionKey.append(oe);
-        dictionary.setItem( COSName.OE, ownerEncryptionKey );
+        dictionary.setItem( COSName.OE, new COSString(oe) );
     }
 
     /**
@@ -354,9 +348,7 @@ public class PDEncryption
      */
     public void setUserEncryptionKey(byte[] ue) throws IOException
     {
-        COSString userEncryptionKey = new COSString();
-        userEncryptionKey.append(ue);
-        dictionary.setItem( COSName.UE, userEncryptionKey );
+        dictionary.setItem( COSName.UE, new COSString(ue) );
     }
 
     /**
@@ -427,9 +419,7 @@ public class PDEncryption
         COSArray array = new COSArray();
         for (byte[] recipient : recipients)
         {
-            COSString recip = new COSString();
-            recip.append(recipient);
-            recip.setForceLiteralForm(true);
+            COSString recip = new COSString(recipient);
             array.add(recip);
         }
         dictionary.setItem(COSName.RECIPIENTS, array);
@@ -579,9 +569,7 @@ public class PDEncryption
      */
     public void setPerms(byte[] perms) throws IOException
     {
-        COSString user = new COSString();
-        user.append( perms );
-        dictionary.setItem( COSName.PERMS, user );
+        dictionary.setItem( COSName.PERMS, new COSString(perms) );
     }
 
     /**

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java?rev=1644828&r1=1644827&r2=1644828&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/encryption/SecurityHandler.java Fri Dec 12 04:39:37 2014
@@ -501,8 +501,7 @@ public abstract class SecurityHandler
         ByteArrayInputStream data = new ByteArrayInputStream(string.getBytes());
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
         encryptData(objNum, genNum, data, buffer, true /* decrypt */);
-        string.reset();
-        string.append(buffer.toByteArray());
+        string.setValue(buffer.toByteArray());
     }
 
     /**
@@ -519,8 +518,7 @@ public abstract class SecurityHandler
         ByteArrayInputStream data = new ByteArrayInputStream(string.getBytes());
         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
         encryptData(objNum, genNum, data, buffer, false /* decrypt */);
-        string.reset();
-        string.append(buffer.toByteArray());
+        string.setValue(buffer.toByteArray());
     }
 
     /**



Mime
View raw message