pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From jahew...@apache.org
Subject svn commit: r1606936 - in /pdfbox/trunk: examples/src/main/java/org/apache/pdfbox/examples/util/ pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/ pdfbox/src/main/java/org/apache/pdfbox/rendering/ pdfbox/src/main/java/org/apache/pdfbox/tex...
Date Mon, 30 Jun 2014 21:36:49 GMT
Author: jahewson
Date: Mon Jun 30 21:36:49 2014
New Revision: 1606936

URL: http://svn.apache.org/r1606936
Log:
PDFBOX-2126: Refactor TextPosition out of PDFStreamEngine and optimize clipping

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
Modified:
    pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java
    pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/BeginText.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowText.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java
    pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java

Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java Mon Jun 30 21:36:49 2014
@@ -100,7 +100,7 @@ public class PrintImageLocations extends
                     PDPage page = (PDPage)allPages.get( i );
                     System.out.println( "Processing page: " + i );
                     printer.processStream( page.findResources(), page.getContents().getStream(),
-                    		page.findCropBox(), page.findRotation() );
+                    		page.findCropBox() );
                 }
             }
             finally

Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java (original)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java Mon Jun 30 21:36:49 2014
@@ -116,7 +116,7 @@ public class PrintTextLocations extends 
                 text.getYDirAdj() + " fs=" + text.getFontSize() + " xscale=" +
                 text.getXScale() + " height=" + text.getHeightDir() + " space=" +
                 text.getWidthOfSpace() + " width=" +
-                text.getWidthDirAdj() + "]" + text.getCharacter() );
+                text.getWidthDirAdj() + "]" + text.getUnicode() );
     }
 
     /**

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/state/PDGraphicsState.java Mon Jun 30 21:36:49 2014
@@ -459,7 +459,7 @@ public class PDGraphicsState implements 
             clone.strokingColor = strokingColor; // immutable
             clone.nonStrokingColor = nonStrokingColor; // immutable
             clone.lineDashPattern = lineDashPattern; // immutable
-            clone.clippingPath = (Area) clippingPath.clone();
+            clone.clippingPath = clippingPath; // not cloned, see intersectClippingPath
             return clone;
         }
         catch (CloneNotSupportedException e)
@@ -555,6 +555,10 @@ public class PDGraphicsState implements 
      */
     public void intersectClippingPath(GeneralPath path)
     {
+        // lazy cloning of clipping path for performance
+        clippingPath = (Area) clippingPath.clone();
+
+        // intersection as usual
         clippingPath.intersect(new Area(path));
     }
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java Mon Jun 30 21:36:49 2014
@@ -82,7 +82,6 @@ import org.apache.pdfbox.pdmodel.graphic
 import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
 import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceDictionary;
 import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream;
-import org.apache.pdfbox.text.TextPosition;
 import org.apache.pdfbox.util.Matrix;
 import org.apache.pdfbox.util.PDFStreamEngine;
 import org.apache.pdfbox.util.ResourceLoader;
@@ -107,6 +106,9 @@ public class PageDrawer extends PDFStrea
     private int clipWindingRule = -1;
     private GeneralPath linePath = new GeneralPath();
 
+    // last clipping path
+    private Area lastClip;
+
     private final Map<PDFont, Glyph2D> fontGlyph2D = new HashMap<PDFont, Glyph2D>();
     private final Map<PDFont, Font> awtFonts = new HashMap<PDFont, Font>();
 
@@ -178,7 +180,7 @@ public class PageDrawer extends PDFStrea
         if (page.getContents() != null)
         {
             PDResources resources = page.findResources();
-            processStream(resources, page.getContents().getStream(), page.findCropBox(), page.findRotation());
+            processStream(resources, page.getContents().getStream(), page.findCropBox());
         }
 
         List<PDAnnotation> annotations = page.getAnnotations();
@@ -235,7 +237,7 @@ public class PageDrawer extends PDFStrea
         graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
         graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
 
-        initStream(pageDimension, 0);
+        initStream(pageDimension);
 
         // transform ctm
         Matrix concat = matrix.multiply(getGraphicsState().getCurrentTransformationMatrix());
@@ -253,82 +255,96 @@ public class PageDrawer extends PDFStrea
         processSubStream(pattern.getResources(), (COSStream)pattern.getCOSObject());
     }
 
-    /**
-     * You should override this method if you want to perform an action when a text is being processed.
-     * 
-     * @param text The text to process
-     */
-    protected void processTextPosition(TextPosition text)
+    // sets the clipping path using caching for performance, we track lastClip manually because
+    // Graphics2D#getClip() returns a new object instead of the same one passed to setClip
+    private void setClip()
     {
-        try
+        Area clippingPath = getGraphicsState().getCurrentClippingPath();
+        if (clippingPath != lastClip)
+        {
+            graphics.setClip(clippingPath);
+            lastClip = clippingPath;
+        }
+    }
+
+    @Override
+    public void beginText() throws IOException
+    {
+        PDGraphicsState state = getGraphicsState();
+        Composite composite;
+        Paint paint;
+        switch (state.getTextState().getRenderingMode())
         {
-            PDGraphicsState graphicsState = getGraphicsState();
-            Composite composite;
-            Paint paint;
-            switch (graphicsState.getTextState().getRenderingMode())
-            {
             case PDTextState.RENDERING_MODE_FILL_TEXT:
-                composite = graphicsState.getNonStrokeJavaComposite();
+                composite = state.getNonStrokeJavaComposite();
                 paint = getNonStrokingPaint();
                 break;
             case PDTextState.RENDERING_MODE_STROKE_TEXT:
-                composite = graphicsState.getStrokeJavaComposite();
+                composite = state.getStrokeJavaComposite();
                 paint = getStrokingPaint();
                 break;
             case PDTextState.RENDERING_MODE_NEITHER_FILL_NOR_STROKE_TEXT:
                 // basic support for text rendering mode "invisible"
                 // TODO why are we drawing anything at all?
                 paint = COLOR_TRANSPARENT;
-                composite = graphicsState.getStrokeJavaComposite();
+                composite = state.getStrokeJavaComposite();
                 break;
             default:
                 // TODO : need to implement....
-                LOG.debug("Unsupported RenderingMode " + this.getGraphicsState().getTextState().getRenderingMode()
+                LOG.debug("Unsupported RenderingMode "
+                        + this.getGraphicsState().getTextState().getRenderingMode()
                         + " in PageDrawer.processTextPosition()." + " Using RenderingMode "
                         + PDTextState.RENDERING_MODE_FILL_TEXT + " instead");
-                composite = graphicsState.getNonStrokeJavaComposite();
+                composite = state.getNonStrokeJavaComposite();
                 paint = getNonStrokingPaint();
-            }
-            graphics.setComposite(composite);
-            graphics.setPaint(paint);
+        }
+        graphics.setComposite(composite);
+        graphics.setPaint(paint);
+        setClip();
+    }
 
-            PDFont font = text.getFont();
-            AffineTransform at = text.getTextPos().createAffineTransform();
+    @Override
+    protected void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight,
+                                float widthText, String unicode, int[] charCodes, PDFont font,
+                                float fontSize) throws IOException
+    {
+        try
+        {
+            AffineTransform at = textMatrix.createAffineTransform();
             PDMatrix fontMatrix = font.getFontMatrix();
-            // TODO setClip() is a massive performance hot spot. Investigate optimization possibilities
-            graphics.setClip(graphicsState.getCurrentClippingPath());
 
             // use different methods to draw the string
             if (font.isType3Font())
             {
-                // Type3 fonts don't use the same units within the font matrix as all the other fonts
+                // Type3 fonts don't use the same units within the font matrix as the other fonts
                 at.scale(fontMatrix.getValue(0, 0), fontMatrix.getValue(1, 1));
                 // Type3 fonts are using streams for each character
-                drawType3String((PDType3Font) font, text, at);
+                drawType3String((PDType3Font) font, charCodes, at);
             }
             else
             {
                 Glyph2D glyph2D = createGlyph2D(font);
                 if (glyph2D != null)
                 {
-                    AffineTransform fontMatrixAT = new AffineTransform(fontMatrix.getValue(0, 0), fontMatrix.getValue(
-                            0, 1), fontMatrix.getValue(1, 0), fontMatrix.getValue(1, 1), fontMatrix.getValue(2, 0),
-                            fontMatrix.getValue(2, 1));
+                    AffineTransform fontMatrixAT = new AffineTransform(
+                            fontMatrix.getValue(0, 0), fontMatrix.getValue(0, 1),
+                            fontMatrix.getValue(1, 0), fontMatrix.getValue(1, 1),
+                            fontMatrix.getValue(2, 0), fontMatrix.getValue(2, 1));
                     at.concatenate(fontMatrixAT);
                     // Let PDFBox render the font if supported
-                    drawGlyph2D(glyph2D, text.getCodePoints(), at);
+                    drawGlyph2D(glyph2D, charCodes, at);
                 }
                 else
                 {
                     // Use AWT to render the font (standard14 fonts, substituted embedded fonts)
                     // TODO to be removed in the long run
-                    drawString(font, text.getCharacter(), at);
+                    drawString(font, unicode, at);
                 }
             }
         }
-        catch (IOException io)
+        catch (IOException e)
         {
-            LOG.error (io, io);
+            LOG.error(e.getMessage(), e);  // todo: really?
         }
     }
 
@@ -357,18 +373,17 @@ public class PageDrawer extends PDFStrea
      * Render the text using a type 3 font.
      * 
      * @param font the type3 font
-     * @param text the text to be rendered
+     * @param charCodes internal PDF character codes of glyphs
      * @param at the transformation
      * 
      * @throws IOException if something went wrong
      */
-    private void drawType3String(PDType3Font font, TextPosition text, AffineTransform at) throws IOException
+    private void drawType3String(PDType3Font font, int[] charCodes, AffineTransform at) throws IOException
     {
-        int[] codePoints = text.getCodePoints();
-        int textLength = codePoints.length;
+        int textLength = charCodes.length;
         for (int i = 0; i < textLength; i++)
         {
-            COSStream stream = font.getCharStream((char)codePoints[i]);
+            COSStream stream = font.getCharStream((char)charCodes[i]);
             if (stream != null)
             {
                 // save the current graphics state and matrices
@@ -389,7 +404,7 @@ public class PageDrawer extends PDFStrea
             }
             else
             {
-                LOG.debug("drawType3String: stream for character " + (char)codePoints[i] + " not found");
+                LOG.debug("drawType3String: stream for character " + (char)charCodes[i] + " not found");
             }
         }
     }
@@ -699,7 +714,7 @@ public class PageDrawer extends PDFStrea
         graphics.setPaint(strokingPaint);
         graphics.setStroke(getStroke());
         graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF);
-        graphics.setClip(getGraphicsState().getCurrentClippingPath());
+        setClip();
         graphics.draw(linePath);
         linePath.reset();
     }
@@ -724,7 +739,7 @@ public class PageDrawer extends PDFStrea
         graphics.setPaint(nonStrokingPaint);
         linePath.setWindingRule(windingRule);
         graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF);
-        graphics.setClip(getGraphicsState().getCurrentClippingPath());
+        setClip();
         graphics.fill(linePath);
         linePath.reset();
     }
@@ -781,7 +796,7 @@ public class PageDrawer extends PDFStrea
     public void drawImage(Image awtImage, AffineTransform at) throws IOException
     {
         graphics.setComposite(getGraphicsState().getNonStrokeJavaComposite());
-        graphics.setClip(getGraphicsState().getCurrentClippingPath());
+        setClip();
         PDSoftMask softMask = getGraphicsState().getSoftMask();
         if( softMask != null ) 
         {
@@ -826,6 +841,7 @@ public class PageDrawer extends PDFStrea
         graphics.setPaint(paint);
         graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF);
         graphics.setClip(null);
+        lastClip = null;
         graphics.fill(getGraphicsState().getCurrentClippingPath());
     }
 
@@ -894,16 +910,16 @@ public class PageDrawer extends PDFStrea
             // check underlying g2d
             double unitSize = 1.0;
 
-            Area resultClippingArea = new Area(getGraphicsState().getCurrentClippingPath());
+            Area groupClip = new Area(getGraphicsState().getCurrentClippingPath());
             if (clippingPath != null)
             {
                 Area newArea = new Area(clippingPath);            
-                resultClippingArea.intersect(newArea);
+                groupClip.intersect(newArea);
             }
 
             AffineTransform at = g2dOriginal.getTransform();
             at.scale(unitSize, unitSize);
-            Shape clippingPathInPixels = at.createTransformedShape(resultClippingArea);
+            Shape clippingPathInPixels = at.createTransformedShape(groupClip);
             Rectangle2D bounds2D = clippingPathInPixels.getBounds2D();
 
             minX = (int) Math.floor(bounds2D.getMinX());
@@ -914,16 +930,16 @@ public class PageDrawer extends PDFStrea
             width = maxX - minX;
             height = maxY - minY;
             image = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB); // FIXME - color space
-            Graphics2D groupG2D = image.createGraphics();
-            groupG2D.translate(-minX, -minY);
-            groupG2D.transform(at);
-            groupG2D.setClip(resultClippingArea);
+            Graphics2D g = image.createGraphics();
+            g.translate(-minX, -minY);
+            g.transform(at);
+            g.setClip(groupClip);
 
             AffineTransform atInv;
             Matrix matrix1 = null;
             try
             {
-                atInv = groupG2D.getTransform().createInverse();
+                atInv = g.getTransform().createInverse();
                 atInv.scale(width, -height);
                 atInv.translate(0, -1);
                 matrix1 = new Matrix();
@@ -940,7 +956,7 @@ public class PageDrawer extends PDFStrea
             state.setAlphaConstants(1.0);
             state.setNonStrokeAlphaConstants(1.0);
             state.setSoftMask(null);
-            graphics = groupG2D;
+            graphics = g;
             try
             {
                 processSubStream(resources, content);

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/TextPosition.java Mon Jun 30 21:36:49 2014
@@ -28,13 +28,13 @@ public final class TextPosition
 {
     // text matrix for the start of the text object, coordinates are in display units
     // and have not been adjusted
-    private final Matrix textPos;
+    private final Matrix textMatrix;
 
     // ending X and Y coordinates in display units
     private final float endX;
     private final float endY;
 
-    private final float maxTextHeight; // maximum height of text, in display units
+    private final float maxHeight; // maximum height of text, in display units
     private final int rotation; // 0, 90, 180, 270 degrees of page rotation
     private final float x;
     private final float y;
@@ -43,42 +43,42 @@ public final class TextPosition
 
     private final float widthOfSpace; // width of a space, in display units
 
-    private final int[] unicodeCP;
+    private final int[] charCodes; // internal PDF character codes
     private final PDFont font;
     private final float fontSize;
     private final int fontSizePt;
 
     // mutable
     private float[] widths;
-    private String string;
+    private String unicode;
 
     /**
      * Constructor.
      *
      * @param pageRotation rotation of the page that the text is located in
-     * @param pageWidthValue rotation of the page that the text is located in
-     * @param pageHeightValue rotation of the page that the text is located in
-     * @param textPositionSt TextMatrix for start of text (in display units)
-     * @param endXValue x coordinate of the end position
-     * @param endYValue y coordinate of the end position
-     * @param maxFontH Maximum height of text (in display units)
-     * @param individualWidth The width of the given character/string. (in ? units)
+     * @param pageWidth width of the page that the text is located in
+     * @param pageHeight height of the page that the text is located in
+     * @param textMatrix TextMatrix for start of text (in display units)
+     * @param endX x coordinate of the end position
+     * @param endY y coordinate of the end position
+     * @param maxHeight Maximum height of text (in display units)
+     * @param individualWidth The width of the given character/string. (in text units)
      * @param spaceWidth The width of the space character. (in display units)
-     * @param string The character to be displayed.
-     * @param codePoints An array containing the codepoints of the given string.
-     * @param currentFont The current font for this text position.
-     * @param fontSizeValue The new font size.
+     * @param unicode The string of Unicode characters to be displayed.
+     * @param charCodes An array of the internal PDF character codes for the glyphs in this text.
+     * @param font The current font for this text position.
+     * @param fontSize The new font size.
      * @param fontSizeInPt The font size in pt units.
      */
-    public TextPosition(int pageRotation, float pageWidthValue, float pageHeightValue,
-                        Matrix textPositionSt, float endXValue, float endYValue, float maxFontH,
-                        float individualWidth,  float spaceWidth, String string, int[] codePoints,
-                        PDFont currentFont, float fontSizeValue, int fontSizeInPt)
+    public TextPosition(int pageRotation, float pageWidth, float pageHeight, Matrix textMatrix,
+                        float endX, float endY, float maxHeight, float individualWidth,
+                        float spaceWidth, String unicode, int[] charCodes, PDFont font,
+                        float fontSize, int fontSizeInPt)
     {
-        this.textPos = textPositionSt;
+        this.textMatrix = textMatrix;
 
-        this.endX = endXValue;
-        this.endY = endYValue;
+        this.endX = endX;
+        this.endY = endY;
 
         int rotation = pageRotation;
         // make sure it is 0 to 270 and no negative numbers
@@ -92,26 +92,26 @@ public final class TextPosition
         }
         this.rotation = rotation;
 
-        this.maxTextHeight = maxFontH;
-        this.pageHeight = pageHeightValue;
-        this.pageWidth = pageWidthValue;
+        this.maxHeight = maxHeight;
+        this.pageHeight = pageHeight;
+        this.pageWidth = pageWidth;
 
         this.widths = new float[] { individualWidth };
         this.widthOfSpace = spaceWidth;
-        this.string = string;
-        this.unicodeCP = codePoints;
-        this.font = currentFont;
-        this.fontSize = fontSizeValue;
+        this.unicode = unicode;
+        this.charCodes = charCodes;
+        this.font = font;
+        this.fontSize = fontSize;
         this.fontSizePt = fontSizeInPt;
 
         x = getXRot(rotation);
         if (rotation == 0 || rotation == 180)
         {
-            y = pageHeight - getYLowerLeftRot(rotation);
+            y = this.pageHeight - getYLowerLeftRot(rotation);
         }
         else
         {
-            y = pageWidth - getYLowerLeftRot(rotation);
+            y = this.pageWidth - getYLowerLeftRot(rotation);
         }
     }
 
@@ -120,29 +120,29 @@ public final class TextPosition
      *
      * @return The string on the screen.
      */
-    public String getCharacter()
+    public String getUnicode()
     {
-        return string;
+        return unicode;
     }
 
     /**
-     * Return the codepoints of the characters stored in this object.
+     * Return the internal PDF character codes of the glyphs in this text.
      *
-     * @return an array containing all codepoints.
+     * @return an array of internal PDF character codes
      */
-    public int[] getCodePoints()    // todo: NOT Unicode!!
+    public int[] getCharacterCodes()
     {
-        return unicodeCP;
+        return charCodes;
     }
 
     /**
-     * Return the Matrix textPos stored in this object.
+     * Return the text matrix stored in this object.
      *
-     * @return The Matrix containing all infos of the starting textposition
+     * @return The Matrix containing the starting text position
      */
-    public Matrix getTextPos()
+    public Matrix getTextMatrix()
     {
-        return textPos;
+        return textMatrix;
     }
 
     /**
@@ -151,10 +151,10 @@ public final class TextPosition
      */
     public float getDir()
     {
-        float a = textPos.getValue(0,0);
-        float b = textPos.getValue(0,1);
-        float c = textPos.getValue(1,0);
-        float d = textPos.getValue(1,1);
+        float a = textMatrix.getValue(0,0);
+        float b = textMatrix.getValue(0,1);
+        float c = textMatrix.getValue(1,0);
+        float d = textMatrix.getValue(1,1);
 
         // 12 0   left to right
         // 0 12
@@ -194,19 +194,19 @@ public final class TextPosition
     {
         if (rotation == 0)
         {
-            return textPos.getValue(2,0);
+            return textMatrix.getValue(2,0);
         }
         else if (rotation == 90)
         {
-            return textPos.getValue(2,1);
+            return textMatrix.getValue(2,1);
         }
         else if (rotation == 180)
         {
-            return pageWidth - textPos.getValue(2,0);
+            return pageWidth - textMatrix.getValue(2,0);
         }
         else if (rotation == 270)
         {
-            return pageHeight - textPos.getValue(2,1);
+            return pageHeight - textMatrix.getValue(2,1);
         }
         return 0;
     }
@@ -245,19 +245,19 @@ public final class TextPosition
     {
         if (rotation == 0)
         {
-            return textPos.getValue(2,1);
+            return textMatrix.getValue(2,1);
         }
         else if (rotation == 90)
         {
-            return pageWidth - textPos.getValue(2,0);
+            return pageWidth - textMatrix.getValue(2,0);
         }
         else if (rotation == 180)
         {
-            return pageHeight - textPos.getValue(2,1);
+            return pageHeight - textMatrix.getValue(2,1);
         }
         else if (rotation == 270)
         {
-            return textPos.getValue(2,0);
+            return textMatrix.getValue(2,0);
         }
         return 0;
     }
@@ -303,11 +303,11 @@ public final class TextPosition
     {
         if (rotation == 90 || rotation == 270)
         {
-            return Math.abs(endY - textPos.getYPosition());
+            return Math.abs(endY - textMatrix.getYPosition());
         }
         else
         {
-            return Math.abs(endX - textPos.getXPosition());
+            return Math.abs(endX - textMatrix.getXPosition());
         }
     }
 
@@ -338,7 +338,7 @@ public final class TextPosition
      */
     public float getHeight()
     {
-        return maxTextHeight;
+        return maxHeight;
     }
 
     /**
@@ -349,7 +349,7 @@ public final class TextPosition
     public float getHeightDir()
     {
         // this is not really a rotation-dependent calculation, but this is defined for symmetry
-        return maxTextHeight;
+        return maxHeight;
     }
 
     /**
@@ -399,7 +399,7 @@ public final class TextPosition
      */
     public float getXScale()
     {
-        return textPos.getXScale();
+        return textMatrix.getXScale();
     }
 
     /**
@@ -407,7 +407,7 @@ public final class TextPosition
      */
     public float getYScale()
     {
-        return textPos.getYScale();
+        return textMatrix.getYScale();
     }
 
     /**
@@ -477,7 +477,7 @@ public final class TextPosition
      */
     public void mergeDiacritic(TextPosition diacritic, TextNormalize normalize)
     {
-        if (diacritic.getCharacter().length() > 1)
+        if (diacritic.getUnicode().length() > 1)
         {
             return;
         }
@@ -487,7 +487,7 @@ public final class TextPosition
 
         float currCharXStart = getXDirAdj();
 
-        int strLen = string.length();
+        int strLen = unicode.length();
         boolean wasAdded = false;
 
         for (int i = 0; i < strLen && !wasAdded; i++)
@@ -561,10 +561,10 @@ public final class TextPosition
         // we add the diacritic to the right or left of the character depending on the direction
         // of the character. Note that this is only required because the text is currently stored in
         // presentation order and not in logical order
-        int dir = Character.getDirectionality(string.charAt(i));
+        int dir = Character.getDirectionality(unicode.charAt(i));
         StringBuilder sb = new StringBuilder();
 
-        sb.append(string.substring(0, i));
+        sb.append(unicode.substring(0, i));
 
         float[] widths2 = new float[widths.length + 1];
         System.arraycopy(widths, 0, widths2, 0, i);
@@ -574,24 +574,24 @@ public final class TextPosition
             dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING ||
             dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
         {
-            sb.append(normalize.normalizeDiacritic(diacritic.getCharacter()));
+            sb.append(normalize.normalizeDiacritic(diacritic.getUnicode()));
             widths2[i] = 0;
-            sb.append(string.charAt(i));
+            sb.append(unicode.charAt(i));
             widths2[i + 1] = widths[i];
         }
         else
         {
-            sb.append(string.charAt(i));
+            sb.append(unicode.charAt(i));
             widths2[i] = widths[i];
-            sb.append(normalize.normalizeDiacritic(diacritic.getCharacter()));
+            sb.append(normalize.normalizeDiacritic(diacritic.getUnicode()));
             widths2[i + 1] = 0;
         }
 
         // get the rest of the string
-        sb.append(string.substring(i + 1, string.length()));
+        sb.append(unicode.substring(i + 1, unicode.length()));
         System.arraycopy(widths, i + 1, widths2, i + 2, widths.length - i - 1);
 
-        string = sb.toString();
+        unicode = sb.toString();
         widths = widths2;
     }
 
@@ -600,7 +600,7 @@ public final class TextPosition
      */
     public boolean isDiacritic()
     {
-        String text = this.getCharacter();
+        String text = this.getUnicode();
         if (text.length() != 1)
         {
             return false; 
@@ -618,6 +618,6 @@ public final class TextPosition
      */
     public String toString()
     {
-        return getCharacter();
+        return getUnicode();
     }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFMarkedContentExtractor.java Mon Jun 30 21:36:49 2014
@@ -36,7 +36,7 @@ import org.apache.pdfbox.text.TextPositi
  * @author koch
  * @version $Revision$
  */
-public class PDFMarkedContentExtractor extends PDFStreamEngine
+public class PDFMarkedContentExtractor extends PDFTextStreamEngine
 {
     private boolean suppressDuplicateOverlappingText = true;
     private List<PDMarkedContent> markedContents = new ArrayList<PDMarkedContent>();
@@ -163,13 +163,14 @@ public class PDFMarkedContentExtractor e
      *
      * @param text The text to process.
      */
+    @Override
     protected void processTextPosition( TextPosition text )
     {
         boolean showCharacter = true;
         if( this.suppressDuplicateOverlappingText )
         {
             showCharacter = false;
-            String textCharacter = text.getCharacter();
+            String textCharacter = text.getUnicode();
             float textX = text.getX();
             float textY = text.getY();
             List<TextPosition> sameTextCharacters = this.characterListMapping.get( textCharacter );
@@ -195,7 +196,7 @@ public class PDFMarkedContentExtractor e
             for( int i=0; i<sameTextCharacters.size(); i++ )
             {
                 TextPosition character = (TextPosition)sameTextCharacters.get( i );
-                String charCharacter = character.getCharacter();
+                String charCharacter = character.getUnicode();
                 float charX = character.getX();
                 float charY = character.getY();
                 //only want to suppress

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Mon Jun 30 21:36:49 2014
@@ -45,15 +45,12 @@ import org.apache.pdfbox.pdmodel.font.PD
 import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
 import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
-import org.apache.pdfbox.text.TextPosition;
 import org.apache.pdfbox.util.operator.OperatorProcessor;
 import org.apache.pdfbox.util.operator.PDFOperator;
 
 /**
  * Processes a PDF content stream and executes certain operations.
  * Provides a callback interface for clients that want to do things with the stream.
- *
- * @see org.apache.pdfbox.util.PDFTextStripper
  * 
  * @author Ben Litchfield
  */
@@ -66,13 +63,10 @@ public class PDFStreamEngine
 
     private Matrix textMatrix;
     private Matrix textLineMatrix;
-    private final Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();
 
+    private final Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();
     private final Stack<PDResources> streamResourcesStack = new Stack<PDResources>();
 
-    private int pageRotation;
-    private PDRectangle drawingRectangle;
-
     // skip malformed or otherwise unparseable input where possible
     private boolean forceParsing;
 
@@ -178,14 +172,11 @@ public class PDFStreamEngine
      * Initialises a stream for processing.
      *
      * @param drawingSize the size of the page
-     * @param rotation the page rotation
      */
-    protected void initStream(PDRectangle drawingSize, int rotation)
+    protected void initStream(PDRectangle drawingSize)
     {
-        drawingRectangle = drawingSize;
-        pageRotation = rotation;
         graphicsStack.clear();
-        graphicsStack.push(new PDGraphicsState(drawingRectangle));
+        graphicsStack.push(new PDGraphicsState(drawingSize));
         textMatrix = null;
         textLineMatrix = null;
         streamResourcesStack.clear();
@@ -197,13 +188,12 @@ public class PDFStreamEngine
      * @param resources The location to retrieve resources.
      * @param cosStream the Stream to execute.
      * @param drawingSize the size of the page
-     * @param rotation the page rotation
      * @throws IOException if there is an error accessing the stream.
      */
-    public void processStream(PDResources resources, COSStream cosStream, PDRectangle drawingSize,
-                              int rotation) throws IOException
+    public void processStream(PDResources resources, COSStream cosStream, PDRectangle drawingSize)
+            throws IOException
     {
-        initStream(drawingSize, rotation);
+        initStream(drawingSize);
         processSubStream(resources, cosStream);
     }
 
@@ -217,7 +207,7 @@ public class PDFStreamEngine
     public void processSubStream(PDResources resources, COSStream cosStream) throws IOException
     {
         // sanity check
-        if (drawingRectangle == null)
+        if (graphicsStack.isEmpty())
         {
             throw new IllegalStateException("Call to processSubStream() before processStream() " +
                                             "or initStream()");
@@ -277,13 +267,24 @@ public class PDFStreamEngine
     }
 
     /**
-     * Process encoded text from the PDF Stream. You should override this method if you want to
+     * Called when the BT operator is encountered. This method is for overriding in subclasses, the
+     * default implementation does nothing.
+     *
+     * @throws IOException if there was an error processing the text
+     */
+    public void beginText() throws IOException
+    {
+        // overridden in subclasses
+    }
+
+    /**
+     * Process text from the PDF Stream. You should override this method if you want to
      * perform an action when encoded text is being processed.
      * 
      * @param string The encoded text
      * @throws IOException If there is an error processing the string
      */
-    public void processEncodedText(byte[] string) throws IOException
+    public void processText(byte[] string) throws IOException
     {
         // Note on variable names. There are three different units being used in this code.
         // Character sizes are given in glyph units, text locations are initially given in text
@@ -308,38 +309,14 @@ public class PDFStreamEngine
         // all fonts have the width/height of a character in thousandths of a unit of text space
         float fontMatrixXScaling = 1 / 1000f;
         float fontMatrixYScaling = 1 / 1000f;
-        float glyphSpaceToTextSpaceFactor = 1 / 1000f;
         // expect Type3 fonts, those are providing the width of a character in glyph space units
         if (font instanceof PDType3Font)
         {
             PDMatrix fontMatrix = font.getFontMatrix();
             fontMatrixXScaling = fontMatrix.getValue(0, 0);
             fontMatrixYScaling = fontMatrix.getValue(1, 1);
-            // This will typically be 1000 but in the case of a type3 font
-            // this might be a different number
-            glyphSpaceToTextSpaceFactor = 1f / fontMatrix.getValue(0, 0);
-        }
-        float spaceWidthText = 0;
-        try
-        {
-            // to avoid crash as described in PDFBOX-614, see what the space displacement should be
-            spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
-        }
-        catch (Throwable exception)
-        {
-            LOG.warn(exception, exception);
         }
 
-        if (spaceWidthText == 0)
-        {
-            spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
-            // the average space width appears to be higher than necessary so make it smaller
-            spaceWidthText *= .80f;
-        }
-        if (spaceWidthText == 0)
-        {
-            spaceWidthText = 1.0f; // if could not find font, use a generic value
-        }
         float maxVerticalDisplacementText = 0;
 
         Matrix textStateParameters = new Matrix();
@@ -347,9 +324,6 @@ public class PDFStreamEngine
         textStateParameters.setValue(1, 1, fontSizeText);
         textStateParameters.setValue(2, 1, riseText);
 
-        float pageHeight = drawingRectangle.getHeight();
-        float pageWidth = drawingRectangle.getWidth();
-
         Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
         Matrix textXctm = new Matrix();
         Matrix textMatrixEnd = new Matrix();
@@ -361,24 +335,20 @@ public class PDFStreamEngine
         {
             // Decode the value to a Unicode character
             codeLength = 1;
-            String c = font.encode(string, i, codeLength);
-            int[] codePoints;
-            if (c == null && i + 1 < string.length)
+            String unicode = font.encode(string, i, codeLength);
+            int[] charCodes;
+            if (unicode == null && i + 1 < string.length)
             {
                 // maybe a multibyte encoding
                 codeLength++;
-                c = font.encode(string, i, codeLength);
-                codePoints = new int[] { font.getCodeFromArray(string, i, codeLength) };
+                unicode = font.encode(string, i, codeLength);
+                charCodes = new int[] { font.getCodeFromArray(string, i, codeLength) };
             }
             else
             {
-                codePoints = new int[] { font.getCodeFromArray(string, i, codeLength) };
+                charCodes = new int[] { font.getCodeFromArray(string, i, codeLength) };
             }
 
-            // the space width has to be transformed into display units
-            float spaceWidthDisp = spaceWidthText * fontSizeText * horizontalScalingText *
-                    textMatrix.getXScale()  * ctm.getXScale();
-
             // TODO: handle horizontal displacement
             // get the width and height of this character in text units
             float charHorizontalDisplacementText = font.getFontWidth(string, i, codeLength);
@@ -450,32 +420,35 @@ public class PDFStreamEngine
             float startXPosition = textMatrixStart.getXPosition();
             float widthText = endXPosition - startXPosition;
 
-            // PDFBOX-373: Replace a null entry with "?" so it is not printed as "(null)"
-            if (c == null)
-            {
-                c = "?";
-            }
-
             float totalVerticalDisplacementDisp = maxVerticalDisplacementText * fontSizeText *
                     textXctm.getYScale();
 
-            // process the decoded text
-            processTextPosition(new TextPosition(pageRotation, pageWidth, pageHeight,
-                    textMatrixStart, endXPosition, endYPosition, totalVerticalDisplacementDisp,
-                    widthText, spaceWidthDisp, c, codePoints, font, fontSizeText,
-                    (int)(fontSizeText * textMatrix.getXScale())));
+            // process the decoded glyph
+            processGlyph(textMatrixStart, new Point2D.Float(endXPosition, endYPosition),
+                    totalVerticalDisplacementDisp, widthText, unicode, charCodes,
+                    font, fontSizeText);
         }
     }
 
     /**
-     * A method provided as an event interface to allow a subclass to perform some specific
-     * functionality when text needs to be processed.
+     * Called when a glyph is to be processed. This method is intended for overriding in subclasses,
+     * the default implementation does nothing.
      *
-     * @param text The text to be processed.
-     */
-    protected void processTextPosition(TextPosition text)
+     * @param textMatrix the text matrix at the start of the glyph
+     * @param end the end position of the glyph in text space
+     * @param maxHeight the height of the glyph in device space
+     * @param widthText the width of the glyph in text space
+     * @param unicode the Unicode text for this glyph, or null. May be meaningless.
+     * @param charCodes array of internal PDF character codes for the glyph todo: should be 1 code?
+     * @param font the current font
+     * @param fontSize font size in text space
+     * @throws IOException if the glyph cannot be processed
+     */
+    protected void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight,
+                                float widthText, String unicode, int[] charCodes, PDFont font,
+                                float fontSize) throws IOException
     {
-        // subclasses can override to provide specific functionality.
+        // overridden in subclasses
     }
 
     /**

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java?rev=1606936&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java Mon Jun 30 21:36:49 2014
@@ -0,0 +1,138 @@
+package org.apache.pdfbox.util;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.pdmodel.PDResources;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.font.PDFont;
+import org.apache.pdfbox.pdmodel.font.PDType3Font;
+import org.apache.pdfbox.text.TextPosition;
+
+import java.awt.geom.Point2D;
+import java.io.IOException;
+import java.util.Properties;
+
+/**
+ * PDFStreamEngine subclass for advanced processing of text via TextPosition.
+ *
+ * @see org.apache.pdfbox.text.TextPosition
+ * @author Ben Litchfield
+ * @author John Hewson
+ */
+public class PDFTextStreamEngine extends PDFStreamEngine
+{
+    private static final Log log = LogFactory.getLog(PDFStreamEngine.class);
+
+    private int pageRotation;
+    private PDRectangle pageSize;
+
+    private PDFTextStreamEngine()
+    {
+    }
+
+    /**
+     * Constructor with engine properties. The property keys are all PDF operators, the values are
+     * class names used to execute those operators. An empty value means that the operator will be
+     * silently ignored.
+     *
+     * @param properties The engine properties.
+     */
+    public PDFTextStreamEngine(Properties properties)
+    {
+        super(properties);
+    }
+
+    /**
+     * This will initialise and process the contents of the stream.
+     *
+     * @param resources The location to retrieve resources.
+     * @param cosStream the Stream to execute.
+     * @param pageSize the size of the page
+     * @param rotation the page rotation
+     * @throws java.io.IOException if there is an error accessing the stream.
+     */
+    public void processStream(PDResources resources, COSStream cosStream, PDRectangle pageSize,
+                              int rotation) throws IOException
+    {
+        this.pageRotation = rotation;
+        this.pageSize = pageSize;
+        super.processStream(resources, cosStream, pageSize);
+    }
+
+    /**
+     * This method was originally written by Ben Litchfield for PDFStreamEngine.
+     */
+    @Override
+    protected final void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight,
+                                      float widthText, String unicode,
+                                      int[] charCodes, PDFont font, float fontSize)
+                                      throws IOException
+    {
+        // Note on variable names. There are three different units being used in this code.
+        // Character sizes are given in glyph units, text locations are initially given in text
+        // units, and we want to save the data in display units. The variable names should end with
+        // Text or Disp to represent if the values are in text or disp units (no glyph units are
+        // saved).
+
+        float fontSizeText = getGraphicsState().getTextState().getFontSize();
+        float horizontalScalingText = getGraphicsState().getTextState().getHorizontalScaling()/100f;
+        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
+
+        float glyphSpaceToTextSpaceFactor = 1 / 1000f;
+        if (font instanceof PDType3Font)
+        {
+            // This will typically be 1000 but in the case of a type3 font
+            // this might be a different number
+            glyphSpaceToTextSpaceFactor = 1f / font.getFontMatrix().getValue(0, 0);
+        }
+
+        float spaceWidthText = 0;
+        try
+        {
+            // to avoid crash as described in PDFBOX-614, see what the space displacement should be
+            spaceWidthText = font.getSpaceWidth() * glyphSpaceToTextSpaceFactor;
+        }
+        catch (Throwable exception)
+        {
+            log.warn(exception, exception);
+        }
+
+        if (spaceWidthText == 0)
+        {
+            spaceWidthText = font.getAverageFontWidth() * glyphSpaceToTextSpaceFactor;
+            // the average space width appears to be higher than necessary so make it smaller
+            spaceWidthText *= .80f;
+        }
+        if (spaceWidthText == 0)
+        {
+            spaceWidthText = 1.0f; // if could not find font, use a generic value
+        }
+
+        // the space width has to be transformed into display units
+        float spaceWidthDisp = spaceWidthText * fontSizeText * horizontalScalingText *
+                textMatrix.getXScale()  * ctm.getXScale();
+
+        // PDFBOX-373: Replace a null entry with "?" so it is not printed as "(null)"
+        if (unicode == null)
+        {
+            unicode = "?";
+        }
+
+        processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(),
+                pageSize.getHeight(), textMatrix, end.x, end.y, maxHeight, widthText,
+                spaceWidthDisp, unicode, charCodes, font, fontSize,
+                (int)(fontSize * textMatrix.getXScale())));
+    }
+
+    /**
+     * A method provided as an event interface to allow a subclass to perform some specific
+     * functionality when text needs to be processed.
+     *
+     * @param text The text to be processed.
+     */
+    protected void processTextPosition(TextPosition text)
+    {
+        // subclasses can override to provide specific functionality
+    }
+}

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Mon Jun 30 21:36:49 2014
@@ -60,7 +60,7 @@ import org.apache.pdfbox.text.TextPositi
  *
  * @author Ben Litchfield
  */
-public class PDFTextStripper extends PDFStreamEngine
+public class PDFTextStripper extends PDFTextStreamEngine
 {
     private static float DEFAULT_INDENT_THRESHOLD = 2.0f;
     private static float DEFAULT_DROP_THRESHOLD = 2.5f;
@@ -545,7 +545,7 @@ public class PDFTextStripper extends PDF
             while (textIter.hasNext())
             {
                 TextPosition position = textIter.next();
-                String stringValue = position.getCharacter();
+                String stringValue = position.getUnicode();
                 for (int a = 0; a < stringValue.length(); a++)
                 {
                     byte dir = Character.getDirectionality(stringValue.charAt(a));
@@ -591,7 +591,7 @@ public class PDFTextStripper extends PDF
             {
                 TextPosition position = textIter.next();
                 PositionWrapper current = new PositionWrapper(position);
-                String characterValue = position.getCharacter();
+                String characterValue = position.getUnicode();
 
                 // Resets the average character width when we see a change in font
                 // or a change in the font size
@@ -706,8 +706,8 @@ public class PDFTextStripper extends PDF
                     if (expectedStartOfNextWordX != EXPECTED_START_OF_NEXT_WORD_X_RESET_VALUE &&
                         expectedStartOfNextWordX < positionX &&
                         // only bother adding a space if the last character was not a space
-                        lastPosition.getTextPosition().getCharacter() != null &&
-                        !lastPosition.getTextPosition().getCharacter().endsWith(" "))
+                        lastPosition.getTextPosition().getUnicode() != null &&
+                        !lastPosition.getTextPosition().getUnicode().endsWith(" "))
                     {
                         line.add(LineItem.getWordSeparator());
                     }
@@ -786,7 +786,7 @@ public class PDFTextStripper extends PDF
      */
     protected void writeCharacters(TextPosition text) throws IOException
     {
-        output.write(text.getCharacter());
+        output.write(text.getUnicode());
     }
 
     /**
@@ -838,7 +838,7 @@ public class PDFTextStripper extends PDF
         if (suppressDuplicateOverlappingText)
         {
             showCharacter = false;
-            String textCharacter = text.getCharacter();
+            String textCharacter = text.getUnicode();
             float textX = text.getX();
             float textY = text.getY();
             TreeMap<Float, TreeSet<Float>> sameTextCharacters =
@@ -1666,7 +1666,7 @@ public class PDFTextStripper extends PDF
     protected Pattern matchListItemPattern(PositionWrapper pw) 
     {
         TextPosition tp = pw.getTextPosition();
-        String txt = tp.getCharacter();
+        String txt = tp.getUnicode();
         return matchPattern(txt,getListItemPatterns());
     }
 
@@ -1843,7 +1843,7 @@ public class PDFTextStripper extends PDF
         else 
         {
             TextPosition text = item.getTextPosition();
-            lineBuilder.append(text.getCharacter());
+            lineBuilder.append(text.getUnicode());
             wordPositions.add(text);
         }
         return lineBuilder;
@@ -1889,7 +1889,7 @@ public class PDFTextStripper extends PDF
      * Note that the number of entries in that list may differ from the number of characters in the
      * string due to normalization.
      *
-     * @author Axel Dörfler
+     * @author Axel Dörfler
      */
     private static final class WordWithTextPositions
     {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripperByArea.java Mon Jun 30 21:36:49 2014
@@ -156,6 +156,7 @@ public class PDFTextStripperByArea exten
     /**
      * {@inheritDoc}
      */
+    @Override
     protected void processTextPosition( TextPosition text )
     {
         Iterator<String> regionIter = regionArea.keySet().iterator();

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/BeginText.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/BeginText.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/BeginText.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/BeginText.java Mon Jun 30 21:36:49 2014
@@ -16,6 +16,7 @@
  */
 package org.apache.pdfbox.util.operator;
 
+import java.io.IOException;
 import java.util.List;
 
 import org.apache.pdfbox.cos.COSBase;
@@ -35,9 +36,10 @@ public class BeginText extends OperatorP
      * @param operator The operator that is being executed.
      * @param arguments List
      */
-    public void process(PDFOperator operator, List<COSBase> arguments)
+    public void process(PDFOperator operator, List<COSBase> arguments) throws IOException
     {
         context.setTextMatrix( new Matrix());
         context.setTextLineMatrix( new Matrix() );
+        context.beginText();
     }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowText.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowText.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowText.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowText.java Mon Jun 30 21:36:49 2014
@@ -41,7 +41,7 @@ public class ShowText extends OperatorPr
     public void process(PDFOperator operator, List<COSBase> arguments) throws IOException
     {
         COSString string = (COSString)arguments.get( 0 );
-        context.processEncodedText( string.getBytes() );
+        context.processText(string.getBytes());
     }
 
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/ShowTextGlyph.java Mon Jun 30 21:36:49 2014
@@ -58,7 +58,7 @@ public class ShowTextGlyph extends Opera
             }
             else if( next instanceof COSString )
             {
-                context.processEncodedText( ((COSString)next).getBytes() );
+                context.processText(((COSString) next).getBytes());
             }
             else
             {

Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java Mon Jun 30 21:36:49 2014
@@ -72,7 +72,7 @@ public class PreflightContentStream exte
             if (pstream != null)
             {
                 processStream(processeedPage.findResources(), pstream.getStream(), 
-                		processeedPage.findCropBox(), processeedPage.findRotation());
+                		processeedPage.findCropBox());
             }
         }
         catch (ContentStreamException e)
@@ -96,7 +96,7 @@ public class PreflightContentStream exte
     {
         try
         {
-            initStream(this.processeedPage.findCropBox(), 0);
+            initStream(this.processeedPage.findCropBox());
             processSubStream(xobj.getResources(), xobj.getCOSStream());
         }
         catch (ContentStreamException e)

Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java?rev=1606936&r1=1606935&r2=1606936&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFText2HTML.java Mon Jun 30 21:36:49 2014
@@ -143,7 +143,7 @@ public class PDFText2HTML extends PDFTex
                     }
                     if (currentFontSize > 13.0f)
                     { // most body text is 12pt
-                        titleText.append(position.getCharacter());
+                        titleText.append(position.getUnicode());
                     }
                 }
             }



Mime
View raw message