Return-Path: X-Original-To: apmail-pdfbox-commits-archive@www.apache.org Delivered-To: apmail-pdfbox-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 5E39A11730 for ; Sat, 30 Aug 2014 02:28:01 +0000 (UTC) Received: (qmail 75723 invoked by uid 500); 30 Aug 2014 02:28:01 -0000 Delivered-To: apmail-pdfbox-commits-archive@pdfbox.apache.org Received: (qmail 75699 invoked by uid 500); 30 Aug 2014 02:28:01 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 75690 invoked by uid 99); 30 Aug 2014 02:28:01 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 30 Aug 2014 02:28:01 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sat, 30 Aug 2014 02:27:29 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 289302388AC8; Sat, 30 Aug 2014 02:27:04 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Subject: svn commit: r1621411 [6/7] - in /pdfbox/trunk: ./ examples/src/main/java/org/apache/pdfbox/examples/pdmodel/ fontbox/src/main/java/org/apache/fontbox/afm/ fontbox/src/main/java/org/apache/fontbox/cff/ fontbox/src/main/java/org/apache/fontbox/cff/charse... Date: Sat, 30 Aug 2014 02:27:00 -0000 To: commits@pdfbox.apache.org From: jahewson@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140830022704.289302388AC8@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/color/PDCalRGB.java Sat Aug 30 02:26:57 2014 @@ -22,7 +22,7 @@ import org.apache.pdfbox.cos.COSDictiona import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.pdmodel.common.PDMatrix; +import org.apache.pdfbox.util.Matrix; /** * A CalRGB colour space is a CIE-based colour space with one transformation stage instead of two. @@ -151,16 +151,16 @@ public class PDCalRGB extends PDCIEBased * If the underlying dictionary contains null then the identity matrix will be returned. * @return the linear interpretation matrix */ - public final PDMatrix getGammaMatrix() + public final Matrix getGammaMatrix() { COSArray matrix = (COSArray)dictionary.getDictionaryObject(COSName.MATRIX); if(matrix == null) { - return new PDMatrix(); + return new Matrix(); } else { - return new PDMatrix(matrix); + return new Matrix(matrix); } } @@ -210,12 +210,12 @@ public class PDCalRGB extends PDCIEBased * Passing in null will clear the matrix. * @param matrix the new linear interpretation matrix, or null */ - public final void setGammaMatrix(PDMatrix matrix) + public final void setGammaMatrix(Matrix matrix) { COSArray matrixArray = null; if(matrix != null) { - matrixArray = matrix.getCOSArray(); + matrixArray = matrix.toCOSArray(); } dictionary.setItem(COSName.MATRIX, matrixArray); } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java Sat Aug 30 02:26:57 2014 @@ -18,15 +18,12 @@ package org.apache.pdfbox.rendering; import java.awt.BasicStroke; import java.awt.Color; -import java.awt.Font; import java.awt.Graphics; import java.awt.Graphics2D; import java.awt.Paint; import java.awt.RenderingHints; import java.awt.Shape; import java.awt.TexturePaint; -import java.awt.font.FontRenderContext; -import java.awt.font.GlyphVector; import java.awt.geom.AffineTransform; import java.awt.geom.Area; import java.awt.geom.GeneralPath; @@ -42,26 +39,20 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.fontbox.cff.CFFFont; -import org.apache.fontbox.ttf.TrueTypeFont; -import org.apache.fontbox.type1.Type1Font; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.pdmodel.font.PDCIDFontType0; +import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; import org.apache.pdfbox.pdmodel.graphics.image.PDImage; import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; +import org.apache.pdfbox.rendering.font.CIDType0Glyph2D; import org.apache.pdfbox.rendering.font.Glyph2D; import org.apache.pdfbox.rendering.font.TTFGlyph2D; import org.apache.pdfbox.rendering.font.Type1Glyph2D; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.common.PDMatrix; import org.apache.pdfbox.pdmodel.common.PDRectangle; -import org.apache.pdfbox.pdmodel.font.PDFFontManager; -import org.apache.pdfbox.pdmodel.font.PDCIDFontType0Font; -import org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font; import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.font.PDFontDescriptor; -import org.apache.pdfbox.pdmodel.font.PDFontDescriptorDictionary; import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType1CFont; @@ -82,6 +73,7 @@ import org.apache.pdfbox.pdmodel.interac import org.apache.pdfbox.pdmodel.interactive.annotation.PDAppearanceStream; import org.apache.pdfbox.util.Matrix; import org.apache.pdfbox.util.PDFGraphicsStreamEngine; +import org.apache.pdfbox.util.Vector; /** * Paints a page in a PDF document to a Graphics context. @@ -104,8 +96,10 @@ public class PageDrawer extends PDFGraph // last clipping path private Area lastClip; + // buffered clipping area for text being drawn + private Area textClippingArea; + private final Map fontGlyph2D = new HashMap(); - private final Map awtFonts = new HashMap(); private PDRectangle pageSize; @@ -221,7 +215,7 @@ public class PageDrawer extends PDFGraph initStream(pageDimension); - // transform ctm + // transformPoint ctm Matrix concat = matrix.multiply(getGraphicsState().getCurrentTransformationMatrix()); getGraphicsState().setCurrentTransformationMatrix(concat); @@ -256,85 +250,44 @@ public class PageDrawer extends PDFGraph } @Override - protected void processText(byte[] string) throws IOException + protected void showText(byte[] string, float adjustment) throws IOException { - // - // DEPRECATED: Used for AWT text only. Will be removed soon! Don't edit me. - // - PDGraphicsState state = getGraphicsState(); RenderingMode renderingMode = state.getTextState().getRenderingMode(); - if (renderingMode == RenderingMode.FILL) + // buffer the text clip because it represents a single clipping area + if (renderingMode.isClip()) { - graphics.setComposite(state.getNonStrokingJavaComposite()); - graphics.setPaint(getNonStrokingPaint()); + textClippingArea = new Area(); } - else if (renderingMode == RenderingMode.STROKE) - { - graphics.setComposite(state.getStrokingJavaComposite()); - graphics.setPaint(getStrokingPaint()); - graphics.setStroke(getStroke()); - } - else if (renderingMode == RenderingMode.NEITHER) - { - return; - } - else + + super.showText(string, adjustment); + + // apply the buffered clip as one area + if (renderingMode.isClip()) { - LOG.debug("Unsupported RenderingMode " + - this.getGraphicsState().getTextState().getRenderingMode() + - " in PageDrawer.processTextPosition()." + " Using RenderingMode " + - RenderingMode.FILL + " instead"); - graphics.setComposite(state.getNonStrokingJavaComposite()); - graphics.setPaint(getNonStrokingPaint()); + state.intersectClippingPath(textClippingArea); + textClippingArea = null; } - - super.processText(string); } @Override - protected void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight, - float widthText, String unicode, int[] charCodes, PDFont font, - float fontSize) throws IOException + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, + Vector displacement) throws IOException { - try - { - AffineTransform at = textMatrix.createAffineTransform(); - PDMatrix fontMatrix = font.getFontMatrix(); + AffineTransform at = textRenderingMatrix.createAffineTransform(); + at.concatenate(font.getFontMatrix().createAffineTransform()); - // use different methods to draw the string - if (font.isType3Font()) - { - // Type3 fonts don't use the same units within the font matrix as the other fonts - at.scale(fontMatrix.getValue(0, 0), fontMatrix.getValue(1, 1)); - // Type3 fonts are using streams for each character - drawType3String((PDType3Font) font, charCodes, at); - } - else - { - Glyph2D glyph2D = createGlyph2D(font); - if (glyph2D != null) - { - AffineTransform fontMatrixAT = new AffineTransform( - fontMatrix.getValue(0, 0), fontMatrix.getValue(0, 1), - fontMatrix.getValue(1, 0), fontMatrix.getValue(1, 1), - fontMatrix.getValue(2, 0), fontMatrix.getValue(2, 1)); - at.concatenate(fontMatrixAT); - // Let PDFBox render the font if supported - drawGlyphs2D(glyph2D, charCodes, at); - } - else - { - // Use AWT to render the font (standard14 fonts, substituted embedded fonts) - // TODO to be removed in the long run - drawString(font, unicode, at); - } - } + if (font instanceof PDType3Font) + { + // Type3 fonts use PDF streams for each character + drawType3String((PDType3Font) font, code, at); } - catch (IOException e) + else { - LOG.error(e.getMessage(), e); // todo: really? + // all other fonts use vectors + Glyph2D glyph2D = createGlyph2D(font); + drawGlyph2D(glyph2D, code, at); } } @@ -342,56 +295,40 @@ public class PageDrawer extends PDFGraph * Render the font using the Glyph2D interface. * * @param glyph2D the Glyph2D implementation provided a GeneralPath for each glyph - * @param codePoints the string to be rendered + * @param code character code * @param at the transformation * @throws IOException if something went wrong */ - private void drawGlyphs2D(Glyph2D glyph2D, int[] codePoints, AffineTransform at) throws IOException + private void drawGlyph2D(Glyph2D glyph2D, int code, AffineTransform at) throws IOException { graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); PDGraphicsState state = getGraphicsState(); RenderingMode renderingMode = state.getTextState().getRenderingMode(); - boolean needsFill = true; - boolean needsStroke = true; - - for (int codePoint : codePoints) + GeneralPath path = glyph2D.getPathForCharacterCode(code); + if (path != null) { - GeneralPath path = glyph2D.getPathForCharacterCode(codePoint); - if (path != null) - { - Shape glyph = at.createTransformedShape(path); + Shape glyph = at.createTransformedShape(path); - if (renderingMode.isFill()) - { - if (needsFill) - { - graphics.setComposite(state.getNonStrokingJavaComposite()); - graphics.setPaint(getNonStrokingPaint()); - needsFill = false; - needsStroke = true; - } - graphics.fill(glyph); - } + if (renderingMode.isFill()) + { + graphics.setComposite(state.getNonStrokingJavaComposite()); + graphics.setPaint(getNonStrokingPaint()); + graphics.fill(glyph); + } - if (renderingMode.isStroke()) - { - if (needsStroke) - { - graphics.setComposite(state.getStrokingJavaComposite()); - graphics.setPaint(getStrokingPaint()); - graphics.setStroke(getStroke()); - needsFill = true; - needsStroke = false; - } - graphics.draw(glyph); - } + if (renderingMode.isStroke()) + { + graphics.setComposite(state.getStrokingJavaComposite()); + graphics.setPaint(getStrokingPaint()); + graphics.setStroke(getStroke()); + graphics.draw(glyph); + } - if (renderingMode.isClip()) - { - state.intersectClippingPath(new Area(glyph)); - } + if (renderingMode.isClip()) + { + textClippingArea.add(new Area(glyph)); } } } @@ -400,157 +337,36 @@ public class PageDrawer extends PDFGraph * Render the text using a type 3 font. * * @param font the type3 font - * @param charCodes internal PDF character codes of glyphs + * @param code internal PDF character codes of glyph * @param at the transformation * * @throws IOException if something went wrong */ - private void drawType3String(PDType3Font font, int[] charCodes, AffineTransform at) throws IOException + private void drawType3String(PDType3Font font, int code, AffineTransform at) throws IOException { - int textLength = charCodes.length; - for (int i = 0; i < textLength; i++) + COSStream stream = font.getCharStream(code); + if (stream != null) { - COSStream stream = font.getCharStream((char) charCodes[i]); - if (stream != null) - { - // save the current graphics state and matrices - saveGraphicsState(); - Matrix textMatrix = getTextMatrix(); - Matrix textLineMatrix = getTextLineMatrix(); - - Matrix ctm = new Matrix(); - ctm.setFromAffineTransform(at); - getGraphicsState().setCurrentTransformationMatrix(ctm); - processSubStream(font.getType3Resources(), stream); + // save the current graphics state and matrices + saveGraphicsState(); + Matrix textMatrix = getTextMatrix(); + Matrix textLineMatrix = getTextLineMatrix(); + + Matrix ctm = new Matrix(); + ctm.setFromAffineTransform(at); + getGraphicsState().setCurrentTransformationMatrix(ctm); + processSubStream(font.getType3Resources(), stream); - // restore the saved graphics state and matrices - restoreGraphicsState(); - setTextLineMatrix(textLineMatrix); - setTextMatrix(textMatrix); - - } - else - { - LOG.debug("drawType3String: stream for character " + (char) charCodes[i] + " not found"); - } - } - } - - /** - * This will draw a string on a canvas using the font. - * - * @param font the font to be used to draw the string - * @param string The string to draw. - * @param at The transformation matrix with all information for scaling and shearing of the font. - * - * @throws IOException If there is an error drawing the specific string. - */ - private void drawString(PDFont font, String string, AffineTransform at) throws IOException - { - if (string == null) { - // AWT fonts can't handle the case where there is no Unicode mapping for the character, - // as we don't know what character it is. We use the replacement character which is - // better than nothing, to show that something is missing. - LOG.error("Could not render a character in font " + font.getBaseFont()); - string = "\uFFFD"; // REPLACEMENT CHARACTER - } - - Font awtFont = createAWTFont(font); - FontRenderContext frc = new FontRenderContext(new AffineTransform(), true, true); - GlyphVector glyphs = awtFont.createGlyphVector(frc, string); - graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON); - writeFont(at, glyphs); - } - - private void writeFont(final AffineTransform at, final GlyphVector glyphs) - { - // - // DEPRECATED: Will be removed soon! Don't edit me. - // - - // Convert from PDF, where glyphs are upright when direction is from - // bottom to top, to AWT, where this is the other way around - - // PDFBOX-2141: do not use graphics.transform(), because this prevents - // the correct rendering of shading patterns - // don't apply the translation to each glyph, only scale and shear - AffineTransform atRS = new AffineTransform(at.getScaleX(), at.getShearY(), - -at.getShearX(), -at.getScaleY(), 0, 0); - - for (int i = 0; i < glyphs.getNumGlyphs(); i++) - { - glyphs.setGlyphTransform(i, atRS); - } - graphics.drawGlyphVector(glyphs, (float) at.getTranslateX(), (float) at.getTranslateY()); - } - - /** - * Provides an AWT font for the given PDFont. - * - * @param font the font which needs an AWT font - * @return the corresponding AWT font - * @throws IOException if something went wrong - */ - private Font createAWTFont(PDFont font) throws IOException - { - // - // DEPRECATED: Will be removed soon! Don't edit me. - // + // restore the saved graphics state and matrices + restoreGraphicsState(); + setTextLineMatrix(textLineMatrix); + setTextMatrix(textMatrix); - Font awtFont = null; - // Is there already a AWTFont for the given font? - if (awtFonts.containsKey(font)) - { - awtFont = awtFonts.get(font); } else { - LOG.info("Using AWT font for " + font.getBaseFont()); - - if (font instanceof PDType1Font) - { - PDType1Font type1Font = (PDType1Font) font; - PDFontDescriptor fd = type1Font.getFontDescriptor(); - if (fd instanceof PDFontDescriptorDictionary) - { - PDFontDescriptorDictionary fdDictionary = (PDFontDescriptorDictionary) fd; - if (fdDictionary.getFontFile() == null) - { - // check if the font is part of our environment - if (fd.getFontName() != null) - { - awtFont = PDFFontManager.getAwtFont(fd.getFontName()); - } - if (awtFont == null) - { - LOG.info("Can't find the specified font " + fd.getFontName()); - } - } - } - else - { - // check if the font is part of our environment - String baseFont = type1Font.getBaseFont(); - awtFont = PDFFontManager.getAwtFont(baseFont); - if (awtFont == null) - { - LOG.info("Can't find the specified basefont " + baseFont); - } - } - } - else - { - LOG.info("Unsupported type of font " + font.getClass().getName()); - } - if (awtFont == null) - { - // Fallback: we can't find anything, so we have to use the standard font - awtFont = PDFFontManager.getAWTFallbackFont(); - LOG.info("Using font " + awtFont.getName() + " instead of " + font.getBaseFont()); - } - awtFonts.put(font, awtFont); + LOG.error("Stream for Type 3 character " + code + " not found"); } - return awtFont; } /** @@ -562,84 +378,62 @@ public class PageDrawer extends PDFGraph */ private Glyph2D createGlyph2D(PDFont font) throws IOException { - Glyph2D glyph2D = null; // Is there already a Glyph2D for the given font? if (fontGlyph2D.containsKey(font)) { - glyph2D = fontGlyph2D.get(font); + return fontGlyph2D.get(font); } - else + + Glyph2D glyph2D = null; + if (font instanceof PDTrueTypeFont) { - // check if the given font is supported - if (font instanceof PDTrueTypeFont) - { - PDTrueTypeFont ttfFont = (PDTrueTypeFont) font; - // get the true type font raw data - TrueTypeFont ttf = ttfFont.getTTFFont(); - if (ttf != null) - { - glyph2D = new TTFGlyph2D(ttfFont); - } - } - else if (font instanceof PDType1Font) + PDTrueTypeFont ttfFont = (PDTrueTypeFont)font; + glyph2D = new TTFGlyph2D(ttfFont); // TTF is never null + } + else if (font instanceof PDType1Font) + { + PDType1Font pdType1Font = (PDType1Font)font; + glyph2D = new Type1Glyph2D(pdType1Font); // T1 is never null + } + else if (font instanceof PDType1CFont) + { + PDType1CFont type1CFont = (PDType1CFont)font; + if (type1CFont.getCFFType1Font() != null) // todo: could be null (need to incorporate fallback) { - PDType1Font pdType1Font = (PDType1Font) font; - PDType1CFont type1CFont = pdType1Font.getType1CFont(); - if (type1CFont != null) - { - // get the cffFont raw data - CFFFont cffFont = type1CFont.getCFFFont(); - if (cffFont != null) - { - glyph2D = new Type1Glyph2D(cffFont, type1CFont.getFontEncoding()); - } - } - else - { - // get the pfb raw data - Type1Font type1Font = pdType1Font.getType1Font(); - if (type1Font != null) - { - glyph2D = new Type1Glyph2D(type1Font, pdType1Font.getFontEncoding()); - } - } + glyph2D = new Type1Glyph2D(type1CFont); } - else if (font instanceof PDType0Font) + } + else if (font instanceof PDType0Font) + { + PDType0Font type0Font = (PDType0Font) font; + if (type0Font.getDescendantFont() instanceof PDCIDFontType2) { - PDType0Font type0Font = (PDType0Font) font; - if (type0Font.getDescendantFont() instanceof PDCIDFontType2Font) - { - // a Type2 CIDFont contains a TTF font - PDCIDFontType2Font cidType2Font = (PDCIDFontType2Font) type0Font.getDescendantFont(); - // get the true type font raw data - TrueTypeFont ttf = cidType2Font.getTTFFont(); - if (ttf != null) - { - glyph2D = new TTFGlyph2D(type0Font); - } - } - else if (type0Font.getDescendantFont() instanceof PDCIDFontType0Font) - { - // a Type0 CIDFont contains CFF font - PDCIDFontType0Font cidType2Font = (PDCIDFontType0Font) type0Font.getDescendantFont(); - PDType1CFont type1CFont = cidType2Font.getType1CFont(); - if (type1CFont != null) - { - // get the cffFont raw data - CFFFont cffFont = type1CFont.getCFFFont(); - if (cffFont != null) - { - glyph2D = new Type1Glyph2D(cffFont, type1CFont.getFontEncoding()); - } - } - } + glyph2D = new TTFGlyph2D(type0Font); // TTF is never null } - // cache the Glyph2D instance - if (glyph2D != null) + else if (type0Font.getDescendantFont() instanceof PDCIDFontType0) { - fontGlyph2D.put(font, glyph2D); + // a Type0 CIDFont contains CFF font + PDCIDFontType0 cidType0Font = (PDCIDFontType0)type0Font.getDescendantFont(); + glyph2D = new CIDType0Glyph2D(cidType0Font); // todo: could be null (need incorporate fallback) } } + else + { + throw new IllegalStateException("Bad font type: " + font.getClass().getSimpleName()); + } + + // cache the Glyph2D instance + if (glyph2D != null) + { + fontGlyph2D.put(font, glyph2D); + } + + if (glyph2D == null) + { + // todo: make sure this never happens + throw new UnsupportedOperationException("No font for " + font.getBaseFont()); + } + return glyph2D; } @@ -659,10 +453,10 @@ public class PageDrawer extends PDFGraph } /** - * Generates awt raster for a soft mask + * Generates AWT raster for a soft mask * * @param softMask - * @return awt raster for soft mask + * @return AWT raster for soft mask * @throws IOException */ private Raster createSoftMaskRaster(PDSoftMask softMask) throws IOException Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Glyph2D.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Glyph2D.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Glyph2D.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Glyph2D.java Sat Aug 30 02:26:57 2014 @@ -20,6 +20,7 @@ package org.apache.pdfbox.rendering.font; import java.awt.geom.GeneralPath; +import java.io.IOException; /** * This interface is implemented by several font specific classes which is called to get the @@ -34,7 +35,7 @@ public interface Glyph2D * * @return the GeneralPath for the given character code */ - public GeneralPath getPathForCharacterCode(int code); + public GeneralPath getPathForCharacterCode(int code) throws IOException; /** * Remove all cached resources. Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/TTFGlyph2D.java Sat Aug 30 02:26:57 2014 @@ -26,11 +26,10 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.fontbox.cmap.CMap; import org.apache.fontbox.ttf.GlyphData; import org.apache.fontbox.ttf.HeaderTable; import org.apache.fontbox.ttf.TrueTypeFont; -import org.apache.pdfbox.pdmodel.font.PDCIDFontType2Font; +import org.apache.pdfbox.pdmodel.font.PDCIDFontType2; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDTrueTypeFont; import org.apache.pdfbox.pdmodel.font.PDType0Font; @@ -42,18 +41,12 @@ public class TTFGlyph2D implements Glyph { private static final Log LOG = LogFactory.getLog(TTFGlyph2D.class); - private PDFont pdFont; - private TrueTypeFont ttf; - private PDCIDFontType2Font descendantFont; - private String name; + private final PDFont font; + private final TrueTypeFont ttf; private float scale = 1.0f; - private boolean hasScaling = false; - private Map glyphs = new HashMap(); - private CMap fontCMap = null; - private boolean isCIDFont = false; - private boolean hasIdentityCIDMapping = false; - private boolean hasCID2GIDMapping = false; - private boolean hasTwoByteMappings = false; + private boolean hasScaling; + private final Map glyphs = new HashMap(); + private final boolean isCIDFont; /** * Constructor. @@ -62,7 +55,7 @@ public class TTFGlyph2D implements Glyph */ public TTFGlyph2D(PDTrueTypeFont ttfFont) throws IOException { - this(ttfFont.getTTFFont(), ttfFont, null); + this(ttfFont.getTrueTypeFont(), ttfFont, false); } /** @@ -72,15 +65,15 @@ public class TTFGlyph2D implements Glyph */ public TTFGlyph2D(PDType0Font type0Font) throws IOException { - this(((PDCIDFontType2Font)type0Font.getDescendantFont()).getTTFFont(), type0Font, - (PDCIDFontType2Font)type0Font.getDescendantFont()); + this(((PDCIDFontType2)type0Font.getDescendantFont()).getTrueTypeFont(), type0Font, true); } - public TTFGlyph2D(TrueTypeFont ttf, PDFont pdFont, PDCIDFontType2Font descFont) + public TTFGlyph2D(TrueTypeFont ttf, PDFont font, boolean isCIDFont) throws IOException { - this.pdFont = pdFont; + this.font = font; this.ttf = ttf; + this.isCIDFont = isCIDFont; // get units per em, which is used as scaling factor HeaderTable header = this.ttf.getHeader(); if (header != null && header.getUnitsPerEm() != 1000) @@ -90,143 +83,93 @@ public class TTFGlyph2D implements Glyph scale = 1000f / header.getUnitsPerEm(); hasScaling = true; } - extractFontSpecifics(pdFont, descFont); - } - - /** - * Extract all font specific information. - * - * @param pdFont the given PDFont - */ - private void extractFontSpecifics(PDFont pdFont, PDCIDFontType2Font descFont) - { - name = pdFont.getBaseFont(); - if (descFont != null) - { - isCIDFont = true; - descendantFont = descFont; - hasIdentityCIDMapping = descendantFont.hasIdentityCIDToGIDMap(); - hasCID2GIDMapping = descendantFont.hasCIDToGIDMap(); - fontCMap = pdFont.getCMap(); - if (fontCMap != null) - { - hasTwoByteMappings = fontCMap.hasTwoByteMappings(); - } - } - } - - /** - * Get the GID for the given CIDFont. - * - * @param code the given CID - * @return the mapped GID - */ - private int getGID(int code) - { - if (hasIdentityCIDMapping) - { - // identity mapping - return code; - } - if (hasCID2GIDMapping) - { - // use the provided CID2GID mapping - return descendantFont.mapCIDToGID(code); - } - if (fontCMap != null) - { - String string = fontCMap.lookup(code, hasTwoByteMappings ? 2 : 1); - if (string != null) - { - return string.codePointAt(0); - } - } - return code; } @Override - public GeneralPath getPathForCharacterCode(int code) + public GeneralPath getPathForCharacterCode(int code) throws IOException { - int glyphId = getGIDForCharacterCode(code); - - if (glyphId > 0) - { - return getPathForGlyphId(glyphId); - } - glyphId = code; - // there isn't any mapping, but probably an optional CMap - if (fontCMap != null) - { - String string = fontCMap.lookup(code, hasTwoByteMappings ? 2 : 1); - if (string != null) - { - glyphId = string.codePointAt(0); - } - } - return getPathForGlyphId(glyphId); + int gid = getGIDForCharacterCode(code); + return getPathForGID(gid, code); } // Try to map the given code to the corresponding glyph-ID - private int getGIDForCharacterCode(int code) + private int getGIDForCharacterCode(int code) throws IOException { if (isCIDFont) { - return getGID(code); + return ((PDType0Font)font).codeToGID(code); } else { - return ((PDTrueTypeFont)pdFont).getGIDForCharacterCode(code); + return ((PDTrueTypeFont)font).codeToGID(code); } } /** * Returns the path describing the glyph for the given glyphId. * - * @param glyphId the glyphId + * @param gid the GID + * @param code the character code * * @return the GeneralPath for the given glyphId */ - public GeneralPath getPathForGlyphId(int glyphId) + public GeneralPath getPathForGID(int gid, int code) throws IOException { - GeneralPath glyphPath = null; - if (glyphs.containsKey(glyphId)) + GeneralPath glyphPath; + if (glyphs.containsKey(gid)) + { + glyphPath = glyphs.get(gid); + } + else if (gid == 0) { - glyphPath = glyphs.get(glyphId); + if (isCIDFont) + { + int cid = ((PDType0Font) font).codeToCID(code); + String cidHex = String.format("%04x", cid); + LOG.warn("No glyph for " + code + " (CID " + cidHex + ") in font " + font.getName()); + } + else + { + LOG.warn("No glyph for " + code + " in font " + font.getName()); + } + + // GID 0 is not drawn, see PDFBOX-1735 + glyphPath = new GeneralPath(); + glyphs.put(gid, glyphPath); } else { GlyphData[] glyphData = ttf.getGlyph().getGlyphs(); - if (glyphId < glyphData.length && glyphData[glyphId] != null) + if (gid >= glyphData.length) { - GlyphData glyph = glyphData[glyphId]; + LOG.warn(font.getName() + ": Glyph not found: " + gid); + glyphPath = new GeneralPath(); + glyphs.put(gid, glyphPath); + } + else if (glyphData[gid] == null) + { + // empty glyph (e.g. space, newline) + glyphPath = new GeneralPath(); + glyphs.put(gid, glyphPath); + } + else + { + GlyphData glyph = glyphData[gid]; glyphPath = glyph.getPath(); if (hasScaling) { AffineTransform atScale = AffineTransform.getScaleInstance(scale, scale); glyphPath.transform(atScale); } - glyphs.put(glyphId, glyphPath); - } - else - { - if (LOG.isDebugEnabled()) - { - LOG.debug(name + ": Glyph not found:" + glyphId); - } + glyphs.put(gid, glyphPath); } } - return glyphPath != null ? (GeneralPath) glyphPath.clone() : null; + return glyphPath != null ? (GeneralPath) glyphPath.clone() : null; // todo: expensive } @Override public void dispose() { - ttf = null; - descendantFont = null; - fontCMap = null; - if (glyphs != null) - { - glyphs.clear(); - } + glyphs.clear(); } } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Type1Glyph2D.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Type1Glyph2D.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Type1Glyph2D.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/font/Type1Glyph2D.java Sat Aug 30 02:26:57 2014 @@ -18,121 +18,128 @@ package org.apache.pdfbox.rendering.font import java.awt.geom.GeneralPath; import java.io.IOException; -import java.util.Collection; import java.util.HashMap; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.fontbox.cff.CFFFont; -import org.apache.fontbox.type1.Type1Font; -import org.apache.fontbox.type1.Type1Mapping; import org.apache.pdfbox.encoding.Encoding; +import org.apache.pdfbox.encoding.GlyphList; +import org.apache.pdfbox.pdmodel.font.PDType1Equivalent; /** - * This class provides a glyph to GeneralPath conversion for Type 1 PFB and CFF fonts. + * Glyph to GeneralPath conversion for Type 1 PFB and CFF, and TrueType fonts with a 'post' table. */ public class Type1Glyph2D implements Glyph2D { private static final Log LOG = LogFactory.getLog(Type1Glyph2D.class); - private HashMap glyphs = new HashMap(); - private Map codeToName = new HashMap(); - private String fontName = null; - - /** - * Constructs a new Type1Glyph2D object for a CFF/Type2 font. - * - * @param font CFF/Type2 font - * @param encoding PDF Encoding or null - */ - public Type1Glyph2D(CFFFont font, Encoding encoding) - { - this(font.getName(), font.getType1Mappings(), encoding); + // alternative names for glyphs which are commonly encountered + private static final Map ALT_NAMES = new HashMap(); + static + { + ALT_NAMES.put("ff", "f_f"); + ALT_NAMES.put("ffi", "f_f_i"); + ALT_NAMES.put("ffl", "f_f_l"); + ALT_NAMES.put("fi", "f_i"); + ALT_NAMES.put("fl", "f_l"); + ALT_NAMES.put("st", "s_t"); + ALT_NAMES.put("IJ", "I_J"); + ALT_NAMES.put("ij", "i_j"); + } + + // unicode names for ligatures, needed to undo mapping in org.apache.pdfbox.Encoding + private static final Map LIGATURE_UNI_NAMES = new HashMap(); + static + { + LIGATURE_UNI_NAMES.put("ff", "uniFB00"); + LIGATURE_UNI_NAMES.put("fi", "uniFB01"); + LIGATURE_UNI_NAMES.put("fl", "uniFB02"); + LIGATURE_UNI_NAMES.put("ffi", "uniFB03"); + LIGATURE_UNI_NAMES.put("ffl", "uniFB04"); + LIGATURE_UNI_NAMES.put("pi", "uni03C0"); } + private final HashMap cache = new HashMap(); + private final PDType1Equivalent font; + /** - * Constructs a new Type1Glyph2D object for a Type 1 (PFB) font. + * Constructor. * - * @param font Type 1 (PFB) font - * @param encoding PDF Encoding or null + * @param font PDF Type1 font. */ - public Type1Glyph2D(Type1Font font, Encoding encoding) + public Type1Glyph2D(PDType1Equivalent font) { - this(font.getFontName(), font.getType1Mappings(), encoding); + this.font = font; } - /** - * Private constructor. - */ - private Type1Glyph2D(String fontName, Collection mappings, Encoding encoding) + @Override + public GeneralPath getPathForCharacterCode(int code) { - this.fontName = fontName; - // start with built-in encoding - for (Type1Mapping mapping : mappings) + // cache + if (cache.containsKey(code)) { - codeToName.put(mapping.getCode(), mapping.getName()); + return cache.get(code); } - // override existing entries with an optional PDF Encoding - if (encoding != null) + + // fetch + try { - Map encodingCodeToName = encoding.getCodeToNameMap(); - for (Integer key : encodingCodeToName.keySet()) + String name = font.codeToName(code); + GeneralPath path = null; + if (!name.equals(".notdef") && font.hasGlyph(name)) { - codeToName.put(key, encodingCodeToName.get(key)); + path = font.getPath(name); } - } - for (Type1Mapping mapping : mappings) - { - GeneralPath path; - try + else { - path = mapping.getType1CharString().getPath(); - glyphs.put(mapping.getName(), path); + // try alternative name + String altName = ALT_NAMES.get(name); + if (altName != null && !name.equals(".notdef") && font.hasGlyph(altName)) + { + path = font.getPath(altName); + } + else + { + // try unicode name + String unicodes = GlyphList.toUnicode(name); + if (unicodes != null) + { + if (unicodes.length() == 1) + { + String uniName = String.format("uni%04X", unicodes.codePointAt(0)); + path = font.getPath(uniName); + } + else if (unicodes.length() > 1) + { + if (LIGATURE_UNI_NAMES.containsKey(name)) + { + path = font.getPath(LIGATURE_UNI_NAMES.get(name)); + } + } + } + } } - catch (IOException exception) + + if (path == null) { - LOG.error("Type 1 glyph rendering failed", exception); + LOG.warn("No glyph for " + code + " (" + name + ") in font " + font.getName()); + path = font.getPath(".notdef"); } - } - } - - /** - * Returns the path describing the glyph for the given name. - * - * @param name the name of the glyph - * @return the GeneralPath for the given glyph - */ - public GeneralPath getPathForGlyphName(String name) - { - return glyphs.get(name); - } - @Override - public GeneralPath getPathForCharacterCode(int code) - { - if (codeToName.containsKey(code)) - { - String name = codeToName.get(code); - return glyphs.get(name); + cache.put(code, path); + return path; } - else + catch (IOException e) { - LOG.debug(fontName + ": glyph mapping for " + code + " not found"); + LOG.error("Glyph rendering failed", e); // todo: escalate this error? + return new GeneralPath(); } - return null; } @Override public void dispose() { - if (glyphs != null) - { - glyphs.clear(); - } - if (codeToName != null) - { - codeToName.clear(); - } + cache.clear(); } } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/Matrix.java Sat Aug 30 02:26:57 2014 @@ -16,7 +16,12 @@ */ package org.apache.pdfbox.util; +import org.apache.pdfbox.cos.COSArray; +import org.apache.pdfbox.cos.COSFloat; +import org.apache.pdfbox.cos.COSNumber; + import java.awt.geom.AffineTransform; +import java.awt.geom.Point2D; /** * This class will be used for matrix manipulation. @@ -45,6 +50,35 @@ public class Matrix implements Cloneable } /** + * Constructor. + */ + public Matrix(COSArray array) + { + single = new float[DEFAULT_SINGLE.length]; + single[0] = ((COSNumber)array.get(0)).floatValue(); + single[1] = ((COSNumber)array.get(1)).floatValue(); + single[3] = ((COSNumber)array.get(2)).floatValue(); + single[4] = ((COSNumber)array.get(3)).floatValue(); + single[6] = ((COSNumber)array.get(4)).floatValue(); + single[7] = ((COSNumber)array.get(5)).floatValue(); + } + + /** + * Constructor. + */ + public Matrix(float a, float b, float c, float d, float e, float f) + { + single = new float[DEFAULT_SINGLE.length]; + single[0] = a; + single[1] = b; + single[3] = c; + single[4] = d; + single[6] = e; + single[7] = f; + single[8] = 1; + } + + /** * This method resets the numbers in this Matrix to the original values, which are * the values that a newly constructed Matrix would have. */ @@ -148,6 +182,27 @@ public class Matrix implements Cloneable } /** + * Concatenates (premultiplies) the given matrix to this matrix. + * + * @param matrix The matrix to concatenate. + */ + public void concatenate(Matrix matrix) + { + matrix.multiply(this, this); + } + + /** + * Translates this matrix by the given vector. + * + * @param vector 2D vector + */ + public void translate(Vector vector) + { + Matrix m = Matrix.getTranslatingInstance(vector.getX(), vector.getY()); + concatenate(m); + } + + /** * This will take the current matrix and multipy it with a matrix that is passed in. * * @param b The matrix to multiply by. @@ -241,6 +296,56 @@ public class Matrix implements Cloneable } /** + * Transforms the given point by this matrix. + * + * @param point point to transform + */ + public void transform(Point2D point) { + float x = (float)point.getX(); + float y = (float)point.getY(); + float a = single[0]; + float b = single[1]; + float c = single[3]; + float d = single[4]; + float e = single[6]; + float f = single[7]; + point.setLocation(x * a + y * c + e, x * b + y * d + f); + } + + /** + * Transforms the given point by this matrix. + * + * @param x x-coordinate + * @param y y-coordinate + */ + public Point2D transformPoint(double x, double y) { + float a = single[0]; + float b = single[1]; + float c = single[3]; + float d = single[4]; + float e = single[6]; + float f = single[7]; + return new Point2D.Double(x * a + y * c + e, x * b + y * d + f); + } + + /** + * Transforms the given point by this matrix. + * + * @param vector @2D vector + */ + public Vector transform(Vector vector) { + float a = single[0]; + float b = single[1]; + float c = single[3]; + float d = single[4]; + float e = single[6]; + float f = single[7]; + float x = vector.getX(); + float y = vector.getY(); + return new Vector(x * a + y * c + e, x * b + y * d + f); + } + + /** * Create a new matrix with just the scaling operators. * * @return A new matrix with just the scaling operators. @@ -305,6 +410,19 @@ public class Matrix implements Cloneable } /** + * Produces a copy of the first matrix, with the second matrix concatenated. + * + * @param a The matrix to copy. + * @param b The matrix to concatenate. + */ + public static Matrix concatenate(Matrix a, Matrix b) + { + Matrix copy = a.clone(); + copy.concatenate(b); + return copy; + } + + /** * Clones this object. * @return cloned matrix as an object. */ @@ -403,4 +521,19 @@ public class Matrix implements Cloneable { return single[7]; } + + /** + * Returns a COS array which represnets this matrix. + */ + public COSArray toCOSArray() + { + COSArray array = new COSArray(); + array.add(new COSFloat(0)); + array.add(new COSFloat(1)); + array.add(new COSFloat(3)); + array.add(new COSFloat(4)); + array.add(new COSFloat(6)); + array.add(new COSFloat(7)); + return array; + } } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Sat Aug 30 02:26:57 2014 @@ -18,7 +18,9 @@ package org.apache.pdfbox.util; import java.awt.geom.GeneralPath; import java.awt.geom.Point2D; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; @@ -38,15 +40,16 @@ import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDResources; -import org.apache.pdfbox.pdmodel.common.PDMatrix; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFontFactory; +import org.apache.pdfbox.pdmodel.font.PDType0Font; import org.apache.pdfbox.pdmodel.font.PDType3Font; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState; import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.state.PDTextState; import org.apache.pdfbox.util.operator.Operator; import org.apache.pdfbox.util.operator.OperatorProcessor; @@ -255,9 +258,9 @@ public class PDFStreamEngine while (iter.hasNext()) { Object next = iter.next(); - if (LOG.isDebugEnabled()) + if (LOG.isTraceEnabled()) { - LOG.debug("processing substream token: " + next); + LOG.trace("processing substream token: " + next); } if (next instanceof COSObject) { @@ -310,7 +313,7 @@ public class PDFStreamEngine */ public void showText(byte[] string) throws IOException { - processText(string); + showText(string, 0); } /** @@ -322,187 +325,109 @@ public class PDFStreamEngine */ public void showAdjustedText(List strings, List adjustments) throws IOException { - float fontsize = getGraphicsState().getTextState().getFontSize(); - float horizontalScaling = getGraphicsState().getTextState().getHorizontalScaling() / 100; for (int i = 0, len = strings.size(); i < len; i++) { - float adjustment = adjustments.get(i); - Matrix adjMatrix = new Matrix(); - adjustment =- (adjustment / 1000) * horizontalScaling * fontsize; - // TODO vertical writing mode - adjMatrix.setValue( 2, 0, adjustment ); - showAdjustedTextRun(strings.get(i), adjMatrix); + showText(strings.get(i), adjustments.get(i)); } } /** - * Called when a single run of text with a spacing adjustment is to be shown. - * - * @param string the encoded text - * @param adjustment spacing adjustment to apply before showing the string - * @throws IOException if there was an error showing the text - */ - protected void showAdjustedTextRun(byte[] string, Matrix adjustment) throws IOException - { - setTextMatrix(adjustment.multiply(getTextMatrix(), adjustment)); - processText(string); - } - - /** * Process text from the PDF Stream. You should override this method if you want to * perform an action when encoded text is being processed. - * + * * @param string the encoded text + * @param adjustment a position adjustment from a TJ array to be applied after the glyph * @throws IOException if there is an error processing the string */ - protected void processText(byte[] string) throws IOException + protected void showText(byte[] string, float adjustment) throws IOException { - // Note on variable names. There are three different units being used in this code. - // Character sizes are given in glyph units, text locations are initially given in text - // units, and we want to save the data in display units. The variable names should end with - // Text or Disp to represent if the values are in text or disp units (no glyph units are - // saved). - - PDGraphicsState graphicsState = getGraphicsState(); - - final float fontSizeText = graphicsState.getTextState().getFontSize(); - final float horizontalScalingText = graphicsState.getTextState().getHorizontalScaling() / 100f; - final float riseText = graphicsState.getTextState().getRise(); - final float wordSpacingText = graphicsState.getTextState().getWordSpacing(); - final float characterSpacingText = graphicsState.getTextState().getCharacterSpacing(); - - // We won't know the actual number of characters until - // we process the byte data(could be two bytes each) but - // it won't ever be more than string.length*2(there are some cases - // were a single byte will result in two output characters "fi" + PDGraphicsState state = getGraphicsState(); + PDTextState textState = state.getTextState(); - PDFont font = graphicsState.getTextState().getFont(); + // get the current font + PDFont font = textState.getFont(); if (font == null) { - LOG.warn("font is undefined, creating default font"); + LOG.warn("No current font, will use default"); font = PDFontFactory.createDefaultFont(); } - // all fonts have the width/height of a character in thousandths of a unit of text space - float fontMatrixXScaling = 1 / 1000f; - float fontMatrixYScaling = 1 / 1000f; - // expect Type3 fonts, those are providing the width of a character in glyph space units - if (font instanceof PDType3Font) - { - PDMatrix fontMatrix = font.getFontMatrix(); - fontMatrixXScaling = fontMatrix.getValue(0, 0); - fontMatrixYScaling = fontMatrix.getValue(1, 1); - } - - float maxVerticalDisplacementText = 0; - - Matrix textStateParameters = new Matrix(); - textStateParameters.setValue(0, 0, fontSizeText * horizontalScalingText); - textStateParameters.setValue(1, 1, fontSizeText); - textStateParameters.setValue(2, 1, riseText); - Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); - Matrix textXctm = new Matrix(); - Matrix textMatrixEnd = new Matrix(); - Matrix td = new Matrix(); - Matrix tempMatrix = new Matrix(); - - int codeLength; - for (int i = 0; i < string.length; i += codeLength) - { - // Decode the value to a Unicode character - codeLength = 1; - String unicode = font.encode(string, i, codeLength); - int[] charCodes; - if (unicode == null && i + 1 < string.length) - { - // maybe a multibyte encoding - codeLength++; - unicode = font.encode(string, i, codeLength); - charCodes = new int[] { font.getCodeFromArray(string, i, codeLength) }; - } - else + float fontSize = textState.getFontSize(); + float horizontalScaling = textState.getHorizontalScaling() / 100f; + float charSpacing = textState.getCharacterSpacing(); + + // put the text state parameters into matrix form + Matrix parameters = new Matrix( + fontSize * horizontalScaling, 0, // 0 + 0, fontSize, // 0 + 0, textState.getRise()); // 1 + + // read the stream until it is empty + InputStream in = new ByteArrayInputStream(string); + while (in.available() > 0) + { + // decode a character + int before = in.available(); + int code = font.readCode(in); + int codeLength = before - in.available(); + String unicode = font.toUnicode(code); + + // Word spacing shall be applied to every occurrence of the single-byte character code + // 32 in a string when using a simple font or a composite font that defines code 32 as + // a single-byte code. + float wordSpacing = 0; + if (codeLength == 1) { - charCodes = new int[] { font.getCodeFromArray(string, i, codeLength) }; + if (code == 32) + { + wordSpacing += textState.getWordSpacing(); + } } - // TODO: handle horizontal displacement - // get the width and height of this character in text units - float charHorizontalDisplacementText = font.getFontWidth(string, i, codeLength); - float charVerticalDisplacementText = font.getFontHeight(string, i, codeLength); - - // multiply the width/height with the scaling factor - charHorizontalDisplacementText = charHorizontalDisplacementText * fontMatrixXScaling; - charVerticalDisplacementText = charVerticalDisplacementText * fontMatrixYScaling; - - maxVerticalDisplacementText = Math.max(maxVerticalDisplacementText, - charVerticalDisplacementText); - - // PDF Spec - 5.5.2 Word Spacing - // - // Word spacing works the same was as character spacing, but applies - // only to the space character, code 32. - // - // Note: Word spacing is applied to every occurrence of the single-byte - // character code 32 in a string. This can occur when using a simple - // font or a composite font that defines code 32 as a single-byte code. - // It does not apply to occurrences of the byte value 32 in multiple-byte - // codes. - // - // RDD - My interpretation of this is that only character code 32's that - // encode to spaces should have word spacing applied. Cases have been - // observed where a font has a space character with a character code - // other than 32, and where word spacing (Tw) was used. In these cases, - // applying word spacing to either the non-32 space or to the character - // code 32 non-space resulted in errors consistent with this interpretation. - // - float spacingText = 0; - if (string[i] == 0x20 && codeLength == 1) - { - spacingText += wordSpacingText; - } - textMatrix.multiply(ctm, textXctm); - // Convert textMatrix to display units - // We need to instantiate a new Matrix instance here as it is passed to the TextPosition - // constructor below - Matrix textMatrixStart = textStateParameters.multiply(textXctm); - - // TODO: tx should be set for horizontal text and ty for vertical text - // which seems to be specified in the font (not the direction in the matrix). - float tx = charHorizontalDisplacementText * fontSizeText * horizontalScalingText; - float ty = 0; - // reset the matrix instead of creating a new one - td.reset(); - td.setValue(2, 0, tx); - td.setValue(2, 1, ty); - - // The text matrix gets updated after each glyph is placed. The updated - // version will have the X and Y coordinates for the next glyph. - // textMatrixEnd contains the coordinates of the end of the last glyph without - // taking characterSpacingText and spacintText into account, otherwise it'll be - // impossible to detect new words within text extraction - textStateParameters.multiply(td, tempMatrix); - tempMatrix.multiply(textXctm, textMatrixEnd); - final float endXPosition = textMatrixEnd.getXPosition(); - final float endYPosition = textMatrixEnd.getYPosition(); - - // add some spacing to the text matrix (see comment above) - tx = (charHorizontalDisplacementText * fontSizeText + characterSpacingText + - spacingText) * horizontalScalingText; - td.setValue(2, 0, tx); - td.multiply(textMatrix, textMatrix); - - // determine the width of this character - // XXX: Note that if we handled vertical text, we should be using Y here - float startXPosition = textMatrixStart.getXPosition(); - float widthText = endXPosition - startXPosition; + // text rendering matrix (text space -> device space) + Matrix ctm = state.getCurrentTransformationMatrix(); + Matrix textRenderingMatrix = parameters.multiply(textMatrix).multiply(ctm); + + // get glyph's position vector if this is vertical text + // changes to vertical text should be tested with PDFBOX-2294 and PDFBOX-1422 + if (font.isVertical()) + { + // position vector, in text space + Vector v = font.getPositionVector(code); + + // apply the position vector to the horizontal origin to get the vertical origin + textRenderingMatrix.translate(v); + } - float totalVerticalDisplacementDisp = maxVerticalDisplacementText * fontSizeText * - textXctm.getYScale(); + // get glyph's horizontal and vertical displacements, in text space + Vector w = font.getDisplacement(code); // process the decoded glyph - processGlyph(textMatrixStart, new Point2D.Float(endXPosition, endYPosition), - totalVerticalDisplacementDisp, widthText, unicode, charCodes, - font, fontSizeText); + showGlyph(textRenderingMatrix, font, code, unicode, w); + + // TJ adjustment after final glyph + float tj = 0; + if (in.available() == 0) + { + tj = adjustment; + } + + // calculate the combined displacements + float tx, ty; + if (font.isVertical()) + { + tx = 0; + ty = (w.getY() - tj / 1000) * fontSize + charSpacing + wordSpacing; + } + else + { + tx = ((w.getX() - tj / 1000) * fontSize + charSpacing + wordSpacing) * + horizontalScaling; + ty = 0; + } + + // update the text matrix + textMatrix.concatenate(Matrix.getTranslatingInstance(tx, ty)); } } @@ -510,19 +435,15 @@ public class PDFStreamEngine * Called when a glyph is to be processed.This method is intended for overriding in subclasses, * the default implementation does nothing. * - * @param textMatrix the text matrix at the start of the glyph - * @param end the end position of the glyph in text space - * @param maxHeight the height of the glyph in device space - * @param widthText the width of the glyph in text space - * @param unicode the Unicode text for this glyph, or null. May be meaningless. - * @param charCodes array of internal PDF character codes for the glyph todo: should be 1 code? + * @param textRenderingMatrix the current text rendering matrix, Trm * @param font the current font - * @param fontSize font size in text space + * @param code internal PDF character code for the glyph + * @param unicode the Unicode text for this glyph, or null if the PDF does provide it + * @param displacement the displacement (i.e. advance) of the glyph in text space * @throws IOException if the glyph cannot be processed */ - protected void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight, - float widthText, String unicode, int[] charCodes, PDFont font, - float fontSize) throws IOException + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, + Vector displacement) throws IOException { // overridden in subclasses } @@ -567,7 +488,10 @@ public class PDFStreamEngine { if (!unsupportedOperators.contains(operation)) { - LOG.info("unsupported/disabled operation: " + operation); + if (LOG.isDebugEnabled()) + { + LOG.debug("unsupported/disabled operation: " + operation); + } unsupportedOperators.add(operation); } } @@ -683,10 +607,10 @@ public class PDFStreamEngine } /** - * use the current transformation matrix to transform a single point. + * use the current transformation matrix to transformPoint a single point. * - * @param x x-coordinate of the point to be transform - * @param y y-coordinate of the point to be transform + * @param x x-coordinate of the point to be transformPoint + * @param y y-coordinate of the point to be transformPoint * @return the transformed coordinates as Point2D.Double */ public Point2D.Double transformedPoint(double x, double y) @@ -698,9 +622,9 @@ public class PDFStreamEngine } /** - * use the current transformation matrix to transform a PDRectangle. + * use the current transformation matrix to transformPoint a PDRectangle. * - * @param rect the PDRectangle to transform + * @param rect the PDRectangle to transformPoint * @return the transformed coordinates as a GeneralPath */ public GeneralPath transformedPDRectanglePath(PDRectangle rect) Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStreamEngine.java Sat Aug 30 02:26:57 2014 @@ -22,10 +22,11 @@ import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDSimpleFont; import org.apache.pdfbox.pdmodel.font.PDType3Font; +import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState; import org.apache.pdfbox.text.TextPosition; -import java.awt.geom.Point2D; import java.io.IOException; import java.util.Properties; @@ -80,11 +81,46 @@ public class PDFTextStreamEngine extends * This method was originally written by Ben Litchfield for PDFStreamEngine. */ @Override - protected final void processGlyph(Matrix textMatrix, Point2D.Float end, float maxHeight, - float widthText, String unicode, - int[] charCodes, PDFont font, float fontSize) - throws IOException + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, + Vector displacement) throws IOException { + // + // legacy calculations which were previously in PDFStreamEngine + // + + PDGraphicsState state = getGraphicsState(); + Matrix ctm = state.getCurrentTransformationMatrix(); + float fontSize = state.getTextState().getFontSize(); + float horizontalScaling = state.getTextState().getHorizontalScaling() / 100f; + Matrix textMatrix = getTextMatrix(); + + // 1/2 the bbox is used as the height todo: why? + float glyphHeight = font.getBoundingBox().getHeight() / 2; + + // transformPoint from glyph space -> text space + float height = (float)font.getFontMatrix().transformPoint(0, glyphHeight).getY(); + + // (modified) combined displacement, this is calculated *without* taking the character + // spacing and word spacing into account, due to legacy code in TextStripper + float tx = displacement.getX() * fontSize * horizontalScaling; + float ty = 0; // todo: support vertical writing mode + + // (modified) combined displacement matrix + Matrix td = Matrix.getTranslatingInstance(tx, ty); + + // (modified) text rendering matrix + Matrix nextTextRenderingMatrix = td.multiply(textMatrix).multiply(ctm); // text space -> device space + float nextX = nextTextRenderingMatrix.getXPosition(); + float nextY = nextTextRenderingMatrix.getYPosition(); + + // (modified) width and height calculations + float dxDisplay = nextX - textRenderingMatrix.getXPosition(); + float dyDisplay = height * textRenderingMatrix.getYScale(); + + // + // start of the original method + // + // Note on variable names. There are three different units being used in this code. // Character sizes are given in glyph units, text locations are initially given in text // units, and we want to save the data in display units. The variable names should end with @@ -93,7 +129,7 @@ public class PDFTextStreamEngine extends float fontSizeText = getGraphicsState().getTextState().getFontSize(); float horizontalScalingText = getGraphicsState().getTextState().getHorizontalScaling()/100f; - Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); + //Matrix ctm = getGraphicsState().getCurrentTransformationMatrix(); float glyphSpaceToTextSpaceFactor = 1 / 1000f; if (font instanceof PDType3Font) @@ -126,19 +162,32 @@ public class PDFTextStreamEngine extends } // the space width has to be transformed into display units - float spaceWidthDisp = spaceWidthText * fontSizeText * horizontalScalingText * - textMatrix.getXScale() * ctm.getXScale(); + float spaceWidthDisplay = spaceWidthText * fontSizeText * horizontalScalingText * + textRenderingMatrix.getXScale() * ctm.getXScale(); - // PDFBOX-373: Replace a null entry with "?" so it is not printed as "(null)" + // when there is no Unicode mapping available, Acrobat simply coerces the character code + // into Unicode, so we do the same. Subclasses of PDFStreamEngine don't necessarily want + // this, which is why we leave it until this point in PDFTextStreamEngine. if (unicode == null) { - unicode = "?"; + if (font instanceof PDSimpleFont) + { + char c = (char) code; + unicode = new String(new char[] { c }); + } + else + { + // Acrobat doesn't seem to coerce composite font's character codes, instead it + // skips them. See the "allah2.pdf" TestTextStripper file. + return; + } } processTextPosition(new TextPosition(pageRotation, pageSize.getWidth(), - pageSize.getHeight(), textMatrix, end.x, end.y, maxHeight, widthText, - spaceWidthDisp, unicode, charCodes, font, fontSize, - (int)(fontSize * textMatrix.getXScale()))); + pageSize.getHeight(), textRenderingMatrix, nextX, nextY, + dyDisplay, dxDisplay, + spaceWidthDisplay, unicode, new int[] { code } , font, fontSize, + (int)(fontSize * textRenderingMatrix.getXScale()))); } /** Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/text/ShowTextGlyph.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/text/ShowTextGlyph.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/text/ShowTextGlyph.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/operator/text/ShowTextGlyph.java Sat Aug 30 02:26:57 2014 @@ -41,24 +41,24 @@ public class ShowTextGlyph extends Opera List adjustments = new ArrayList(); List strings = new ArrayList(); - boolean lastWasAdjustment = false; + boolean lastWasString = false; for(int i = 0, len = array.size(); i < len; i++) { COSBase next = array.get(i); - if(next instanceof COSNumber) + if (next instanceof COSNumber) { adjustments.add(((COSNumber)next).floatValue()); - lastWasAdjustment = true; + lastWasString = false; } else if(next instanceof COSString) { - if (!lastWasAdjustment) + if (lastWasString) { - adjustments.add(0f); + adjustments.add(0f); // adjustment for previous string } strings.add(((COSString)next).getBytes()); - lastWasAdjustment = false; + lastWasString = true; } else { @@ -66,6 +66,12 @@ public class ShowTextGlyph extends Opera } } + // adjustment for final string + if (lastWasString) + { + adjustments.add(0f); + } + context.showAdjustedText(strings, adjustments); } } Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/ParallelParameterized.java Sat Aug 30 02:26:57 2014 @@ -31,7 +31,7 @@ import java.util.concurrent.TimeUnit; */ public class ParallelParameterized extends Parameterized { - static final long TIMEOUT_SECS = 30; + static final long TIMEOUT_SECS = 120; private static class FixedThreadPoolScheduler implements RunnerScheduler { Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/encoding/PDFDocEncodingCharsetTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/encoding/PDFDocEncodingCharsetTest.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/encoding/PDFDocEncodingCharsetTest.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/encoding/PDFDocEncodingCharsetTest.java Sat Aug 30 02:26:57 2014 @@ -36,12 +36,9 @@ public class PDFDocEncodingCharsetTest e */ public void testEncoding() throws UnsupportedEncodingException { - //TODO Use when switching to JavaSE-1.6 - //Charset charset = PDFDocEncodingCharset.INSTANCE; - //Check basic round-trip String text = "Test \u20AC$£ ;-) Gr\u00FCezi\u2026"; - byte[] encoded = text.getBytes(PDFDocEncodingCharset.NAME); + byte[] encoded = text.getBytes(PDFDocEncodingCharset.INSTANCE); int[] expected = new int[] { 0x54, 0x65, 0x73, 0x74, 0x20, //Test 0xA0, 0x24, 0xA3, 0x20, //Currency @@ -50,16 +47,16 @@ public class PDFDocEncodingCharsetTest e 0x83 //ellipsis }; compareEncoded(encoded, expected); - String decoded = new String(encoded, PDFDocEncodingCharset.NAME); + String decoded = new String(encoded, PDFDocEncodingCharset.INSTANCE); assertEquals(text, decoded); text = "Bad\u03C0\u2023char"; expected = new int[] { 0x42, 0x61, 0x64, 0x3F, 0x3F, 0x63, 0x68, 0x61, 0x72 //unencodable characters as '?' }; - encoded = text.getBytes(PDFDocEncodingCharset.NAME); + encoded = text.getBytes(PDFDocEncodingCharset.INSTANCE); compareEncoded(encoded, expected); - decoded = new String(encoded, PDFDocEncodingCharset.NAME); + decoded = new String(encoded, PDFDocEncodingCharset.INSTANCE); assertEquals("Bad??char", decoded); } Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/font/TestTTFParser.java Sat Aug 30 02:26:57 2014 @@ -18,13 +18,14 @@ package org.apache.pdfbox.pdmodel.font; import java.io.IOException; import java.io.InputStream; -import org.apache.fontbox.ttf.CMAPEncodingEntry; -import org.apache.fontbox.ttf.CMAPTable; +import org.apache.fontbox.ttf.CmapSubtable; +import org.apache.fontbox.ttf.CmapTable; import org.apache.fontbox.ttf.NameRecord; import org.apache.fontbox.ttf.PostScriptTable; import org.apache.fontbox.ttf.TTFParser; import org.apache.fontbox.ttf.TrueTypeFont; import org.apache.pdfbox.encoding.Encoding; +import org.apache.pdfbox.encoding.GlyphList; import org.apache.pdfbox.encoding.WinAnsiEncoding; import org.junit.Assert; import org.junit.Test; @@ -51,18 +52,18 @@ public class TestTTFParser TrueTypeFont arial = parser.parseTTF(arialIs); - CMAPTable cmap = arial.getCMAP(); + CmapTable cmap = arial.getCmap(); Assert.assertNotNull(cmap); - CMAPEncodingEntry[] cmaps = cmap.getCmaps(); + CmapSubtable[] cmaps = cmap.getCmaps(); Assert.assertNotNull(cmaps); - CMAPEncodingEntry uc = null; + CmapSubtable uc = null; - for (CMAPEncodingEntry e : cmaps) + for (CmapSubtable e : cmaps) { if (e.getPlatformId() == NameRecord.PLATFORM_WINDOWS - && e.getPlatformEncodingId() == NameRecord.PLATFORM_ENCODING_WINDOWS_UNICODE) + && e.getPlatformEncodingId() == NameRecord.ENCODING_WINDOWS_UNICODE_BMP) { uc = e; break; @@ -93,27 +94,27 @@ public class TestTTFParser || "product".equals(name) || "integral".equals(name) || "Omega".equals(name) || "radical".equals(name) || "tilde".equals(name)) { - Assert.assertTrue(enc.getNameForCharacter((char) charCode).startsWith(name)); + Assert.assertTrue(GlyphList.unicodeToName((char) charCode).startsWith(name)); } else if ("bar".equals(name)) { - Assert.assertTrue(enc.getNameForCharacter((char) charCode).endsWith(name)); + Assert.assertTrue(GlyphList.unicodeToName((char) charCode).endsWith(name)); } else if ("sfthyphen".equals(name)) { - Assert.assertEquals("softhyphen", enc.getNameForCharacter((char) charCode)); + Assert.assertEquals("softhyphen", GlyphList.unicodeToName((char) charCode)); } - else if ("periodcentered".equals(name) && !enc.getNameForCharacter((char) charCode).equals(name)) + else if ("periodcentered".equals(name) && !GlyphList.unicodeToName((char) charCode).equals(name)) { - Assert.assertEquals("bulletoperator", enc.getNameForCharacter((char) charCode)); + Assert.assertEquals("bulletoperator", GlyphList.unicodeToName((char) charCode)); } else if ("fraction".equals(name)) { - Assert.assertEquals("divisionslash", enc.getNameForCharacter((char) charCode)); + Assert.assertEquals("divisionslash", GlyphList.unicodeToName((char) charCode)); } else if ("mu".equals(name)) { - Assert.assertEquals("mu1", enc.getNameForCharacter((char) charCode)); + Assert.assertEquals("mu1", GlyphList.unicodeToName((char) charCode)); } else if ("pi".equals(name)) { @@ -121,7 +122,7 @@ public class TestTTFParser } else { - Assert.assertEquals(enc.getNameForCharacter((char) charCode), name); + Assert.assertEquals(GlyphList.unicodeToName((char) charCode), name); } } } Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestTextStripper.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestTextStripper.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestTextStripper.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/util/TestTextStripper.java Sat Aug 30 02:26:57 2014 @@ -228,6 +228,7 @@ public class TestTextStripper extends Te } } + //System.out.println(" " + inFile + (bSort ? " (sorted)" : "")); PDDocument document = PDDocument.load(inFile); try { @@ -299,7 +300,7 @@ public class TestTextStripper extends Te { this.bFail = true; fail("FAILURE: Line mismatch for file " + inFile.getName() + - " ( sort = "+bSort+")" + + " (sort = "+bSort+")" + " at expected line: " + expectedReader.getLineNumber() + " at actual line: " + actualReader.getLineNumber() + "\nexpected line was: \"" + expectedLine + "\"" + Modified: pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== Binary files - no diff available. Modified: pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf.txt URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf.txt?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== Binary files - no diff available. Modified: pdfbox/trunk/pdfbox/src/test/resources/logging.properties URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/logging.properties?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/test/resources/logging.properties (original) +++ pdfbox/trunk/pdfbox/src/test/resources/logging.properties Sat Aug 30 02:26:57 2014 @@ -13,8 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -handlers=java.util.logging.FileHandler +handlers=java.util.logging.FileHandler java.util.logging.ConsoleHandler .level=INFO +java.util.logging.ConsoleHandler.level = SEVERE + +java.util.logging.FileHandler.level = FINE java.util.logging.FileHandler.pattern = target/pdfbox.log java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java (original) +++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/content/PreflightContentStream.java Sat Aug 30 02:26:57 2014 @@ -26,7 +26,9 @@ import static org.apache.pdfbox.prefligh import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_CONTENT_STREAM_INVALID_ARGUMENT; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_CONTENT_STREAM_UNSUPPORTED_OP; +import java.io.ByteArrayInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.List; import org.apache.pdfbox.cos.COSArray; @@ -40,7 +42,6 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.font.PDFont; -import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.state.PDTextState; import org.apache.pdfbox.pdmodel.graphics.state.RenderingMode; @@ -50,7 +51,6 @@ import org.apache.pdfbox.preflight.excep import org.apache.pdfbox.preflight.font.container.FontContainer; import org.apache.pdfbox.preflight.font.util.GlyphException; import org.apache.pdfbox.util.operator.Operator; -import org.apache.pdfbox.util.operator.Operator; import org.apache.pdfbox.util.operator.OperatorProcessor; public class PreflightContentStream extends PreflightStreamEngine @@ -346,23 +346,13 @@ public class PreflightContentStream exte return; } - int codeLength = 1; - for (int i = 0; i < string.length; i += codeLength) + InputStream in = new ByteArrayInputStream(string); + while (in.available() > 0) { - // explore the string to detect character code (length can be 1 or 2 bytes) - int cid = -1; - codeLength = 1; try { - // according to the encoding, extract the character identifier - cid = font.encodeToCID(string, i, codeLength); - if (cid == -1 && i + 1 < string.length) - { - // maybe a multibyte encoding - codeLength++; - cid = font.encodeToCID(string, i, codeLength); - } - fontContainer.checkGlyphWith(cid); + int code = font.readCode(in); + fontContainer.checkGlyphWidth(code); } catch (IOException e) {