Return-Path: X-Original-To: apmail-pdfbox-commits-archive@www.apache.org Delivered-To: apmail-pdfbox-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 4B8BA11324 for ; Fri, 6 Jun 2014 17:49:36 +0000 (UTC) Received: (qmail 63974 invoked by uid 500); 6 Jun 2014 17:49:36 -0000 Delivered-To: apmail-pdfbox-commits-archive@pdfbox.apache.org Received: (qmail 63927 invoked by uid 500); 6 Jun 2014 17:49:36 -0000 Mailing-List: contact commits-help@pdfbox.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pdfbox.apache.org Delivered-To: mailing list commits@pdfbox.apache.org Received: (qmail 63920 invoked by uid 99); 6 Jun 2014 17:49:36 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 06 Jun 2014 17:49:36 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 06 Jun 2014 17:49:34 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 9E3FC2388860; Fri, 6 Jun 2014 17:49:14 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1600968 - /pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Date: Fri, 06 Jun 2014 17:49:14 -0000 To: commits@pdfbox.apache.org From: tilman@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140606174914.9E3FC2388860@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: tilman Date: Fri Jun 6 17:49:14 2014 New Revision: 1600968 URL: http://svn.apache.org/r1600968 Log: PDFBOX-2101: save RGB and Gray JPEG images directly, as suggested by Jeremias Maerki Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java?rev=1600968&r1=1600967&r2=1600968&view=diff ============================================================================== --- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java (original) +++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Fri Jun 6 17:49:14 2014 @@ -20,11 +20,16 @@ import java.awt.image.BufferedImage; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.HashSet; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.pdmodel.PDDocument; @@ -35,6 +40,8 @@ import org.apache.pdfbox.pdmodel.encrypt import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.pdmodel.graphics.PDXObject; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.graphics.image.TIFFInputStream; import org.apache.pdfbox.util.ImageIOUtil; @@ -56,6 +63,14 @@ public class ExtractImages private static final String ADDKEY = "-addkey"; private static final String NONSEQ = "-nonSeq"; + private static final List DCT_FILTERS = new ArrayList(); + + static + { + DCT_FILTERS.add( COSName.DCT_DECODE.getName() ); + DCT_FILTERS.add( COSName.DCT_DECODE_ABBREVIATION.getName() ); + } + private ExtractImages() { } @@ -230,6 +245,15 @@ public class ExtractImages } resources.clear(); } + + // get and write the unmodified JPEG stream + private void writeJpeg2OutputStream(PDImageXObject ximage, OutputStream out) + throws IOException + { + InputStream data = ximage.getPDStream().getPartiallyFilteredStream(DCT_FILTERS); + IOUtils.copy(data, out); + IOUtils.closeQuietly(data); + } /** * Writes the image to a file with the filename + an appropriate suffix, like "Image.jpg". @@ -239,6 +263,13 @@ public class ExtractImages */ private void write2file(PDImageXObject xobj, String filename) throws IOException { + if (xobj.getSuffix() == null || xobj.getSuffix().isEmpty()) + { + System.err.println ("image has no suffix, skipped"); + System.err.println ("filter(s): " + xobj.getCOSStream().getFilters()); + return; + } + FileOutputStream out = null; try { @@ -250,7 +281,25 @@ public class ExtractImages { TIFFInputStream.writeToOutputStream(xobj, out); } - else + else if ("jpg".equals(xobj.getSuffix())) + { + String colorSpaceName = xobj.getColorSpace().getName(); + if (PDDeviceGray.INSTANCE.getName().equals(colorSpaceName) || + PDDeviceRGB.INSTANCE.getName().equals(colorSpaceName)) + { + // RGB and Gray colorspace: + // get and write the unmodified JPEG stream + writeJpeg2OutputStream(xobj, out); + } + else + { + // CMYK and other "unusual" colorspaces + // create BufferedImage with correct colors and then save into a + // JPEG (some quality loss) + ImageIOUtil.writeImage(xobj.getImage(), xobj.getSuffix(), out); + } + } + else { ImageIOUtil.writeImage(image, xobj.getSuffix(), out); }