pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From til...@apache.org
Subject svn commit: r1600968 - /pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
Date Fri, 06 Jun 2014 17:49:14 GMT
Author: tilman
Date: Fri Jun  6 17:49:14 2014
New Revision: 1600968

URL: http://svn.apache.org/r1600968
Log:
PDFBOX-2101: save RGB and Gray JPEG images directly, as suggested by Jeremias Maerki

Modified:
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java

Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java?rev=1600968&r1=1600967&r2=1600968&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Fri Jun  6 17:49:14 2014
@@ -20,11 +20,16 @@ import java.awt.image.BufferedImage;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.HashSet;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.io.IOUtils;
 
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -35,6 +40,8 @@ import org.apache.pdfbox.pdmodel.encrypt
 import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
 import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
 import org.apache.pdfbox.pdmodel.graphics.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
 import org.apache.pdfbox.pdmodel.graphics.image.TIFFInputStream;
 import org.apache.pdfbox.util.ImageIOUtil;
 
@@ -56,6 +63,14 @@ public class ExtractImages
     private static final String ADDKEY = "-addkey";
     private static final String NONSEQ = "-nonSeq";
 
+    private static final List<String> DCT_FILTERS = new ArrayList<String>();
+
+    static
+    {
+        DCT_FILTERS.add( COSName.DCT_DECODE.getName() );
+        DCT_FILTERS.add( COSName.DCT_DECODE_ABBREVIATION.getName() );
+    }
+
     private ExtractImages()
     {
     }
@@ -230,6 +245,15 @@ public class ExtractImages
         }
         resources.clear();
     }
+    
+    // get and write the unmodified JPEG stream
+    private void writeJpeg2OutputStream(PDImageXObject ximage, OutputStream out)
+            throws IOException
+    {
+        InputStream data = ximage.getPDStream().getPartiallyFilteredStream(DCT_FILTERS);
+        IOUtils.copy(data, out);
+        IOUtils.closeQuietly(data);
+    }
 
     /**
      * Writes the image to a file with the filename + an appropriate suffix, like "Image.jpg".
@@ -239,6 +263,13 @@ public class ExtractImages
      */
     private void write2file(PDImageXObject xobj, String filename) throws IOException
     {
+        if (xobj.getSuffix() == null || xobj.getSuffix().isEmpty())
+        {
+            System.err.println ("image has no suffix, skipped");
+            System.err.println ("filter(s): " + xobj.getCOSStream().getFilters());
+            return;
+        }
+
         FileOutputStream out = null;
         try
         {
@@ -250,7 +281,25 @@ public class ExtractImages
                 {
                     TIFFInputStream.writeToOutputStream(xobj, out);
                 }
-                else
+                else if ("jpg".equals(xobj.getSuffix()))
+                {
+                    String colorSpaceName = xobj.getColorSpace().getName();
+                    if (PDDeviceGray.INSTANCE.getName().equals(colorSpaceName) ||
+                            PDDeviceRGB.INSTANCE.getName().equals(colorSpaceName))
+                    {
+                        // RGB and Gray colorspace:
+                        // get and write the unmodified JPEG stream
+                        writeJpeg2OutputStream(xobj, out);
+                    }
+                    else
+                    {
+                        // CMYK and other "unusual" colorspaces
+                        // create BufferedImage with correct colors and then save into a
+                        // JPEG (some quality loss)
+                        ImageIOUtil.writeImage(xobj.getImage(), xobj.getSuffix(), out);
+                    }
+                }
+                else 
                 {
                     ImageIOUtil.writeImage(image, xobj.getSuffix(), out);
                 }



Mime
View raw message