pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From msahy...@apache.org
Subject svn commit: r1829154 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java
Date Sat, 14 Apr 2018 17:20:11 GMT
Author: msahyoun
Date: Sat Apr 14 17:20:11 2018
New Revision: 1829154

URL: http://svn.apache.org/viewvc?rev=1829154&view=rev
Log:
PDFBOX-4182, PDFBOX-4188: add new merge mode which closes the source PDDocument after the
individual merge; early implementation

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java?rev=1829154&r1=1829153&r2=1829154&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java Sat Apr 14 17:20:11 2018
@@ -87,9 +87,32 @@ public class PDFMergerUtility
     private boolean ignoreAcroFormErrors = false;
     private PDDocumentInformation destinationDocumentInformation = null;
     private PDMetadata destinationMetadata = null;
+
+    private DocumentMergeMode documentMergeMode = DocumentMergeMode.PDFBOX_LEGACY_MODE;
     private AcroFormMergeMode acroFormMergeMode = AcroFormMergeMode.PDFBOX_LEGACY_MODE;
     
     /**
+     * The mode to use when merging documents.
+     * 
+     * <p><ul>
+     * <li>{@link DocumentMergeMode#OPTIMIZE_RESOURCES_MODE} Optimizes resource handling such as
+     *      closing documents early. <strong>Not all document elements are merged</strong> compared to
+     *      the PDFBOX_LEGACY_MODE. Currently supported are:
+     *      <ul>
+     *          <li>Page content and resources
+     *      </ul>
+     * <li>{@link DocumentMergeMode#PDFBOX_LEGACY_MODE} The default mode; it runs the merge code that
+     *      predates this option and was used in versions of PDFBox up to 2.x. NOTE(review): this entry
+     *      used to describe renaming of equally named fields, which reads like AcroForm text; confirm.
+     * </ul>
+     */
+    public enum DocumentMergeMode
+    {
+        OPTIMIZE_RESOURCES_MODE,
+        PDFBOX_LEGACY_MODE
+    }
+    
+    /**
      * The mode to use when merging AcroForm between documents.
      * 
      * <p><ul>
@@ -115,6 +138,26 @@ public class PDFMergerUtility
         sources = new ArrayList<>();
         fileInputStreams = new ArrayList<>();
     }
+    
+    /**
+     * Returns the strategy currently configured for merging the source documents.
+     * 
+     * @return the active {@link DocumentMergeMode}
+     */
+    public DocumentMergeMode getDocumentMergeMode()
+    {
+        return this.documentMergeMode;
+    }
+    
+    /**
+     * Set the mode to be used for merging the documents.
+     *
+     * @param theDocumentMergeMode the {@link DocumentMergeMode} to use for subsequent merges
+     */
+    public void setDocumentMergeMode(DocumentMergeMode theDocumentMergeMode)
+    {
+        this.documentMergeMode = theDocumentMergeMode;
+    }
+
+    /**
+     * Set the mode to be used for merging the documents.
+     *
+     * @param theDocumentMergeMode the {@link DocumentMergeMode} to use for subsequent merges
+     * @deprecated misnamed: this sets the document merge mode, not the AcroForm merge mode;
+     *             use {@link #setDocumentMergeMode(DocumentMergeMode)} instead.
+     */
+    @Deprecated
+    public void setAcroFormMergeMode(DocumentMergeMode theDocumentMergeMode)
+    {
+        setDocumentMergeMode(theDocumentMergeMode);
+    }
 
     /**
      * Get the name of the destination file.
@@ -270,6 +313,71 @@ public class PDFMergerUtility
      */
     public void mergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException
     {
+        // Only the optimized mode is opt-in; anything else (including an unset, null mode)
+        // falls back to the legacy merge instead of silently merging nothing.
+        if (documentMergeMode == DocumentMergeMode.OPTIMIZE_RESOURCES_MODE)
+        {
+            optimizedMergeDocuments(memUsageSetting, sources);
+        }
+        else
+        {
+            legacyMergeDocuments(memUsageSetting);
+        }
+    }
+    
+    /**
+     * Merge the given source streams one at a time into a new destination document and save it.
+     * Each source document is closed as soon as its pages have been cloned. Only pages are
+     * transferred (page dictionary, boxes, rotation and resources); nothing else is copied here.
+     *
+     * @param memUsageSetting passed to the destination document and to every source that is loaded
+     * @param sourceDocuments the PDF input streams to merge, in iteration order
+     *
+     * @throws IOException If reading a source or writing the destination fails.
+     */
+    private void optimizedMergeDocuments(MemoryUsageSetting memUsageSetting, List<InputStream> sourceDocuments) throws IOException
+    {
+        try (PDDocument destination = new PDDocument(memUsageSetting))
+        {
+            PDFCloneUtility cloner = new PDFCloneUtility(destination);
+            // iterate the parameter, not the sources field, so the method honours its argument
+            for (InputStream sourceInputStream : sourceDocuments)
+            {
+                // both resources are closed when the block exits, even if loading or cloning throws;
+                // the document is closed first, then the stream it was loaded from
+                try (InputStream closingInputStream = sourceInputStream;
+                     PDDocument sourceDoc = PDDocument.load(closingInputStream, memUsageSetting))
+                {
+                    for (PDPage page : sourceDoc.getPages())
+                    {
+                        PDPage newPage = new PDPage((COSDictionary) cloner.cloneForNewDocument(page.getCOSObject()));
+                        newPage.setCropBox(page.getCropBox());
+                        newPage.setMediaBox(page.getMediaBox());
+                        newPage.setRotation(page.getRotation());
+                        PDResources resources = page.getResources();
+                        if (resources != null)
+                        {
+                            // this is smart enough to just create references for resources that are used on multiple pages
+                            newPage.setResources(new PDResources((COSDictionary) cloner.cloneForNewDocument(resources)));
+                        }
+                        else
+                        {
+                            newPage.setResources(new PDResources());
+                        }
+                        destination.addPage(newPage);
+                    }
+                }
+            }
+            
+            // save to the destination file name unless a destination stream was configured
+            if (destinationStream == null)
+            {
+                destination.save(destinationFileName);
+            }
+            else
+            {
+                destination.save(destinationStream);
+            }
+        }
+    }
+    
+    
+    /**
+     * Merge the list of source documents, saving the result in the destination
+     * file.
+     *
+     * @param memUsageSetting defines how memory is used for buffering PDF streams;
+     *                        in case of <code>null</code> unrestricted main memory is used
+     * 
+     * @throws IOException If there is an error saving the document.
+     */
+    private void legacyMergeDocuments(MemoryUsageSetting memUsageSetting) throws IOException
+    {
         if (sources != null && !sources.isEmpty())
         {
             // Make sure that:



Mime
View raw message