pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From le...@apache.org
Subject svn commit: r1652198 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: BaseParser.java NonSequentialPDFParser.java PDFParser.java
Date Thu, 15 Jan 2015 17:52:18 GMT
Author: lehmi
Date: Thu Jan 15 17:52:18 2015
New Revision: 1652198

URL: http://svn.apache.org/r1652198
Log:
PDFBOX-2600: removed old sequential parser code

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1652198&r1=1652197&r2=1652198&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Thu Jan 15 17:52:18 2015
@@ -196,16 +196,6 @@ public abstract class BaseParser impleme
         }
     }
     
-    /**
-     * Set the document for this stream.
-     *
-     * @param doc The current document.
-     */
-    public void setDocument( COSDocument doc )
-    {
-        document = doc;
-    }
-
     private static boolean isHexDigit(char ch)
     {
         return (ch >= '0' && ch <= '9') ||
@@ -742,21 +732,6 @@ public abstract class BaseParser impleme
     }
 
     /**
-     * This will parse a PDF string.
-     *
-     * @param isDictionary indicates if the stream is a dictionary or not
-     * @return The parsed PDF string.
-     *
-     * @throws IOException If there is an error reading from the stream.
-     * @deprecated Not needed anymore. Use {@link #parseCOSString()} instead. PDFBOX-1437
-     */
-    @Deprecated
-    protected COSString parseCOSString(boolean isDictionary) throws IOException
-    {
-        return parseCOSString();
-    }
-
-    /**
      * This will parse a PDF string.
      *
      * @return The parsed PDF string.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1652198&r1=1652197&r2=1652198&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Thu Jan 15 17:52:18 2015
@@ -286,7 +286,7 @@ public class NonSequentialPDFParser exte
     public NonSequentialPDFParser(File file, String decryptionPassword, InputStream keyStore,
             String alias, boolean useScratchFiles) throws IOException
     {
-        super(EMPTY_INPUT_STREAM, false);
+        super(EMPTY_INPUT_STREAM);
         pdfFile = file;
         raStream = new RandomAccessBufferedFileInputStream(pdfFile);
         password = decryptionPassword;
@@ -310,7 +310,7 @@ public class NonSequentialPDFParser exte
                         + " does not contain an integer value, but: '" + eofLookupRangeStr + "'");
             }
         }
-        setDocument(new COSDocument(useScratchFiles));
+        document = new COSDocument(useScratchFiles);
         pdfSource = new PushBackInputStream(raStream, 4096);
     }
 
@@ -396,7 +396,7 @@ public class NonSequentialPDFParser exte
     public NonSequentialPDFParser(InputStream input, String decryptionPassword, InputStream keyStore,
             String alias, boolean useScratchFiles) throws IOException
     {
-        super(EMPTY_INPUT_STREAM, false);
+        super(EMPTY_INPUT_STREAM);
         pdfFile = createTmpFile(input);
         raStream = new RandomAccessBufferedFileInputStream(pdfFile);
         password = decryptionPassword;
@@ -925,14 +925,15 @@ public class NonSequentialPDFParser exte
 
         return pagesDictionary;
     }
-    
-    /** Parses all objects needed by pages and closes input stream. */
-    
+
     /**
-     * {@inheritDoc}
+     * This will parse the stream and populate the COSDocument object.  This will close
+     * the stream when it is done parsing.
+     *
+     * @throws IOException If there is an error reading from the stream or corrupt data
+     * is found.
      */
-    @Override
-    public void parse() throws IOException
+     public void parse() throws IOException
     {
         boolean exceptionOccurred = true; // set to false if all is processed
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1652198&r1=1652197&r2=1652198&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Thu Jan 15 17:52:18 2015
@@ -16,24 +16,15 @@
  */
 package org.apache.pdfbox.pdfparser;
 
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
-import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSStream;
 import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
@@ -54,8 +45,6 @@ public class PDFParser extends BaseParse
      */
     private static final Log LOG = LogFactory.getLog(PDFParser.class);
 
-    private static final int SPACE_BYTE = 32;
-
     private static final String PDF_HEADER = "%PDF-";
     private static final String FDF_HEADER = "%FDF-";
     
@@ -64,41 +53,13 @@ public class PDFParser extends BaseParse
     private static final String PDF_DEFAULT_VERSION = "1.4";
     private static final String FDF_DEFAULT_VERSION = "1.0";
 
-    // deprecated functionality from BaseParser:
-    /**
-     * Default value of the {@link #forceParsing} flag.
-     */
-    public static final boolean FORCE_PARSING =
-            Boolean.getBoolean("org.apache.pdfbox.forceParsing");
-
-    /**
-     * A list of duplicate objects found when Parsing the PDF
-     * File.
-     */
-    private List<ConflictObj> conflictList = new ArrayList<ConflictObj>();
-    
-    /**
-     * COSStream objects to check for length correctness.
-     */
-    private final Set<COSStream> streamLengthCheckSet = new HashSet<COSStream>();
-
-    /** Collects all Xref/trailer objects and resolves them into single
-     *  object using startxref reference. 
+    /** 
+     * Collects all Xref/trailer objects and resolves them into single
+     * object using startxref reference. 
      */
     protected XrefTrailerResolver xrefTrailerResolver = new XrefTrailerResolver();
 
     /**
-     * Temp file directory.
-     */
-    private File tempDirectory = null;
-    private final boolean useScratchFile;
-
-    /**
-     * Flag to skip malformed or otherwise unparseable input where possible.
-     */
-    protected final boolean forceParsing;
-
-    /**
      * Constructor.
      *
      * @param input The input stream that contains the PDF document.
@@ -107,213 +68,7 @@ public class PDFParser extends BaseParse
      */
     public PDFParser( InputStream input ) throws IOException
     {
-        this(input, FORCE_PARSING);
-    }
-
-    /**
-     * Constructor to allow control over RandomAccessFile.
-     * Also enables parser to skip corrupt objects to try and force parsing
-     * @param input The input stream that contains the PDF document.
-     * @param force When true, the parser will skip corrupt pdf objects and
-     * will continue parsing at the next object in the file
-     *
-     * @throws IOException If there is an error initializing the stream.
-     */
-    public PDFParser(InputStream input, boolean force) throws IOException
-    {
-        this(input, force, false);
-    }
-
-    /**
-     * Constructor to allow control over RandomAccessFile.
-     * Also enables parser to skip corrupt objects to try and force parsing
-     * @param input The input stream that contains the PDF document.
-     * @param force When true, the parser will skip corrupt pdf objects and
-     * will continue parsing at the next object in the file
-     * @param useScratchFiles enables the usage of a scratch file if set to true
-     *
-     * @throws IOException If there is an error initializing the stream.
-     */
-    public PDFParser(InputStream input, boolean force, boolean useScratchFiles) throws IOException
-    {
         super(input);
-        forceParsing = force;
-        useScratchFile = useScratchFiles;
-    }
-
-    /**
-     * This is the directory where pdfbox will create a temporary file
-     * for storing pdf document stream in.  By default this directory will
-     * be the value of the system property java.io.tmpdir.
-     *
-     * @param tmpDir The directory to create scratch files needed to store
-     *        pdf document streams.
-     */
-    public void setTempDirectory( File tmpDir )
-    {
-        tempDirectory = tmpDir;
-    }
-
-    /**
-     * Returns true if parsing should be continued. By default, forceParsing is returned.
-     * This can be overridden to add application specific handling (for example to stop
-     * parsing when the number of exceptions thrown exceed a certain number).
-     *
-     * @param e The exception if vailable. Can be null if there is no exception available
-     * @return true if parsing could be continued, otherwise false
-     */
-    protected boolean isContinueOnError(Exception e)
-    {
-        return forceParsing;
-    }
-
-    /**
-     * This will parse the stream and populate the COSDocument object.  This will close
-     * the stream when it is done parsing.
-     *
-     * @throws IOException If there is an error reading from the stream or corrupt data
-     * is found.
-     */
-    public void parse() throws IOException
-    {
-        try
-        {
-            if( tempDirectory != null )
-            {
-                document = new COSDocument( tempDirectory, true );
-            }
-            else if(useScratchFile)
-            {
-                document = new COSDocument( null, true );
-            }
-            else
-            {
-                document = new COSDocument(forceParsing);
-            }
-            setDocument( document );
-
-            parseHeader();
-
-            //Some PDF files have garbage between the header and the
-            //first object
-            skipToNextObj();
-
-            boolean wasLastParsedObjectEOF = false;
-            while(true)
-            {
-                if(pdfSource.isEOF())
-                {
-                    break;
-                }
-                                
-                try
-                {
-                    // don't reset flag to false if it is already true
-                    wasLastParsedObjectEOF |= parseObject();
-                }
-                catch(IOException e)
-                {
-                    /*
-                     * PDF files may have random data after the EOF marker. Ignore errors if
-                     * last object processed is EOF.
-                     */
-                    if( wasLastParsedObjectEOF )
-                    {
-                        break;
-                    }
-                    if(isContinueOnError(e))
-                    {
-                        /*
-                         * Warning is sent to the PDFBox.log and to the Console that
-                         * we skipped over an object
-                         */
-                        LOG.warn("Parsing Error, Skipping Object", e);
-                        
-                        skipSpaces();
-                        long lastOffset = pdfSource.getOffset();
-                        skipToNextObj();
-                        
-                        /* the nextObject is the one we want to skip 
-                         * so read the 'Object Number' without interpret it
-                         * in order to force the skipObject
-                         */
-                        if (lastOffset == pdfSource.getOffset())
-                        {
-                            readStringNumber();
-                            skipToNextObj();
-                        }
-                    }
-                    else
-                    {
-                        throw e;
-                    }
-                }
-                skipSpaces();
-            }
-
-            // set xref to start with
-            xrefTrailerResolver.setStartxref( document.getStartXref() );
-
-            // get resolved xref table + trailer
-            document.setTrailer( xrefTrailerResolver.getTrailer() );
-            document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType());
-            document.addXRefTable( xrefTrailerResolver.getXrefTable() );
-
-            fixStreamsLength();
-
-            if( !document.isEncrypted() )
-            {
-                document.dereferenceObjectStreams();
-            }
-            else
-            {
-                LOG.info("Document is encrypted");
-            }
-            ConflictObj.resolveConflicts(document, conflictList);
-        }
-        catch( IOException e )
-        {
-            // if the PDF is corrupt then close the document and clear all resources to it
-            if( document != null )
-            {
-                document.close();
-                document = null;
-            }
-            throw e;
-        }
-        finally
-        {
-            pdfSource.close();
-        }
-    }
-
-    /**
-     * Check whether streams with previously unknown length have the correct
-     * length and fix that length if needed.
-     *
-     * @throws IOException
-     */
-    private void fixStreamsLength() throws IOException
-    {
-        for (COSObject obj : document.getObjects())
-        {
-            if (obj.getObject() instanceof COSStream
-                    && streamLengthCheckSet.contains((COSStream) obj.getObject()))
-            {
-                COSStream stream = (COSStream) obj.getObject();
-
-                long filteredLength = stream.getFilteredLength();
-                long filteredLengthWritten = stream.getFilteredLengthWritten();
-                if (Math.abs(filteredLength - filteredLengthWritten) > 2)
-                {
-                    // adjust the length, but only if the difference is > 2,
-                    // i.e. don't bother with CR LF differences
-                    LOG.warn("/Length of " + obj + " corrected from " + filteredLength + " to " + filteredLengthWritten);
-                    stream.setLong(COSName.LENGTH, filteredLengthWritten);
-                    stream.setFilteredLength(filteredLengthWritten);
-                }
-            }
-        }
     }
 
     protected void parseHeader() throws IOException
@@ -341,7 +96,7 @@ public class PDFParser extends BaseParse
             throw new IOException( "Error: Header doesn't contain versioninfo" );
         }
 
-        //sometimes there are some garbage getBytes in the header before the header
+        //sometimes there is some garbage in the header before the header
         //actually starts, so lets try to find the header first.
         int headerStart = header.indexOf( PDF_HEADER );
         if (headerStart == -1)
@@ -466,204 +221,6 @@ public class PDFParser extends BaseParse
     }
 
     /**
-     * This will parse the next object from the stream and add it to
-     * the local state.
-     *
-     * @return Returns true if the processed object had an endOfFile marker
-     *
-     * @throws IOException If an IO error occurs.
-     */
-    private boolean parseObject() throws IOException
-    {
-        long currentObjByteOffset = pdfSource.getOffset();
-        boolean isEndOfFile = false;
-        skipSpaces();
-        //peek at the next character to determine the type of object we are parsing
-        char peekedChar = (char)pdfSource.peek();
-
-        //ignore endobj and endstream sections.
-        while( peekedChar == 'e' )
-        {
-            //there are times when there are multiple endobj, so lets
-            //just read them and move on.
-            readString();
-            skipSpaces();
-            currentObjByteOffset = pdfSource.getOffset();
-            peekedChar = (char)pdfSource.peek();
-        }
-        if( pdfSource.isEOF())
-        {
-            //"Skipping because of EOF" );
-            //end of file we will return a false and call it a day.
-        }
-        //xref table. Note: The contents of the Xref table are currently ignored
-        else if( peekedChar == 'x')
-        {
-            parseXrefTable( currentObjByteOffset );
-        }
-        // Note: startxref can occur in either a trailer section or by itself
-        else if (peekedChar == 't' || peekedChar == 's')
-        {
-            if(peekedChar == 't')
-            {
-                parseTrailer();
-                peekedChar = (char)pdfSource.peek();
-            }
-            if (peekedChar == 's')
-            {
-                parseStartXref();
-                // readString() calls skipSpaces() will skip comments... that's
-                // bad for us b/c the %%EOF flag is a comment
-                while(isWhitespace(pdfSource.peek()) && !pdfSource.isEOF())
-                {
-                    pdfSource.read(); // read (get rid of) all the whitespace
-                }
-                String eof = "";
-                if(!pdfSource.isEOF())
-                {
-                    eof = readLine(); // if there's more data to read, get the EOF flag
-                }
-
-                // verify that EOF exists (see PDFBOX-979 for documentation on special cases)
-                if(!"%%EOF".equals(eof)) 
-                {
-                    if(eof.startsWith("%%EOF")) 
-                    {
-                        // content after marker -> unread with first space byte for read newline
-                        pdfSource.unread(SPACE_BYTE); // we read a whole line; add space as newline replacement
-                        pdfSource.unread(eof.substring(5).getBytes(ISO_8859_1));
-                    } 
-                    else 
-                    {
-                        // PDF does not conform to spec, we should warn someone
-                        LOG.warn("expected='%%EOF' actual='" + eof + "'");
-                        // if we're not at the end of a file, just put it back and move on
-                        if(!pdfSource.isEOF()) 
-                        {
-                            pdfSource.unread( SPACE_BYTE ); // we read a whole line; add space as newline replacement
-                            pdfSource.unread(eof.getBytes(ISO_8859_1));
-                        }
-                    }
-                }
-                isEndOfFile = true;
-            }
-        }
-        else
-        {
-            //we are going to parse a normal object
-            COSObjectKey key = parseObjectKey(!isContinueOnError(null));
-            skipSpaces();
-            COSBase pb = parseDirObject();
-            String endObjectKey = readString();
-
-            if (endObjectKey.equals(STREAM_STRING))
-            {
-                pdfSource.unread(endObjectKey.getBytes(ISO_8859_1));
-                pdfSource.unread(' ');
-                if (pb instanceof COSDictionary)
-                {
-                    pb = parseCOSStream((COSDictionary) pb);
-
-                    // test for XRef type
-                    final COSStream strmObj = (COSStream) pb;
-
-                    // remember streams without length to check them later
-                    COSBase streamLength = strmObj.getItem(COSName.LENGTH);
-                    int length = -1;
-                    if (streamLength instanceof COSNumber)
-                    {
-                        length = ((COSNumber) streamLength).intValue();
-                    }
-                    if (length == -1)
-                    {
-                        streamLengthCheckSet.add(strmObj);
-                    }
-
-                    final COSName objectType = (COSName) strmObj.getItem(COSName.TYPE);
-                    if (objectType != null && objectType.equals(COSName.XREF))
-                    {
-                        // XRef stream
-                        parseXrefStream(strmObj, currentObjByteOffset);
-                    }
-                }
-                else
-                {
-                    // this is not legal
-                    // the combination of a dict and the stream/endstream forms a complete stream object
-                    throw new IOException("stream not preceded by dictionary");
-                }
-                skipSpaces();
-                endObjectKey = readLine();
-            }
-
-            COSObject pdfObject = document.getObjectFromPool( key );
-            if(pdfObject.getObject() == null)
-            {
-                pdfObject.setObject(pb);
-            }
-            /*
-             * If the object we returned already has a baseobject, then we have a conflict
-             * which we will resolve using information after we parse the xref table.
-             */
-            else
-            {
-                addObjectToConflicts(currentObjByteOffset, key, pb);
-            }
-
-            if( !endObjectKey.equals( ENDOBJ_STRING ) )
-            {
-                if (endObjectKey.startsWith( ENDOBJ_STRING ) )
-                {
-                    /*
-                     * Some PDF files don't contain a new line after endobj so we
-                     * need to make sure that the next object number is getting read separately
-                     * and not part of the endobj keyword. Ex. Some files would have "endobj28"
-                     * instead of "endobj"
-                     */
-                    pdfSource.unread( SPACE_BYTE ); // add a space first in place of the newline consumed by readline()
-                    pdfSource.unread( endObjectKey.substring( 6 ).getBytes(ISO_8859_1) );
-                }
-                else if(endObjectKey.trim().endsWith(ENDOBJ_STRING))
-                {
-                    /*
-                     * Some PDF files contain junk (like ">> ", in the case of a PDF
-                     * I found which was created by Exstream Dialogue Version 5.0.039)
-                     * in which case we ignore the data before endobj and just move on
-                     */
-                    LOG.warn("expected='endobj' actual='" + endObjectKey + "' ");
-                }
-                else if( !pdfSource.isEOF() )
-                {
-                    //It is possible that the endobj is missing, there
-                    //are several PDFs out there that do that so. Unread
-                    //and assume that endobj was missing
-                    pdfSource.unread( SPACE_BYTE ); // add a space first in place of the newline consumed by readline()
-                    pdfSource.unread( endObjectKey.getBytes(ISO_8859_1) );
-                }
-            }
-            skipSpaces();
-        }
-        return isEndOfFile;
-    }
-
-   /**
-    * Adds a new ConflictObj to the conflictList.
-    * @param offset the offset of the ConflictObj
-    * @param key The COSObjectKey of this object
-    * @param pb The COSBase of this conflictObj
-    * @throws IOException
-    */
-    private void addObjectToConflicts(long offset, COSObjectKey key, COSBase pb) throws IOException
-    {
-        COSObject obj = new COSObject(null);
-        obj.setObjectNumber( COSInteger.get( key.getNumber() ) );
-        obj.setGenerationNumber( COSInteger.get( key.getGeneration() ) );
-        obj.setObject(pb);
-        ConflictObj conflictObj = new ConflictObj(offset, key, obj);
-        conflictList.add(conflictObj);
-    }
-
-    /**
      * This will parse the startxref section from the stream.
      * The startxref value is ignored.
      *
@@ -864,18 +421,6 @@ public class PDFParser extends BaseParse
      * Stream must be of type XRef.
      * @param stream the stream to be read
      * @param objByteOffset the offset to start at
-     * @throws IOException if there is an error parsing the stream
-     */
-    public void parseXrefStream( COSStream stream, long objByteOffset ) throws IOException
-    {
-        parseXrefStream( stream, objByteOffset, true );
-    }
-        
-    /**
-     * Fills XRefTrailerResolver with data of given stream.
-     * Stream must be of type XRef.
-     * @param stream the stream to be read
-     * @param objByteOffset the offset to start at
      * @param isStandalone should be set to true if the stream is not part of a hybrid xref table
      * @throws IOException if there is an error parsing the stream
      */
@@ -893,97 +438,4 @@ public class PDFParser extends BaseParse
         parser.parse();
     }
 
-    /**
-     * Used to resolve conflicts when a PDF Document has multiple objects with
-     * the same id number. Ideally, we could use the Xref table when parsing
-     * the document to be able to determine which of the objects with the same ID
-     * is correct, but we do not have access to the Xref Table during parsing.
-     * Instead, we queue up the conflicts and resolve them after the Xref has
-     * been parsed. The Objects listed in the Xref Table are kept and the
-     * others are ignored.
-     */
-    private static class ConflictObj
-    {
-
-        private final long offset;
-        private final COSObjectKey objectKey;
-        private final COSObject object;
-
-        ConflictObj(long offsetValue, COSObjectKey key, COSObject pdfObject)
-        {
-            this.offset = offsetValue;
-            this.objectKey = key;
-            this.object = pdfObject;
-        }
-        @Override
-        public String toString()
-        {
-            return "Object(" + offset + ", " + objectKey + ")";
-        }
-
-        /**
-         * Sometimes pdf files have objects with the same ID number yet are
-         * not referenced by the Xref table and therefore should be excluded.
-         * This method goes through the conflicts list and replaces the object stored
-         * in the objects array with this one if it is referenced by the xref
-         * table.
-         * @throws IOException
-         */
-        private static void resolveConflicts(COSDocument document, List<ConflictObj> conflictList) throws IOException
-        {
-            Iterator<ConflictObj> conflicts = conflictList.iterator();
-            if (conflicts.hasNext())
-            {
-                Collection<Long> values = document.getXrefTable().values();
-                do
-                {
-                    ConflictObj o = conflicts.next();
-                    if (tolerantConflicResolver(values, o.offset, 4))
-                    {
-                        COSObject pdfObject = document.getObjectFromPool(o.objectKey);
-                        if (pdfObject.getObjectNumber() != null 
-                                && pdfObject.getObjectNumber().equals(o.object.getObjectNumber()))
-                        {
-                            pdfObject.setObject(o.object.getObject());
-                        }
-                        else
-                        {
-                            LOG.debug("Conflict object [" + o.objectKey + "] at offset " + o.offset
-                                    +" found in the xref table, but the object numbers differ. Ignoring this object."
-                                    + " The document is maybe malformed.");
-                        }
-                    }
-                }
-                while (conflicts.hasNext());
-            }
-        }
-    }
-    
-    /**
-     * Check if the given object offset can be find in the xref table. If not, we try to search the table
-     * again with the given tolerance and check the given bytes before and after the xref table offset.
-     *
-     * @param values are the unsorted values from the xref table
-     * @param offset is the offset that should be found in the xref table
-     * @param tolerance is the allowed tolerance in bytes.
-     * @return true if the offset was found inside the xref table
-     */
-    private static boolean tolerantConflicResolver(Collection<Long> values, long offset, int tolerance)
-    {
-        if (values.contains(offset))
-        {
-            return true;
-        }
-        else
-        {
-            for ( Long integer : values )
-            {
-                if (Math.abs(integer - offset) <= tolerance)
-                {
-                    return true;
-                }
-            }
-        }
-        return false;
-    }
 }



Mime
View raw message