manifoldcf-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From kwri...@apache.org
Subject svn commit: r919366 - in /incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl: Constants.java XMLDocumentScannerImpl.java XMLEntityManager.java XMLScanner.java
Date Fri, 05 Mar 2010 10:50:32 GMT
Author: kwright
Date: Fri Mar  5 10:50:32 2010
New Revision: 919366

URL: http://svn.apache.org/viewvc?rev=919366&view=rev
Log:
Add required patches to xerces-2 java, for LCF.  These changes permit acceptance of bad UTF-8
sequences, and fix some of the more egregious problems with allowing the parse to continue
after a fatal error.

Modified:
    incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/Constants.java
    incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java
    incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLEntityManager.java
    incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLScanner.java

Modified: incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/Constants.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/Constants.java?rev=919366&r1=919365&r2=919366&view=diff
==============================================================================
--- incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/Constants.java (original)
+++ incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/Constants.java Fri Mar  5 10:50:32 2010
@@ -219,6 +219,9 @@
     
     /** Warn on undeclared element feature ("validation/warn-on-undeclared-elemdef"). */
     public static final String WARN_ON_UNDECLARED_ELEMDEF_FEATURE = "validation/warn-on-undeclared-elemdef";
+
+    /** Ignore misencoded characters feature */
+    public static final String IGNORE_BADLY_ENCODED_CHARS = "ignore-badly-encoded-chars";
     
     /** Warn on duplicate entity declaration feature ("warn-on-duplicate-entitydef"). */
     public static final String WARN_ON_DUPLICATE_ENTITYDEF_FEATURE = "warn-on-duplicate-entitydef";

Modified: incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java?rev=919366&r1=919365&r2=919366&view=diff
==============================================================================
--- incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java (original)
+++ incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java Fri Mar  5 10:50:32 2010
@@ -783,6 +783,8 @@
                                 else {
                                     reportFatalError("MarkupNotRecognizedInProlog",
                                                      null);
+                                    // Don't loop forever!
+                                    fEntityScanner.scanChar();
                                 }
                             }
                             else if (isValidNameStartChar(fEntityScanner.peekChar())) {
@@ -802,6 +804,8 @@
                             else {
                                 reportFatalError("MarkupNotRecognizedInProlog",
                                                  null);
+                                // Don't loop forever!
+                                fEntityScanner.scanChar();
                             }
                             break;
                         }
@@ -872,6 +876,8 @@
                         }
                         case SCANNER_STATE_REFERENCE: {
                             reportFatalError("ReferenceIllegalInProlog", null);
+                            // Don't loop forever!
+                            fEntityScanner.scanChar();
                         }
                     }
                 } while (complete || again);
@@ -1277,6 +1283,8 @@
                             else {
                                 reportFatalError("MarkupNotRecognizedInMisc",
                                                  null);
+				// Skip forward one character, otherwise we loop forever.
+				fEntityScanner.scanChar();
                             }
                             break;
                         }

Modified: incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLEntityManager.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLEntityManager.java?rev=919366&r1=919365&r2=919366&view=diff
==============================================================================
--- incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLEntityManager.java (original)
+++ incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLEntityManager.java Fri Mar  5 10:50:32 2010
@@ -132,6 +132,10 @@
 	protected static final String PARSER_SETTINGS = 
 		Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS;	
 
+    /** Feature identifier: ignore badly encoded characters */
+    protected static final String IGNORE_BADLY_ENCODED_CHARS =
+        Constants.XERCES_FEATURE_PREFIX + Constants.IGNORE_BADLY_ENCODED_CHARS;
+
     // property identifiers
 
     /** Property identifier: symbol table. */
@@ -167,7 +171,8 @@
         EXTERNAL_PARAMETER_ENTITIES,
         ALLOW_JAVA_ENCODINGS,
         WARN_ON_DUPLICATE_ENTITYDEF,
-        STANDARD_URI_CONFORMANT
+        STANDARD_URI_CONFORMANT,
+        IGNORE_BADLY_ENCODED_CHARS
     };
 
     /** Feature defaults. */
@@ -177,6 +182,7 @@
         Boolean.TRUE,
         Boolean.FALSE,
         Boolean.FALSE,
+        Boolean.FALSE,
         Boolean.FALSE
     };
 
@@ -262,6 +268,12 @@
      */
     protected boolean fStrictURI;
 
+    /**
+    * allow badly encoded characters (skip them)
+    * http://apache.org/xml/features/ignore-badly-encoded-chars
+    */
+    protected boolean fAllowBadlyEncodedChars;
+
     // properties
 
     /**
@@ -1310,6 +1322,13 @@
             fStrictURI = false;
         }
 
+        try {
+            fAllowBadlyEncodedChars = componentManager.getFeature(IGNORE_BADLY_ENCODED_CHARS);
+        }
+        catch (XMLConfigurationException e) {
+            fAllowBadlyEncodedChars = false;
+        }
+
         // xerces properties
         fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
         fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
@@ -2082,6 +2101,33 @@
     protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
         throws IOException {
 
+        Reader internalReader = createInternalReader(inputStream, encoding, isBigEndian);
+        if (fAllowBadlyEncodedChars)
+        {
+            // Wrap the reader so that bad characters are ignored rather than causing aborts
+            return new LaxReader(internalReader);
+        }
+        return internalReader;
+    }
+    
+    /**
+     * Creates a reader capable of reading the given input stream in
+     * the specified encoding.
+     *
+     * @param inputStream  The input stream.
+     * @param encoding     The encoding name that the input stream is
+     *                     encoded using. If the user has specified that
+     *                     Java encoding names are allowed, then the
+     *                     encoding name may be a Java encoding name;
+     *                     otherwise, it is an ianaEncoding name.
+     * @param isBigEndian   For encodings (like UCS-4), whose names cannot
+     *                      specify a byte order, this tells whether the order is bigEndian.
+     *                      null means unknown or not relevant.
+     *
+     * @return Returns a reader.
+     */
+    protected Reader createInternalReader(InputStream inputStream, String encoding, Boolean isBigEndian)
+        throws IOException {
         // if the encoding is UTF-8 use the optimized UTF-8 reader
         if (encoding == "UTF-8" || encoding == null) {
             if (DEBUG_ENCODINGS) {
@@ -3025,6 +3071,9 @@
                 return -1;
             }
             if (fOffset == fData.length) {
+                if (fCurrentEntity.mayReadChunks) {
+                    return fInputStream.read();
+                }
                 byte[] newData = new byte[fOffset << 1];
                 System.arraycopy(fData, 0, newData, 0, fOffset);
                 fData = newData;
@@ -3138,4 +3187,105 @@
         }
     } // end of RewindableInputStream class
 
+    protected static class LaxReader extends Reader
+    {
+        protected Reader internalReader;
+
+        public LaxReader(Reader internalReader)
+        {
+            this.internalReader = internalReader;
+        }
+
+        public int read()
+            throws IOException
+        {
+            // Since we need to be able to skip ahead at the point of error, and not drop huge amounts on the floor,
+            // all read operations for this class are channeled through the single-character operation.  This is less
+            // efficient, but hopefully not terribly so.
+            try
+            {
+                return internalReader.read();
+            }
+            catch (org.apache.xerces.impl.io.MalformedByteSequenceException e)
+            {
+                // When this fails, it means we detected a bad character.
+                // However, the bad character has already been pulled off the stream, so we are free to stuff in a "?" and
+                // just keep going.
+                return (int)'?';
+            }
+        }
+        
+        public int read(char[] cbuf)
+            throws IOException
+        {
+            return read(cbuf,0,cbuf.length);
+        }
+
+        public int read(char[] cbuf,
+            int off,
+            int len)
+            throws IOException
+        {
+            int amtRead = 0;
+            while (amtRead < len)
+            {
+                int cval = read();
+                if (cval == -1)
+                {
+                    if (amtRead == 0)
+                        return -1;
+                    else
+                        return amtRead;
+                }
+                cbuf[off++] = (char)cval;
+                amtRead++;
+            }
+            return amtRead;
+        }
+
+        public long skip(long n)
+            throws IOException
+        {
+            long skipped = 0;
+            while (skipped < n)
+            {
+                int cval = read();
+                if (cval == -1)
+                    break;
+                skipped++;
+            }
+            return skipped;
+        }
+        
+        public boolean ready()
+            throws IOException
+        {
+            return internalReader.ready();
+        }
+        
+        public boolean markSupported()
+        {
+            return internalReader.markSupported();
+        }
+        
+        public void mark(int readAheadLimit)
+            throws IOException
+        {
+            internalReader.mark(readAheadLimit);
+        }
+        
+        public void reset()
+           throws IOException
+        {
+            internalReader.reset();
+        }
+
+        public void close()
+            throws IOException
+        {
+            internalReader.close();
+        }
+    }
+    
+
 } // class XMLEntityManager

Modified: incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLScanner.java
URL: http://svn.apache.org/viewvc/incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLScanner.java?rev=919366&r1=919365&r2=919366&view=diff
==============================================================================
--- incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLScanner.java (original)
+++ incubator/lcf/trunk/upstream/xerces2-j/src/org/apache/xerces/impl/XMLScanner.java Fri Mar  5 10:50:32 2010
@@ -823,6 +823,7 @@
                         String entityName = fEntityScanner.scanName();
                         if (entityName == null) {
                             reportFatalError("NameRequiredInReference", null);
+                            entityName = "unknown";
                         }
                         else if (entityDepth == fEntityDepth) {
                             fStringBuffer2.append(entityName);
@@ -1027,6 +1028,14 @@
                     if (XMLChar.isMarkup(c) || c == ']') {
                         fStringBuffer.append((char)fEntityScanner.scanChar());
                     }
+                    else if (XMLChar.isHighSurrogate(c)) {
+                        scanSurrogates(fStringBuffer);
+                    }
+                    else if (isInvalidLiteral(c)) {
+                        reportFatalError("InvalidCharInSystemID",
+                                new Object[] { Integer.toHexString(c) }); 
+                        fEntityScanner.scanChar();
+                    }
                 } while (fEntityScanner.scanLiteral(quote, ident) != quote);
                 fStringBuffer.append(ident);
                 ident = fStringBuffer;



Mime
View raw message