tomcat-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ma...@apache.org
Subject [tomcat] branch master updated: Improve BoM detection for rarely used UTF-32
Date Tue, 14 May 2019 13:50:59 GMT
This is an automated email from the ASF dual-hosted git repository.

markt pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tomcat.git


The following commit(s) were added to refs/heads/master by this push:
     new 8607e1a  Improve BoM detection for rarely used UTF-32
8607e1a is described below

commit 8607e1a0d2c283e443ce1ba2ccfb55b1884a580e
Author: Mark Thomas <markt@apache.org>
AuthorDate: Tue May 14 14:50:43 2019 +0100

    Improve BoM detection for rarely used UTF-32
    
    Identified by Coverity Scan which reported unreachable code.
---
 java/org/apache/catalina/servlets/DefaultServlet.java | 12 +++++++++++-
 webapps/docs/changelog.xml                            |  5 +++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/java/org/apache/catalina/servlets/DefaultServlet.java b/java/org/apache/catalina/servlets/DefaultServlet.java
index 205d302..2e669d4 100644
--- a/java/org/apache/catalina/servlets/DefaultServlet.java
+++ b/java/org/apache/catalina/servlets/DefaultServlet.java
@@ -1212,7 +1212,9 @@ public class DefaultServlet extends HttpServlet {
             skip(is, 2);
             return StandardCharsets.UTF_16BE;
         }
-        if (b0 == 0xFF && b1 == 0xFE) {
+        // Delay the UTF_16LE check if there are more than 2 bytes since it
+        // overlaps with UTF32-LE.
+        if (count == 2 && b0 == 0xFF && b1 == 0xFE) {
             skip(is, 2);
             return StandardCharsets.UTF_16LE;
         }
@@ -1244,6 +1246,14 @@ public class DefaultServlet extends HttpServlet {
             return Charset.forName("UTF32-LE");
         }
 
+        // Now we can check for UTF16-LE. There is an assumption here that we
+        // won't see a UTF16-LE file with a BOM where the first real data is
+        // 0x00 0x00
+        if (b0 == 0xFF && b1 == 0xFE) {
+            skip(is, 2);
+            return StandardCharsets.UTF_16LE;
+        }
+
         skip(is, 0);
         return null;
     }
diff --git a/webapps/docs/changelog.xml b/webapps/docs/changelog.xml
index 92f2aa0..4f83bb2 100644
--- a/webapps/docs/changelog.xml
+++ b/webapps/docs/changelog.xml
@@ -63,6 +63,11 @@
         Make a best-effort attempt to clean up if a request fails during
         processing due to an <code>OutOfMemoryError</code>. (markt)
       </fix>
+      <fix>
+        Improve the BoM detection for static files handled by the default
+        servlet for the rarely used UTF-32 encodings. Identified by Coverity
+        Scan. (markt)
+      </fix>
     </changelog>
   </subsection>
   <subsection name="Coyote">


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@tomcat.apache.org
For additional commands, e-mail: dev-help@tomcat.apache.org


Mime
View raw message