httpd-apreq-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j...@apache.org
Subject svn commit: r234258 - in /httpd/apreq/trunk: include/apreq_version.h library/util.c
Date Sun, 21 Aug 2005 16:31:43 GMT
Author: joes
Date: Sun Aug 21 09:31:39 2005
New Revision: 234258

URL: http://svn.apache.org/viewcvs?rev=234258&view=rev
Log:
Scan the resultant url-decoded string for control chars, to ensure we
always set the charset to cp1252 whenever they're present.

Modified:
    httpd/apreq/trunk/include/apreq_version.h
    httpd/apreq/trunk/library/util.c

Modified: httpd/apreq/trunk/include/apreq_version.h
URL: http://svn.apache.org/viewcvs/httpd/apreq/trunk/include/apreq_version.h?rev=234258&r1=234257&r2=234258&view=diff
==============================================================================
--- httpd/apreq/trunk/include/apreq_version.h (original)
+++ httpd/apreq/trunk/include/apreq_version.h Sun Aug 21 09:31:39 2005
@@ -61,7 +61,7 @@
 #define APREQ_MINOR_VERSION       3
 
 /** patch level */
-#define APREQ_PATCH_VERSION       0
+#define APREQ_PATCH_VERSION       1
 
 /**
  *  This symbol is defined for internal, "development" copies of libapreq.

Modified: httpd/apreq/trunk/library/util.c
URL: http://svn.apache.org/viewcvs/httpd/apreq/trunk/library/util.c?rev=234258&r1=234257&r2=234258&view=diff
==============================================================================
--- httpd/apreq/trunk/library/util.c (original)
+++ httpd/apreq/trunk/library/util.c Sun Aug 21 09:31:39 2005
@@ -254,23 +254,33 @@
     return 1;
 }
 
-static APR_INLINE apreq_charset_t fragment_charset(const char *word,
-                                                   const char *end)
+static APR_INLINE unsigned is_enc8_fragment(const char *word,
+                                            const char *end)
 {
     unsigned char flen = end - word;
     unsigned char wlen = flen / 3;
     if (!is_enc8(word, wlen))
-        return APREQ_CHARSET_LATIN1;
+        return 0;
 
     switch (flen % 3) {
     case 2:
         if (!is_89AB(*--end))
-            return APREQ_CHARSET_LATIN1;
+            return 0;
     case 1:
         if (*--end != '%')
-            return APREQ_CHARSET_LATIN1;
+            return 0;
     }
-    return APREQ_CHARSET_UTF8;
+    return 1;
+}
+
+/* look for chars between 0x80 and 0x9F, inclusive */
+static APR_INLINE unsigned has_cntrl(const unsigned char *start,
+                                     const unsigned char *end)
+{
+    while (start <= end)
+        if ((*start++ & 0xE0) == 0x80)
+            return 1;
+    return 0;
 }
 
 
@@ -309,9 +319,8 @@
  * 3) presume latin1; unless there are control chars, in which case
  * 4) punt to cp1252.
  *
- * Note: in downgrading from 2 to 3, we should to be careful
+ * Note: in downgrading from 2 to 3, we need to be careful
  * about earlier control characters presumed to be valid utf8.
- * However, we aren't being that careful with the current implementation.
  */
 
 
@@ -359,8 +368,8 @@
                 else if (c < 0xE0) {
                     /* 2-byte utf8 */
                     if (s + 3 >= end) {
-                        *charset = fragment_charset(s+1, end);
-                        if (*charset == APREQ_CHARSET_UTF8) {
+                        if (is_enc8_fragment(s+1, end)) {
+                            *charset = APREQ_CHARSET_UTF8;
                             s -= 2;
                             *dlen = d - start;
                             *slen = s - src;
@@ -369,6 +378,8 @@
                             return APR_INCOMPLETE;
                         }
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
                     else if (is_enc8(s+1, 1)) {
                         *charset = APREQ_CHARSET_UTF8;
@@ -377,15 +388,16 @@
                         s += 3;
                     }
                     else {
-                        *charset = APREQ_CHARSET_LATIN1;
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
                 }
                 else if (c < 0xF0) {
                     /* 3-byte utf8 */
                     if (s + 6 >= end) {
-                        *charset = fragment_charset(s+1, end);
-                        if (*charset == APREQ_CHARSET_UTF8) {
+                        if (is_enc8_fragment(s+1, end)) {
+                            *charset = APREQ_CHARSET_UTF8;
                             s -= 2;
                             *dlen = d - start;
                             *slen = s - src;
@@ -394,6 +406,8 @@
                             return APR_INCOMPLETE;
                         }
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
                     else if (is_enc8(s+1, 2)) {
                         *charset = APREQ_CHARSET_UTF8;
@@ -403,16 +417,17 @@
                         s += 6;
                     }
                     else {
-                        *charset = APREQ_CHARSET_LATIN1;
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
 
                 }
                 else if (c < 0xF8) {
                     /* 4-byte utf8 */
                     if (s + 9 >= end) {
-                        *charset = fragment_charset(s+1, end);
-                        if (*charset == APREQ_CHARSET_UTF8) {
+                        if (is_enc8_fragment(s+1, end)) {
+                            *charset = APREQ_CHARSET_UTF8;
                             s -= 2;
                             *dlen = d - start;
                             *slen = s - src;
@@ -421,6 +436,8 @@
                             return APR_INCOMPLETE;
                         }
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
                     else if (is_enc8(s+1, 3)) {
                         *charset = APREQ_CHARSET_UTF8;
@@ -431,24 +448,27 @@
                         s += 9;
                     }
                     else {
-                        *charset = APREQ_CHARSET_LATIN1;
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
 
                 }
                 else if (c < 0xFC) {
                     /* 5-byte utf8 */
                     if (s + 12 >= end) {
-                        *charset = fragment_charset(s+1, end);
-                         if (*charset == APREQ_CHARSET_UTF8) {
-                             s -= 2;
-                             *dlen = d - start;
-                             *slen = s - src;
-                             memmove(d, s, end - s);
-                             d[end - s] = 0;
-                             return APR_INCOMPLETE;
-                         }
-                         *d = c;
+                        if (is_enc8_fragment(s+1, end)) {
+                            *charset = APREQ_CHARSET_UTF8;
+                            s -= 2;
+                            *dlen = d - start;
+                            *slen = s - src;
+                            memmove(d, s, end - s);
+                            d[end - s] = 0;
+                            return APR_INCOMPLETE;
+                        }
+                        *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
                     else if (is_enc8(s+1, 4)) {
                         *charset = APREQ_CHARSET_UTF8;
@@ -460,16 +480,17 @@
                         s += 12;
                     }
                     else {
-                        *charset = APREQ_CHARSET_LATIN1;
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
 
                 }
                 else if (c < 0xFE) {
                     /* 6-byte utf8 */
                     if (s + 15 >= end) {
-                        *charset = fragment_charset(s+1, end);
-                        if (*charset == APREQ_CHARSET_UTF8) {
+                        if (is_enc8_fragment(s+1, end)) {
+                            *charset = APREQ_CHARSET_UTF8;
                             s -= 2;
                             *dlen = d - start;
                             *slen = s - src;
@@ -478,6 +499,8 @@
                             return APR_INCOMPLETE;
                         }
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
                     else if (is_enc8(s+1, 5)) {
                         *charset = APREQ_CHARSET_UTF8;
@@ -490,8 +513,9 @@
                         s += 15;
                     }
                     else {
-                        *charset = APREQ_CHARSET_LATIN1;
                         *d = c;
+                        *charset = has_cntrl((unsigned char *)dest, d)
+                            ? APREQ_CHARSET_CP1252 : APREQ_CHARSET_LATIN1;
                     }
                 }
                 else {



Mime
View raw message