httpd-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s.@apache.org
Subject svn commit: r1226511 - in /httpd/mod_mbox/branches/convert-charsets/module-2.0: mod_mbox.h mod_mbox_mime.c mod_mbox_out.c
Date Mon, 02 Jan 2012 18:50:27 GMT
Author: sf
Date: Mon Jan  2 18:50:27 2012
New Revision: 1226511

URL: http://svn.apache.org/viewvc?rev=1226511&view=rev
Log:
convert charset of mail bodies to UTF-8

Modified:
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h?rev=1226511&r1=1226510&r2=1226511&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h Mon Jan  2 18:50:27 2012
@@ -130,6 +130,7 @@ apr_status_t mbox_cte_convert_to_utf8(ap
 /* MIME decoding functions */
 mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p,
                                                 char *body, char *ct,
+                                                char *charset,
                                                 mbox_cte_e cte,
                                                 char *boundary);
 char *mbox_mime_decode_body(apr_pool_t *p, mbox_cte_e cte, char *body,

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c?rev=1226511&r1=1226510&r2=1226511&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c Mon Jan  2 18:50:27 2012
@@ -18,11 +18,37 @@
  */
 
 #include "mod_mbox.h"
+#include <apr_lib.h>
 
 #ifdef APLOG_USE_MODULE
 APLOG_USE_MODULE(mbox);
 #endif
 
+/* Extract the value of the "charset" parameter from a Content-Type value.
+ *
+ * ct    points at the start of the Content-Type value (e.g.
+ *       "text/plain; charset=utf-8").
+ * limit points one past the last byte that may be examined; nothing at
+ *       or beyond it is read as part of the keyword or the value.
+ *
+ * Returns a copy of the charset value allocated from p, without the
+ * surrounding double quotes if there were any, or NULL when no non-empty
+ * charset parameter is found before limit.
+ *
+ * NOTE(review): parameters are located by searching for ';' up to limit,
+ * so when limit is the end of a whole header block a "charset=" that
+ * belongs to a later header line can still match -- confirm whether the
+ * callers should pass the end of the Content-Type header instead.
+ */
+static char *mbox_mime_get_charset(apr_pool_t *p, const char *ct, const char *limit)
+{
+    const char *ptr = ct;
+
+    while (ptr && ptr < limit && *ptr) {
+        /* Step over the ';' that ap_strchr_c() left us on, plus any
+         * whitespace.  Without consuming the ';' the search at the bottom
+         * of the loop would find the same separator again and never
+         * make progress. */
+        while (ptr < limit && (*ptr == ';' || apr_isspace(*ptr))) {
+            ptr++;
+        }
+
+        if (limit - ptr >= 7 && strncasecmp(ptr, "charset", 7) == 0) {
+            const char *val = ptr + 7;
+
+            while (val < limit && apr_isspace(*val)) {
+                val++;
+            }
+            if (val < limit && *val == '=') {
+                const char *end;
+                int quoted = 0;
+
+                /* Skip the '=' itself so it is not part of the value. */
+                val++;
+                while (val < limit && apr_isspace(*val)) {
+                    val++;
+                }
+                if (val < limit && *val == '"') {
+                    quoted = 1;
+                    val++;
+                }
+
+                /* A quoted value ends at the closing quote; a bare token
+                 * ends at the next ';' or whitespace (which includes the
+                 * CR/LF that terminates the header line). */
+                end = val;
+                if (quoted) {
+                    while (end < limit && *end && *end != '"') {
+                        end++;
+                    }
+                }
+                else {
+                    while (end < limit && *end && *end != ';'
+                           && !apr_isspace(*end)) {
+                        end++;
+                    }
+                }
+
+                if (end > val) {
+                    return apr_pstrmemdup(p, val, end - val);
+                }
+                return NULL;
+            }
+        }
+
+        /* Not the charset parameter: move on to the next one, if any. */
+        ptr = ap_strchr_c(ptr, ';');
+    }
+    return NULL;
+}
+
 static apr_status_t cleanup_mime_msg(void *data)
 {
     mbox_mime_message_t *mail = data;
@@ -34,8 +60,8 @@ static apr_status_t cleanup_mime_msg(voi
  * levels of MIME parts, this function is recursive.
  */
 mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p, char *body,
-                                                char *ct, mbox_cte_e cte,
-                                                char *boundary)
+                                                char *ct, char *charset,
+                                                mbox_cte_e cte, char *boundary)
 {
     mbox_mime_message_t *mail;
     char *tmp = NULL, *k = NULL, *end_bound = NULL;
@@ -56,19 +82,21 @@ mbox_mime_message_t *mbox_mime_decode_mu
         headers_bound = body;
     }
 
-    /* If no Content-Type is provided, it means that we are parsing a
-       sub-part of the multipart message. The Content-Type header
-       should then be the first line of the part. If not, use
-       text/plain as default for the sub-part. */
-    tmp = ap_strstr(body, "Content-Type: ");
-    if (!ct && (!tmp || tmp > headers_bound)) {
-        ct = "text/plain";
-    }
-
     mail = apr_pcalloc(p, sizeof(mbox_mime_message_t));
     /* make sure the memory allocated by realloc() below is cleaned up */
     apr_pool_cleanup_register(p, mail, cleanup_mime_msg, apr_pool_cleanup_null);
 
+    if (!ct) {
+        /* If no Content-Type is provided, it means that we are parsing a
+         * sub-part of the multipart message. The Content-Type header
+         * should then be the first line of the part. If not, use
+         * text/plain as default for the sub-part.
+         */
+        tmp = ap_strstr(body, "Content-Type: ");
+        if (!tmp || tmp > headers_bound)
+            ct = "text/plain";
+    }
+
     /* If no Content-Type is given, we have to look for it. */
     if (!ct) {
         tmp += sizeof("Content-Type: ") - 1;
@@ -94,6 +122,9 @@ mbox_mime_message_t *mbox_mime_decode_mu
         mail->content_type = apr_pstrdup(p, tmp);
         *k = ';';
 
+        if (!charset)
+            charset = mbox_mime_get_charset(p, tmp, headers_bound);
+
         /* If available, get MIME part name */
         tmp = ap_strstr(body, "name=");
         if (tmp && tmp < headers_bound) {
@@ -124,7 +155,10 @@ mbox_mime_message_t *mbox_mime_decode_mu
     }
     else {
         mail->content_type = ct;
+        if (!charset)
+            charset = mbox_mime_get_charset(p, ct, ct + strlen(ct));
     }
+    mail->charset = charset;
 
     /* Now we have a Content-Type. Look for other useful header information */
 
@@ -265,7 +299,7 @@ mbox_mime_message_t *mbox_mime_decode_mu
             mail->sub =
                 realloc(mail->sub, ++count * sizeof(struct mimemsg *));
             mail->sub[count - 1] =
-                mbox_mime_decode_multipart(p, search, NULL, CTE_NONE, NULL);
+                mbox_mime_decode_multipart(p, search, NULL, NULL, CTE_NONE, NULL);
 
             /* If the boudary is found again, it means we have another
                MIME part in the same multipart message. Set the new
@@ -330,7 +364,6 @@ char *mbox_mime_decode_body(apr_pool_t *
     return new_body;
 }
 
-
 /* This function returns the relevant MIME part from a message. For
  * the moment, it just returns the first text/ MIME part available.
  */
@@ -346,17 +379,31 @@ char *mbox_mime_get_body(apr_pool_t *p, 
 
     if (strncasecmp(m->content_type, "text/", strlen("text/")) == 0) {
         char *new_body;
-
-        new_body =
-            mbox_mime_decode_body(p, m->cte, m->body, m->body_len, NULL);
+        apr_size_t new_len;
+        new_body = mbox_mime_decode_body(p, m->cte, m->body, m->body_len,
+                                         &new_len);
         if (!new_body) {
             return MBOX_FETCH_ERROR_STR;
         }
 
-        m->body_len =
-            mbox_cte_escape_html(p, new_body, m->body_len, &(m->body));
+        if (m->charset) {
+            struct ap_varbuf vb;
+            apr_status_t rv;
+            ap_varbuf_init(p, &vb, 0);
+            vb.strlen = 0;
+            if ((rv = mbox_cte_convert_to_utf8(p, m->charset, new_body, new_len, &vb))
+                == APR_SUCCESS) {
+                new_body = vb.buf;
+                new_len = vb.strlen;
+            }
+            else {
+                ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
+                             "conversion from '%s' to utf-8 failed", m->charset);
+            }
+        }
 
-        return apr_pstrndup(p, m->body, m->body_len);
+        mbox_cte_escape_html(p, new_body, new_len, &new_body);
+        return new_body;
     }
 
     if (!m->sub) {

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c?rev=1226511&r1=1226510&r2=1226511&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c Mon Jan  2 18:50:27 2012
@@ -81,6 +81,7 @@ static void display_atom_entry(request_r
     /* Parse multipart information */
     m->mime_msg = mbox_mime_decode_multipart(pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
     ap_rprintf(r, "%s",
@@ -995,6 +996,7 @@ int mbox_raw_message(request_rec *r, apr
        subpart */
     m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
     mime_part = m->mime_msg;
@@ -1136,6 +1138,7 @@ int mbox_static_message(request_rec *r, 
     /* Parse multipart information */
     m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
     send_page_header(r,
@@ -1246,6 +1249,7 @@ apr_status_t mbox_xml_message(request_re
     /* Parse multipart information */
     m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
                                              m->content_type,
+                                             m->charset,
                                              m->cte, m->boundary);
 
     ap_rputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", r);



Mime
View raw message