httpd-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From s.@apache.org
Subject svn commit: r1325648 - in /httpd/mod_mbox/branches/convert-charsets/module-2.0: mod_mbox.h mod_mbox_mime.c mod_mbox_out.c
Date Fri, 13 Apr 2012 08:10:00 GMT
Author: sf
Date: Fri Apr 13 08:10:00 2012
New Revision: 1325648

URL: http://svn.apache.org/viewvc?rev=1325648&view=rev
Log:
Rework mbox_mime_decode_multipart() and mbox_mime_get_body()

- Add debug/trace logging, to get a hint at the problem if we get
  MBOX_FETCH_ERROR_STR. Pass request_rec to allow proper logging scope.
- Move duplicated code to extract header lines and parameters
  into separate functions.
- Add proper handling of quoted words in header parameters.
- Avoid changing strings just to be able to strdup sub-strings. Use
  apr_strndup/apr_strmemdup instead.
- Fix skipping of headers in multi-part parts.

This fixes a bunch of cases where we only sent
"An error occured while fetching this message, sorry !"

Modified:
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c
    httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h?rev=1325648&r1=1325647&r2=1325648&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox.h Fri Apr 13 08:10:00 2012
@@ -128,14 +128,14 @@ apr_status_t mbox_cte_convert_to_utf8(ap
                                       struct ap_varbuf *vb);
 
 /* MIME decoding functions */
-mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p,
+mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p,
                                                 char *body, char *ct,
                                                 char *charset,
                                                 mbox_cte_e cte,
                                                 char *boundary);
 char *mbox_mime_decode_body(apr_pool_t *p, mbox_cte_e cte, char *body,
                             apr_size_t len, apr_size_t *ret_len);
-char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m);
+char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m);
 void mbox_mime_display_static_structure(request_rec *r,
                                         mbox_mime_message_t *m,
                                         char *link);

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c?rev=1325648&r1=1325647&r2=1325648&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_mime.c Fri Apr 13 08:10:00
2012
@@ -24,27 +24,89 @@
 APLOG_USE_MODULE(mbox);
 #endif
 
-static char *mbox_mime_get_charset(apr_pool_t *p, const char *ct, const char *limit)
+/**
+ * find certain header line, return copy of first part (up to first ";")
+ * @param p pool to allocate from
+ * @param name name of the header
+ * @param string input: pointer to pointer string where to find the header;
+ *        output: pointer to the ";" or "\n" after the copied value
+ * @param end pointer where to stop searching
+ * @note string must be NUL-terminated (but the NUL may be after *end)
+ * @return copy of the header value or NULL if not found
+ */
+static char *mbox_mime_get_header(apr_pool_t *p, const char *name,
+                                  char **string, const char *end)
+{
+    char *ptr;
+    int namelen = strlen(name);
+    for (ptr = *string;
+         ptr && *ptr && ptr < end ;
+         ptr = ap_strchr(ptr + 1, '\n'))
+    {
+        int l;
+        if (strncasecmp(ptr, name, namelen) != 0)
+            continue;
+        ptr += namelen;
+        if (*ptr != ':')
+            continue;
+        ptr++;
+        while (*ptr == ' ')
+            ptr++;
+        if (ptr >= end)
+            break;
+        l = strcspn(ptr, ";\n");
+        *string = ptr + l;
+        while (apr_isspace(ptr[l]) && l > 0)
+            l--;
+        return apr_pstrndup(p, ptr, l);
+    }
+    return NULL;
+}
+
+/**
+ * find value for parameter with certain name
+ * @param p pool to allocate from
+ * @param name name of the attribute
+ * @param string string with name=value pairs separated by ";",
+ *        value may be a quoted string delimited by double quotes
+ * @param end pointer where to stop searching
+ * @note string must be NUL-terminated (but the NUL may be after *end)
+ * @return copy of the value, NULL if not found
+ */
+static char *mbox_mime_get_parameter(apr_pool_t *p, const char *name,
+                                     const char *string, const char *end)
 {
-    const char *ptr = ct;
-    while (ptr && *ptr && ptr < limit) {
+    const char *ptr = string;
+    int namelen = strlen(name);
+    while (ptr && *ptr && ptr < end) {
+        int have_match = 0;
+        const char *val_end;
         while (*ptr && apr_isspace(*ptr))
             ptr++;
-        if (strncasecmp(ptr, "charset", 7) == 0) {
-            ptr += 7;
-            while (*ptr && apr_isspace(*ptr) && ptr < limit)
+        if (strncasecmp(ptr, name, namelen) == 0) {
+            ptr += strlen(name);
+            while (*ptr && apr_isspace(*ptr) && ptr < end)
                 ptr++;
             if (*ptr == '=') {
-                const char *end;
-                while (*ptr && apr_isspace(*ptr) && ptr < limit)
+                have_match = 1;
+                ptr++;
+                if (ptr >= end)
+                    break;
+                while (*ptr && apr_isspace(*ptr) && ptr < end)
                     ptr++;
-                end = ap_strchr_c(ptr, ';');
-                if (!end || end > limit)
-                    end = limit;
-                return apr_pstrmemdup(p, ptr, end - ptr);
             }
         }
-        ptr = ap_strchr_c(ptr, ';');
+        if (!have_match)
+            ptr += strcspn(ptr, "= \t");
+        if (*ptr == '"')
+            val_end = ap_strchr_c(++ptr, '"');
+        else
+            val_end = ptr + strcspn(ptr, ";\n ");
+        if (!val_end || val_end > end)
+            val_end = end;
+        if (have_match)
+            return apr_pstrmemdup(p, ptr, val_end - ptr);
+        ptr = val_end + 1;
     }
     return NULL;
 }
@@ -59,15 +121,17 @@ static apr_status_t cleanup_mime_msg(voi
 /* Decode a multipart (or not) email. In order to support multiple
  * levels of MIME parts, this function is recursive.
  */
-mbox_mime_message_t *mbox_mime_decode_multipart(apr_pool_t *p, char *body,
+mbox_mime_message_t *mbox_mime_decode_multipart(request_rec *r, apr_pool_t *p, char *body,
                                                 char *ct, char *charset,
                                                 mbox_cte_e cte, char *boundary)
 {
     mbox_mime_message_t *mail;
-    char *tmp = NULL, *k = NULL, *end_bound = NULL;
+    char *tmp = NULL, *end_bound = NULL;
     char *headers_bound = NULL;
 
     if (!body) {
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_decode_multipart: no body");
         return NULL;
     }
 
@@ -75,6 +139,8 @@ mbox_mime_message_t *mbox_mime_decode_mu
     if (!ct) {
         headers_bound = ap_strstr(body, "\n\n");
         if (!headers_bound) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                          "no '\\n\\n' header separator found");
             return NULL;
         }
     }
@@ -92,140 +158,49 @@ mbox_mime_message_t *mbox_mime_decode_mu
          * should then be the first line of the part. If not, use
          * text/plain as default for the sub-part.
          */
-        tmp = ap_strstr(body, "Content-Type: ");
-        if (!tmp || tmp > headers_bound)
+        tmp = body;
+        ct = mbox_mime_get_header(p, "Content-Type", &tmp, headers_bound);
+        if (!ct) {
             ct = "text/plain";
-    }
-
-    /* If no Content-Type is given, we have to look for it. */
-    if (!ct) {
-        tmp += sizeof("Content-Type: ") - 1;
-        k = strchr(tmp, ';');
-
-        /* Isolate the Content-Type string (between 'Content-Type: '
-           and ';' or end of line */
-        if (k && k < headers_bound) {
-            *k = 0;
         }
         else {
-            k = tmp;
-            while (*k) {
-                if (isspace(*k)) {
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-        }
-
-        /* Copy the Content-Type and reset *k */
-        mail->content_type = apr_pstrdup(p, tmp);
-        *k = ';';
-
-        if (!charset)
-            charset = mbox_mime_get_charset(p, tmp, headers_bound);
-
-        /* If available, get MIME part name */
-        tmp = ap_strstr(body, "name=");
-        if (tmp && tmp < headers_bound) {
-            char c = '\0';
-            tmp += sizeof("name=") - 1;
-            k = tmp;
-
-            while (*k) {
-                if (isspace(*k) || *k == ';') {
-                    c = *k;
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-
-            /* Check for double quotes */
-            if ((*tmp == '"') && (tmp[strlen(tmp) - 1] == '"')) {
-                mail->content_name =
-                    apr_pstrndup(p, tmp + 1, strlen(tmp) - 2);
-            }
-            else {
-                mail->content_name = apr_pstrdup(p, tmp);
-            }
-
-            *k = c;
+            if (!charset)
+                charset = mbox_mime_get_parameter(p, "charset", tmp, headers_bound);
+            mail->content_name = mbox_mime_get_parameter(p, "name", tmp, headers_bound);
         }
+        mail->content_type = ct;
     }
     else {
         mail->content_type = ct;
         if (!charset)
-            charset = mbox_mime_get_charset(p, ct, ct + strlen(ct));
+            charset = mbox_mime_get_parameter(p, "charset", ct, ct + strlen(ct));
     }
     mail->charset = charset;
 
     /* Now we have a Content-Type. Look for other useful header information */
 
     /* Check Content-Disposition if the match is within the headers */
-    tmp = ap_strstr(body, "Content-Disposition: ");
-    if (tmp && tmp < headers_bound) {
-        char c = '\0';
-        tmp += sizeof("Content-Disposition: ") - 1;
-        k = tmp;
-
-        while (*k) {
-            if (isspace(*k) || *k == ';') {
-                c = *k;
-                *k = 0;
-                break;
-            }
-            k++;
-        }
-
-        /* Copy the Content-Disposition and reset *k */
-        mail->content_disposition = apr_pstrdup(p, tmp);
-        *k = c;
-    }
-    else {
-        mail->content_disposition = apr_pstrdup(p, "inline");
-    }
+    tmp = body;
+    mail->content_disposition = mbox_mime_get_header(p, "Content-Disposition", &tmp,
headers_bound);
+    if (!mail->content_disposition)
+        mail->content_disposition = "inline";
 
     /* Check Content-Transfer-Encoding, if needed */
     if (cte == CTE_NONE) {
-        tmp = ap_strstr(body, "Content-Transfer-Encoding: ");
-        if (tmp && tmp < headers_bound) {
-            char c = '\0';
-            tmp += sizeof("Content-Transfer-Encoding: ") - 1;
-            k = tmp;
-
-            while (*k) {
-                if (isspace(*k) || *k == ';') {
-                    c = *k;
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-
-            /* Copy the Content-Disposition and reset *k */
+        tmp = body;
+        tmp = mbox_mime_get_header(p, "Content-Transfer-Encoding", &tmp, headers_bound);
+        if (tmp)
             mail->cte = mbox_parse_cte_header(tmp);
-            *k = c;
-        }
     }
     else {
         mail->cte = cte;
     }
 
-    /* Now we have all the headers we need. Start processing the
-       body. If the Content-Type was given at call time, the body
-       starts where it's given. Otherwise it's after the headers
-       (first new empty line) */
-
-    if (ct) {
+    /* Now we have all the headers we need. Start processing the body */
+    if (headers_bound == body)
         mail->body = body;
-    }
-    else {
-        mail->body = ap_strstr(body, "\n\n");
-        if (mail->body != NULL) {
-            mail->body += 2;
-        }
-    }
+    else
+        mail->body = headers_bound + 2; /* skip double new line */
 
     /* If the mail is a multipart message, search for the boundary,
        and process its sub parts by recursive calls. */
@@ -235,30 +210,16 @@ mbox_mime_message_t *mbox_mime_decode_mu
 
         /* If the boundary was not given, we must look for it in the headers */
         if (!boundary) {
-            tmp = ap_strstr(body, "boundary=\"");
-            if (!tmp) {
+            boundary = mbox_mime_get_parameter(p, "boundary", body, headers_bound);
+            if (!boundary) {
+                ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                              "invalid multipart message: no boundary defined");
                 return NULL;
             }
-
-            tmp += sizeof("boundary=\"") - 1;
-            k = tmp;
-
-            while (*k) {
-                if (*k == '"') {
-                    *k = 0;
-                    break;
-                }
-                k++;
-            }
-
-            mail->boundary = apr_pstrdup(p, tmp);
-            *k = '"';
-        }
-
-        /* Otherwise, the boundary is as given to us */
-        else {
-            mail->boundary = boundary;
         }
+        mail->boundary = boundary;
+        ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+                      "decoding multipart message: boundary %s", boundary);
 
         /* Now we have our boundary string. We must : look for it once
            (begining of MIME part) and then look for the end boundary :
@@ -298,8 +259,10 @@ mbox_mime_message_t *mbox_mime_decode_mu
             /* Allocate a new pointer for the sub part, and parse it. */
             mail->sub =
                 realloc(mail->sub, ++count * sizeof(struct mimemsg *));
+            ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r,
+                          "decoding part %d", count);
             mail->sub[count - 1] =
-                mbox_mime_decode_multipart(p, search, NULL, NULL, CTE_NONE, NULL);
+                mbox_mime_decode_multipart(r, p, search, NULL, NULL, CTE_NONE, NULL);
 
             /* If the boudary is found again, it means we have another
                MIME part in the same multipart message. Set the new
@@ -320,6 +283,9 @@ mbox_mime_message_t *mbox_mime_decode_mu
 
         /* Finally reset the end-body pointer. */
         //      *tmp = '-';
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "done decoding multipart message (boundary %s)",
+                      boundary);
     }
 
     /* If the parsed body is not multipart or is a MIME part, the body
@@ -367,13 +333,16 @@ char *mbox_mime_decode_body(apr_pool_t *
 /* This function returns the relevant MIME part from a message. For
  * the moment, it just returns the first text/ MIME part available.
  */
-char *mbox_mime_get_body(apr_pool_t *p, mbox_mime_message_t *m)
+char *mbox_mime_get_body(request_rec *r, apr_pool_t *p, mbox_mime_message_t *m)
 {
     int i;
 
     /* If the message structure or the message body is empty, just
        return NULL */
     if (!m || !m->body) {
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_get_body: %s",
+                      m == NULL ? "no message???" : "no body");
         return MBOX_FETCH_ERROR_STR;
     }
 
@@ -383,6 +352,8 @@ char *mbox_mime_get_body(apr_pool_t *p, 
         new_body = mbox_mime_decode_body(p, m->cte, m->body, m->body_len,
                                          &new_len);
         if (!new_body) {
+            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                          "mbox_mime_get_body: could not decode body");
             return MBOX_FETCH_ERROR_STR;
         }
 
@@ -391,6 +362,9 @@ char *mbox_mime_get_body(apr_pool_t *p, 
             apr_status_t rv;
             ap_varbuf_init(p, &vb, 0);
             vb.strlen = 0;
+            ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
+                          "mbox_mime_get_body: converting %d bytes from %s",
+                          new_len, m->charset);
             if ((rv = mbox_cte_convert_to_utf8(p, m->charset, new_body, new_len, &vb))
                 == APR_SUCCESS) {
                 new_body = vb.buf;
@@ -400,6 +374,8 @@ char *mbox_mime_get_body(apr_pool_t *p, 
                 ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
                              "conversion from '%s' to utf-8 failed", m->charset);
             }
+            ap_log_rerror(APLOG_MARK, APLOG_TRACE4, 0, r,
+                          "mbox_mime_get_body: conversion done");
         }
 
         mbox_cte_escape_html(p, new_body, new_len, &new_body);
@@ -407,13 +383,21 @@ char *mbox_mime_get_body(apr_pool_t *p, 
     }
 
     if (!m->sub) {
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_get_body: message not text/* and no sub parts");
         return MBOX_FETCH_ERROR_STR;
     }
 
     for (i = 0; i < m->sub_count; i++) {
-        return mbox_mime_get_body(p, m->sub[i]);
+        /* XXX this loop is bullshit, should check result of mbox_mime_get_body()  */
+        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                      "mbox_mime_get_body: choosing m->sub[%d]", i);
+        return mbox_mime_get_body(r, p, m->sub[i]);
     }
 
+    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
+                  "m->sub != NULL but m->subcount == 0 ???");
+
     return MBOX_FETCH_ERROR_STR;
 }
 

Modified: httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c
URL: http://svn.apache.org/viewvc/httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c?rev=1325648&r1=1325647&r2=1325648&view=diff
==============================================================================
--- httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c (original)
+++ httpd/mod_mbox/branches/convert-charsets/module-2.0/mod_mbox_out.c Fri Apr 13 08:10:00
2012
@@ -79,13 +79,13 @@ static void display_atom_entry(request_r
 
     load_message(pool, f, m);
     /* Parse multipart information */
-    m->mime_msg = mbox_mime_decode_multipart(pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, pool, m->raw_body,
                                              m->content_type,
                                              m->charset,
                                              m->cte, m->boundary);
 
     ap_rprintf(r, "%s",
-               mbox_cntrl_escape(pool, mbox_wrap_text(mbox_mime_get_body(pool, m->mime_msg))));
+               mbox_cntrl_escape(pool, mbox_wrap_text(mbox_mime_get_body(r, pool, m->mime_msg))));
 
     ap_rputs("\n</pre>\n</div>\n</content>\n", r);
     ap_rputs("</entry>\n", r);
@@ -994,7 +994,7 @@ int mbox_raw_message(request_rec *r, apr
 
     /* First, parse the MIME structure, and look for the correct
        subpart */
-    m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
                                              m->content_type,
                                              m->charset,
                                              m->cte, m->boundary);
@@ -1136,7 +1136,7 @@ int mbox_static_message(request_rec *r, 
     }
 
     /* Parse multipart information */
-    m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
                                              m->content_type,
                                              m->charset,
                                              m->cte, m->boundary);
@@ -1202,7 +1202,7 @@ int mbox_static_message(request_rec *r, 
     /* Message body */
     ap_rputs("   <tr class=\"contents\"><td colspan=\"2\"><pre>\n", r);
     ap_rprintf(r, "%s",
-               mbox_wrap_text(mbox_mime_get_body(r->pool, m->mime_msg)));
+               mbox_wrap_text(mbox_mime_get_body(r, r->pool, m->mime_msg)));
     ap_rputs("</pre></td></tr>\n", r);
 
     /* MIME structure */
@@ -1248,7 +1248,7 @@ apr_status_t mbox_xml_message(request_re
     }
 
     /* Parse multipart information */
-    m->mime_msg = mbox_mime_decode_multipart(r->pool, m->raw_body,
+    m->mime_msg = mbox_mime_decode_multipart(r, r->pool, m->raw_body,
                                              m->content_type,
                                              m->charset,
                                              m->cte, m->boundary);
@@ -1273,7 +1273,7 @@ apr_status_t mbox_xml_message(request_re
                ESCAPE_OR_BLANK(r->pool, m->rfc822_date));
 
     ap_rprintf(r, "%s",
-               mbox_cntrl_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r->pool,
m->mime_msg))));
+               mbox_cntrl_escape(r->pool, mbox_wrap_text(mbox_mime_get_body(r, r->pool,
m->mime_msg))));
     ap_rputs("]]></contents>\n", r);
     ap_rputs(" <mime>\n", r);
     mbox_mime_display_xml_structure(r, m->mime_msg,



Mime
View raw message