Return-Path: Delivered-To: new-httpd-archive@hyperreal.org Received: (qmail 29653 invoked by uid 6000); 8 Dec 1999 17:15:22 -0000 Received: (qmail 29030 invoked from network); 8 Dec 1999 17:14:39 -0000 Received: from i.meepzor.com (HELO Mail.MeepZor.Com) (root@204.146.167.214) by taz.hyperreal.org with SMTP; 8 Dec 1999 17:14:39 -0000 Received: from Golux.Com (ss04.nc.us.ibm.com [32.97.136.234]) by Mail.MeepZor.Com (8.8.7/8.8.7) with ESMTP id MAA15420; Wed, 8 Dec 1999 12:14:13 -0500 Message-ID: <384E91DF.D92FABC2@Golux.Com> Date: Wed, 08 Dec 1999 12:14:07 -0500 From: Rodent of Unusual Size Organization: The Apache Software Foundation X-Mailer: Mozilla 4.06 [en] (WinNT; U) MIME-Version: 1.0 To: Apache Developers Subject: [PATCH] enhance mod_mime with AddCharset Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Sender: new-httpd-owner@apache.org Precedence: bulk Reply-To: new-httpd@apache.org Status: O This is something that's been requested forever, and was submitted by Youichirou Koga a while ago. Martin Duerst has re-opened the request to add it for 1.3. I'm +1 after preliminary testing. There's docco to go with it, but it's not in this patch. -- #ken P-)} Ken Coar Apache Software Foundation "Apache Server for Dummies" Index: src/modules/standard/mod_mime.c =================================================================== RCS file: /home/cvs/apache-1.3/src/modules/standard/mod_mime.c,v retrieving revision 1.49 diff -u -r1.49 mod_mime.c --- mod_mime.c 1999/04/20 17:27:51 1.49 +++ mod_mime.c 1999/12/08 17:10:24 @@ -75,6 +75,7 @@ typedef struct { table *forced_types; /* Additional AddTyped stuff */ table *encoding_types; /* Added with AddEncoding... */ + table *charset_types; /* Added with AddCharset... */ table *language_types; /* Added with AddLanguage... */ table *handlers; /* Added with AddHandler... */ array_header *handlers_remove; /* List of handlers to remove */ @@ -84,6 +85,24 @@ char *default_language; /* Language if no AddLanguage ext found */ } mime_dir_config; +typedef struct param_s { + char *attr; + char *val; + struct param_s *next; +} param; + +typedef struct { + char *type; + char *subtype; + param *param; +} content_type; + +static char tspecial[] = { + '(', ')', '<', '>', '@', ',', ';', ':', + '\\', '"', '/', '[', ']', '?', '=', + '\0' +}; + module MODULE_VAR_EXPORT mime_module; static void *create_mime_dir_config(pool *p, char *dummy) @@ -93,6 +112,7 @@ new->forced_types = ap_make_table(p, 4); new->encoding_types = ap_make_table(p, 4); + new->charset_types = ap_make_table(p, 4); new->language_types = ap_make_table(p, 4); new->handlers = ap_make_table(p, 4); new->handlers_remove = ap_make_array(p, 4, sizeof(handlers_info)); @@ -119,9 +139,11 @@ } new->forced_types = ap_overlay_tables(p, add->forced_types, - base->forced_types); + base->forced_types); new->encoding_types = ap_overlay_tables(p, add->encoding_types, base->encoding_types); + new->charset_types = ap_overlay_tables(p, add->charset_types, + base->charset_types); new->language_types = ap_overlay_tables(p, add->language_types, base->language_types); new->handlers = ap_overlay_tables(p, add->handlers, @@ -155,11 +177,23 @@ return NULL; } -static const char *add_language(cmd_parms *cmd, mime_dir_config * m, char *lang, +static const char *add_charset(cmd_parms *cmd, mime_dir_config *m, + char *charset, char *ext) +{ + if (*ext == '.') { + ++ext; + } + ap_str_tolower(charset); + ap_table_setn(m->charset_types, ext, charset); + return NULL; +} + +static const char *add_language(cmd_parms *cmd, mime_dir_config *m, char *lang, char *ext) { - if (*ext == '.') - ++ext; + if (*ext == '.') { + ++ext; + } ap_str_tolower(lang); ap_table_setn(m->language_types, ext, lang); return NULL; @@ -209,6 +243,8 @@ "a mime type followed by one or more file extensions"}, {"AddEncoding", add_encoding, NULL, OR_FILEINFO, ITERATE2, "an encoding (e.g., gzip), followed by one or more file extensions"}, + {"AddCharset", add_charset, NULL, OR_FILEINFO, ITERATE2, + "a charset (e.g., iso-2022-jp), followed by one or more file extensions"}, {"AddLanguage", add_language, NULL, OR_FILEINFO, ITERATE2, "a language (e.g., fr), followed by one or more file extensions"}, {"AddHandler", add_handler, NULL, OR_FILEINFO, ITERATE2, @@ -275,6 +311,257 @@ ap_cfg_closefile(f); } +char *zap_sp(char *s) +{ + char *tp; + + if (s == NULL) { + return (NULL); + } + if (*s == '\0') { + return (s); + } + + /* delete prefixed white space */ + for (; *s == ' ' || *s == '\t' || *s == '\n'; s++); + + /* delete postfixed white space */ + for (tp = s; *tp != '\0'; tp++); + for (tp--; tp != s && (*tp == ' ' || *tp == '\t' || *tp == '\n'); tp--) { + *tp = '\0'; + } + return (s); +} + +int is_token(int c) +{ + int res = -1; + int i; + + if (c >= 33 && c <= 127) { + res = 1; + for (i = 0; tspecial[i] != '\0'; i++) { + if (c == tspecial[i]) { + res = -1; + break; + } + } + } + return (res); +} + +int is_qtext(int c) +{ + int res = -1; + + if ((c >= 0 && c <= 127) && !(c == '"' || c == '\\' || c == '\n')) { + res = 1; + } + return (res); +} + +int is_quoted_pair(char *s) +{ + int res = -1; + int c; + + if (((s + 1) != NULL) && (*s == '\\')) { + c = (int) *(s + 1); + if (c >= 0 && c <= 127) { + res = 1; + } + } + return (res); +} + +content_type *analyze_ct(pool * p, char *s) +{ + char *tp, *mp, *cp; + char *attribute, *value; + int quoted = 0; + + content_type *ctp; + param *pp, *npp; + + /* initialize ctp */ + ctp = (content_type *) ap_palloc(p, sizeof(content_type)); + ctp->type = NULL; + ctp->subtype = NULL; + ctp->param = NULL; + + tp = ap_pstrdup(p, s); + + mp = tp; + cp = mp; + + /* getting a type */ + if (!(cp = strchr(mp, '/'))) { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media type."); + return (NULL); + } + ctp->type = ap_pstrndup(p, mp, cp - mp); + ctp->type = zap_sp(ctp->type); + if (ctp->type == NULL || *(ctp->type) == '\0' || + strchr(ctp->type, ';') || strchr(ctp->type, ' ') || + strchr(ctp->type, '\t')) { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media subtype."); + return (NULL); + } + + /* getting a subtype */ + cp++; + mp = cp; + + for (; *cp != ';' && *cp != '\0'; cp++); + ctp->subtype = ap_pstrndup(p, mp, cp - mp); + ctp->subtype = zap_sp(ctp->subtype); + if ((ctp->subtype == NULL) || (*(ctp->subtype) == '\0') || + strchr(ctp->subtype, ' ') || strchr(ctp->subtype, '\t')) { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media subtype."); + return (NULL); + } + cp = zap_sp(cp); + if (cp == NULL || *cp == '\0') { + return (ctp); + } + + /* getting parameters */ + cp++; + cp = zap_sp(cp); + if (cp == NULL || *cp == '\0') { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return (NULL); + } + mp = cp; + attribute = NULL; + value = NULL; + + while (cp != NULL && *cp != '\0') { + if (attribute == NULL) { + if (is_token((int) *cp) > 0) { + cp++; + continue; + } + else if (*cp == ' ' || *cp == '\t' || *cp == '\n') { + cp++; + continue; + } + else if (*cp == '=') { + attribute = ap_pstrndup(p, mp, cp - mp); + attribute = zap_sp(attribute); + if (attribute == NULL || *attribute == '\0') { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return (NULL); + } + cp++; + cp = zap_sp(cp); + if (cp == NULL || *cp == '\0') { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return (NULL); + } + mp = cp; + continue; + } + else { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return (NULL); + } + } + else { + if (mp == cp) { + if (*cp == '"') { + quoted = 1; + cp++; + } + else { + quoted = 0; + } + } + if (quoted > 0) { + while (quoted && *cp != '\0') { + if (is_qtext((int) *cp) > 0) { + cp++; + } + else if (is_quoted_pair(cp) > 0) { + cp += 2; + } + else if (*cp == '"') { + cp++; + while (*cp == ' ' || *cp == '\t' || *cp == '\n') { + cp++; + } + if (*cp != ';' && *cp != '\0') { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return(NULL); + } + quoted = 0; + } + else { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return (NULL); + } + } + } + else { + while (1) { + if (is_token((int) *cp) > 0) { + cp++; + } + else if (*cp == '\0' || *cp == ';') { + break; + } + else { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return (NULL); + } + } + } + value = ap_pstrndup(p, mp, cp - mp); + value = zap_sp(value); + if (value == NULL || *value == '\0') { + ap_log_error(APLOG_MARK, APLOG_WARNING, NULL, + "Cannot get media parameter."); + return (NULL); + } + + pp = ap_palloc(p, sizeof(param)); + pp->attr = attribute; + pp->val = value; + pp->next = NULL; + + if (ctp->param == NULL) { + ctp->param = pp; + } + else { + npp = ctp->param; + while (npp->next) { + npp = npp->next; + } + npp->next = pp; + } + quoted = 0; + attribute = NULL; + value = NULL; + if (*cp == '\0') { + break; + } + cp++; + mp = cp; + } + } + return (ctp); +} + static int find_ct(request_rec *r) { const char *fn = strrchr(r->filename, '/'); @@ -283,6 +570,7 @@ char *ext; const char *orighandler = r->handler; const char *type; + const char *charset; if (S_ISDIR(r->finfo.st_mode)) { r->content_type = DIR_MAGIC_TYPE; @@ -308,6 +596,12 @@ found = 1; } + /* Add charset to Content-Type */ + if ((type = ap_table_get(conf->charset_types, ext))) { + charset = type; + found = 1; + } + /* Check for Content-Language */ if ((type = ap_table_get(conf->language_types, ext))) { const char **new; @@ -347,8 +641,46 @@ r->content_languages = NULL; r->content_encoding = NULL; r->handler = orighandler; - } + charset = NULL; + } + } + if (r->content_type) { + content_type *ctp; + char *ct; + int override = 0; + + ct = (char *) ap_palloc(r->pool, + sizeof(char) * (strlen(r->content_type) + 1)); + strcpy(ct, r->content_type); + + if ((ctp = analyze_ct(r->pool, ct))) { + param *pp = ctp->param; + r->content_type = ap_pstrcat(r->pool, ctp->type, "/", + ctp->subtype, NULL); + while (pp != NULL) { + if (charset && !strcmp(pp->attr, "charset")) { + if (!override) { + r->content_type = ap_pstrcat(r->pool, r->content_type, + "; charset=", charset, + NULL); + override = 1; + } + } + else { + r->content_type = ap_pstrcat(r->pool, r->content_type, + "; ", pp->attr, + "=", pp->val, + NULL); + } + pp = pp->next; + } + if (charset && !override) { + r->content_type = ap_pstrcat(r->pool, r->content_type, + "; charset=", charset, + NULL); + } + } } /* Set default language, if none was specified by the extensions