httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Jim Jagielski <...@jaguNET.com>
Subject [STATUS] 1.3b1 Tue Sep 2 19:10:39 EDT 1997
Date Tue, 02 Sep 1997 23:19:47 GMT

-----------------------------------------------
1.3b1 status as of Tue Sep  2 19:10:39 EDT 1997
-----------------------------------------------

The Schedule
============

   o We are currently in Feature Freeze
   o Release of 1.3b1 on 9/9/97
   o We will be releasing the apachen CVS tree code
   o Will we run the code thru indent first??

The Code Changes
================

The Patches
===========

   o) Marc's proxy-CONNECT patch (see below):
       +1 Randy, Marc, Martin, Jim
   o) Paul's "library" update to Configuration suite:
       +1 Paul, Jim, Dean
   o) Makefile.tmpl: change `/bin/date` to `date`:
       +1 Marc, Jim
   o) mod_speling (see below):
       +1 Martin (Dean, Paul, and Alexei had voted +1 on previous versions)

The Plan
========

   o) Someone commit Marc's proxy-CONNECT patch
   o) Paul commit his Configuration patch
   o) Jim commit the /bin/date patch unless people complain
   o) People vote on mod_speling


Agenda for 1.3b2
================


START OF PATCHES
================

------------
PROXY-PATCH
------------

diff -r -u apachen.orig/src/modules/proxy/mod_proxy.c apachen/src/modules/proxy/mod_proxy.c
--- apachen.orig/src/modules/proxy/mod_proxy.c     Mon Sep  1 09:00:35 1997
+++ apachen/src/modules/proxy/mod_proxy.c  Mon Sep  1 09:42:44 1997
@@ -331,8 +331,14 @@
 	    (p != NULL &&
 	       strncmp(url, ents[i].scheme, strlen(ents[i].scheme)) == 0))
 	{
+	    /* CONNECT is a special method that bypasses the normal
+	     * proxy code.
+	     */
+	    if (r->method_number == M_CONNECT)
+		rc = proxy_connect_handler(r, cr, url, ents[i].hostname,
+		    ents[i].port);
 /* we only know how to handle communication to a proxy via http */
-	    if (strcmp(ents[i].protocol, "http") == 0)
+	    else if (strcmp(ents[i].protocol, "http") == 0)
 		rc = proxy_http_handler(r, cr, url, ents[i].hostname,
 		    ents[i].port);
 	    else rc = DECLINED;
@@ -349,7 +355,7 @@
  */
     /* handle the scheme */
     if (r->method_number == M_CONNECT)
-	return proxy_connect_handler(r, cr, url);
+	return proxy_connect_handler(r, cr, url, NULL, 0);
     if (strcmp(scheme, "http") == 0)
 	return proxy_http_handler(r, cr, url, NULL, 0);
     if (strcmp(scheme, "ftp") == 0)
diff -r -u apachen.orig/src/modules/proxy/mod_proxy.h apachen/src/modules/proxy/mod_proxy.h
--- apachen.orig/src/modules/proxy/mod_proxy.h     Mon Aug 25 00:01:57 1997
+++ apachen/src/modules/proxy/mod_proxy.h  Mon Aug 25 10:26:36 1997
@@ -237,7 +237,8 @@
 
 /* proxy_connect.c */
 
-int proxy_connect_handler(request_rec *r, struct cache_req *c, char *url);
+int proxy_connect_handler(request_rec *r, struct cache_req *c, char *url, 
+    const char *proxyhost, int proxyport);
 
 /* proxy_ftp.c */
 
diff -r -u apachen.orig/src/modules/proxy/proxy_connect.c apachen/src/modules/proxy/proxy_connect.c
--- apachen.orig/src/modules/proxy/proxy_connect.c Mon Sep  1 09:00:35 1997
+++ apachen/src/modules/proxy/proxy_connect.c      Mon Sep  1 09:42:45 1997
@@ -71,6 +71,9 @@
  * "Tunneling SSL Through a WWW Proxy" currently at
  * http://www.mcom.com/newsref/std/tunneling_ssl.html.
  *
+ * If proxyhost and proxyport are set, we send a CONNECT to 
+ * the specified proxy..  
+ *
  * FIXME: this is bad, because it does its own socket I/O
  *        instead of using the I/O in buff.c.  However,
  *        the I/O in buff.c blocks on reads, and because
@@ -90,7 +93,8 @@
  */ 
  
 int
-proxy_connect_handler(request_rec *r, struct cache_req *c, char *url)
+proxy_connect_handler(request_rec *r, struct cache_req *c, char *url,
+    const char *proxyhost, int proxyport)
 {
     struct sockaddr_in server;
     struct in_addr destaddr;
@@ -140,10 +144,14 @@
 	    return HTTP_SERVICE_UNAVAILABLE;
     }
 
-    Explain2("CONNECT to %s on port %d", host, port);
+    if (proxyhost)
+	Explain2("CONNECT to remote proxy %s on port %d", proxyhost, proxyport);
+    else
+	Explain2("CONNECT to %s on port %d", host, port);
  
-    server.sin_port = htons(port);
-    err = proxy_host2addr(host, &server_hp);
+    server.sin_port = proxyport ? htons(proxyport) : htons(port);
+    err = proxy_host2addr(proxyhost ? proxyhost : host, &server_hp);
+
     if (err != NULL)
 	return proxyerror(r, err); /* give up */
  
@@ -169,11 +177,27 @@
         return proxyerror(r, "Could not connect to remote machine");
     }
  
-    Explain0("Returning 200 OK Status");
- 
-    rvputs(r, "HTTP/1.0 200 Connection established\015\012", NULL);
-    rvputs(r, "Proxy-agent: ", SERVER_VERSION, "\015\012\015\012", NULL);
-    bflush(r->connection->client);
+    /* If we are connecting through a remote proxy, we need to pass
+     * the CONNECT request on to it.
+     */
+    if (proxyport) {
+	/* FIXME: We should not be calling write() directly, but we currently
+	 * have no alternative.  Error checking ignored.  Also, we force
+	 * a HTTP/1.0 request to keep things simple.
+	 */
+	Explain0("Sending the CONNECT request to the remote proxy");
+	ap_snprintf(buffer, sizeof(buffer), "CONNECT %s HTTP/1.0\015\012", 
+	    r->uri); 
+	write(sock, buffer, strlen(buffer));
+	ap_snprintf(buffer, sizeof(buffer),
+	    "Proxy-agent: %s\015\012\015\012", SERVER_VERSION);
+	write(sock, buffer, strlen(buffer));
+    } else {
+	Explain0("Returning 200 OK Status");
+	rvputs(r, "HTTP/1.0 200 Connection established\015\012", NULL);
+	rvputs(r, "Proxy-agent: ", SERVER_VERSION, "\015\012\015\012", NULL);
+	bflush(r->connection->client);
+    }
 
     while (1) /* Infinite loop until error (one side closes the connection) */
     {


-----------------
MOD_SPELING PATCH
-----------------

diff -r -u ./src/Configuration.tmpl /home/martin/apachen/src/Configuration.tmpl
--- apachen.orig/src/Configuration.tmpl	Mon Sep  1 03:00:23 1997
+++ apachen/src/Configuration.tmpl	Mon Sep  1 09:37:55 1997
@@ -211,6 +214,15 @@
 ##
 ## URL translation modules.
 ##
+
+## The Speling module attempts to correct misspellings of URLs that
+## users might have entered, namely by checking capitalizations
+## or by allowing up to one misspelling (character insertion / omission /
+## transposition/typo). This catches the majority of misspelled requests.
+## If it finds a match, a "spelling corrected" redirection is returned.
+
+# AddModule modules/standard/mod_speling.o
+
 ## The UserDir module for selecting resource directories by user name
 ## and a common prefix, e.g., /~<user> , /usr/web/<user> , etc.
 
diff -r -u -N /dev/null apachen/htdocs/manual/mod/mod_speling.html
--- /dev/null	Tue Sep  2 12:35:02 1997
+++ apachen/htdocs/manual/mod/mod_speling.html	Tue Sep  2 11:45:45 1997
@@ -0,0 +1,88 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<HTML>
+ <HEAD>
+  <TITLE>Apache module mod_speling</TITLE>
+ </HEAD>
+<!-- Background white, links blue (unvisited), navy (visited), red (active) -->
+ <BODY
+  BGCOLOR="#FFFFFF"
+  TEXT="#000000"
+  LINK="#0000FF"
+  VLINK="#000080"
+  ALINK="#FF0000"
+ >
+<!--#include virtual="header.html" -->
+  <H1 ALIGN="CENTER">Module mod_speling</H1>
+  <P>
+  This module is contained in the <code>mod_speling.c</code> file,
+  and is <strong>not</strong> compiled in by default.
+  It attempts to correct misspellings of
+  URLs that users might have entered, by ignoring capitalization
+  and by allowing up to one misspelling.<br>
+  This catches the majority of misspelled requests. An automatic
+  "spelling corrected" redirection is returned if only one matching
+  document was found, and a list of matches is returned if more than
+  one document with a sufficiently similar name is found.
+  </P>
+
+  <h2>Summary</h2>
+  <p>
+  Requests to documents sometimes cannot be served by the core apache
+  server because the request was misspelled or miscapitalized. This
+  module addresses this problem by trying to find a matching document,
+  even after all other modules gave up. It does its work by comparing
+  each document name in the requested directory against the requested
+  document name <STRONG>without regard to case</STRONG>, and allowing
+  <STRONG>up to one misspelling</STRONG> (character insertion / omission
+  / transposition or wrong character). A list is built with all document
+  names which were matched using this strategy.
+  </p>
+  <p>
+  If, after scanning the directory,
+  <ul>
+   <li>no matching document was found, Apache will proceed as usual
+       and return a "document not found" error.
+   <li>only one document is found that "almost" matches the request,
+       then it is returned in the form of a redirection response.
+   <li>more than one document with a close match was found, then
+       the list of the matches is returned to the client, and the client
+       can select the correct candidate.
+  </ul>
+  </p>
+
+  <h2>Directives</h2>
+
+  <menu>
+  <li><A HREF="#checkspelling">CheckSpelling</A>
+  </menu>
+
+  <HR> <!-- the HR is part of the directive description -->
+  <A name="checkspelling"><h2>CheckSpelling</h2></A>
+  <!--%plaintext &lt;?INDEX {\tt CheckSpelling} directive&gt; -->
+  <strong>Syntax:</strong> CheckSpelling <em>on/off</em><br>
+  <strong>Default:</strong> <code>CheckSpelling Off</code><br>
+  <Strong>Context:</strong> server config, virtual host<br>
+  <strong>Status:</strong> Base<br>
+  <strong>Module:</strong> mod_speling<br>
+  <strong>Compatibility:</strong> CheckSpelling was available as a separately
+  available module for Apache 1.1, but was limited to miscapitalizations.
+  As of Apache 1.3, it is part of the apache distribution<!-- or:
+  available as a separate module-->.<p>
+
+  This directive enables or disables the spelling module. When enabled,
+  keep in mind that
+  <UL>
+  <LI>the directory scan which is necessary for the spelling
+      correction will have an impact on the server's performance
+      when many spelling corrections have to be performed at the same time.
+  <LI>the document trees should not contain sensitive files which could
+      be matched inadvertently by a spelling "correction".
+  <LI>the module is unable to correct misspelled user names
+      (as in <code>http://my.host/~apahce/</code>), just file names or
+      directory names.
+  </UL>
+
+<!--#include virtual="footer.html" -->
+ </BODY>
+</HTML>
+
diff -r -u -N /dev/null apachen/src/modules/standard/mod_speling.c
--- /dev/null	Tue Sep  2 12:35:02 1997
+++ apachen/src/modules/standard/mod_speling.c	Tue Sep  2 12:07:21 1997
@@ -0,0 +1,431 @@
+#define WANT_BASENAME_MATCH
+/* ====================================================================
+ * Copyright (c) 1996,1997 The Apache Group.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the Apache Group
+ *    for use in the Apache HTTP server project (http://www.apache.org/)."
+ *
+ * 4. The names "Apache Server" and "Apache Group" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission.
+ *
+ * 5. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the Apache Group
+ *    for use in the Apache HTTP server project (http://www.apache.org/)."
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE APACHE GROUP OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Group and was originally based
+ * on public domain software written at the National Center for
+ * Supercomputing Applications, University of Illinois, Urbana-Champaign.
+ * For more information on the Apache Group and the Apache HTTP server
+ * project, please see <http://www.apache.org/>.
+ *
+ */
+
+#include "httpd.h"
+#include "http_config.h"
+#include "http_log.h"
+
+/* mod_speling.c - by Alexei Kosut <akosut@organic.com> June, 1996
+ *
+ * This module is transparent, and simple. It attemps to correct
+ * mispellings of URLs that users might have entered, namely by checking
+ * capitalizations. If it finds a match, it sends a redirect.
+ *
+ * 08-Aug-1997 <Martin.Kraemer@Mch.SNI.De>
+ * o Upgraded module interface to apache_1.3a2-dev API (more NULL's in speling_module).
+ * o Integrated tcsh's "spelling correction" routine which allows one
+ *   misspelling (character insertion/omission/typo/transposition).
+ *   Rewrote it to ignore case as well. This ought to catch the majority
+ *   of misspelled requests.
+ * o Commented out the second pass where files' suffixes are stripped.
+ *   Given the better hit rate of the first pass, this rather ugly
+ *   (request index.html, receive index.db ?!?!) solution can be
+ *   omitted.
+ * o wrote a "kind of" html page for mod_speling
+ *
+ * Activate it with "CheckSpelling On"
+ */
+
+module speling_module;
+
+/* We use the "unconventional" mod_userdir approach here. And heck,
+ * here it's just one int!
+ */
+
+static void *create_speling_config(pool * dummy, server_rec * s)
+{
+    return (void *) 0;
+}
+
+static const char *set_speling(cmd_parms * cmd, void *dummy, int arg)
+{
+    void *server_conf = cmd->server->module_config;
+
+    set_module_config(server_conf, &speling_module, (void *) arg);
+    return NULL;
+}
+
+command_rec speling_cmds[] =
+{
+    {"CheckSpelling", set_speling, NULL, RSRC_CONF, FLAG,
+       "whether or not to fix miscapitalized/misspelled requests"},
+    {NULL}
+};
+
+typedef enum {
+    SP_IDENTICAL = 0,
+    SP_MISCAPITALIZED = 1,
+    SP_TRANSPOSITION = 2,
+    SP_MISSINGCHAR = 3,
+    SP_EXTRACHAR = 4,
+    SP_SIMPLETYPO = 5,
+    SP_VERYDIFFERENT = 6
+} sp_reason;
+
+static const char *sp_reason_str[] =
+{
+    "identical",
+    "miscapitalized",
+    "transposed characters",
+    "character missing",
+    "extra character",
+    "mistyped character",
+    "common basename",
+};
+
+typedef struct {
+    const char *name;
+    sp_reason quality;
+} misspelled_file;
+
+/*
+ * spdist() is taken from Kernighan & Pike,
+ *  _The_UNIX_Programming_Environment_
+ * and adapted somewhat to correspond better to psychological reality.
+ * (Note the changes to the return values)
+ *
+ * According to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
+ * page 363, the correct order for this is:
+ * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
+ * thus, it was exactly backwards in the old version. -- PWP
+ *
+ * This routine was taken out of tcsh's spelling correction code
+ * (tcsh-6.07.04) and re-converted to apache data types ("char" type
+ * instead of tcsh's NLS'ed "Char"). Plus it now ignores the case
+ * during comparisons, so is a "approximate strcasecmp()".
+ * NOTE that is still allows only _one_ real "typo",
+ * it does NOT try to correct multiple errors.
+ */
+
+static sp_reason spdist(const char *s, const char *t)
+{
+    for (; tolower(*s) == tolower(*t); t++, s++)
+	if (*t == '\0')
+	    return SP_MISCAPITALIZED;	/* exact match (sans case) */
+    if (*s) {
+	if (*t) {
+	    if (s[1] && t[1] && tolower(*s) == tolower(t[1]) &&
+	      tolower(*t) == tolower(s[1]) && strcasecmp(s + 2, t + 2) == 0)
+		return SP_TRANSPOSITION;	/* transposition */
+	    if (strcasecmp(s + 1, t + 1) == 0)
+		return SP_SIMPLETYPO;	/* 1 char mismatch */
+	}
+	if (strcasecmp(s + 1, t) == 0)
+	    return SP_EXTRACHAR;	/* extra character */
+    }
+    if (*t && strcasecmp(s, t + 1) == 0)
+	return SP_MISSINGCHAR;	/* missing character */
+    return SP_VERYDIFFERENT;	/* distance too large to fix. */
+}
+
+static int sort_by_quality(const void *left, const void *rite)
+{
+    return (int) (((misspelled_file *) left)->quality)
+	 - (int) (((misspelled_file *) rite)->quality);
+}
+
+static int check_speling(request_rec * r)
+{
+    void *server_conf = r->server->module_config;
+    char *good, *bad, *postgood, *url;
+    int filoc, dotloc, urlen, pglen;
+    DIR *dirp;
+    struct DIR_TYPE *dir_entry;
+    array_header *candidates = NULL;
+
+    if (!(int) get_module_config(server_conf, &speling_module))
+	return DECLINED;
+
+    /* We only want to worry about GETs */
+    if (r->method_number != M_GET)
+	return DECLINED;
+
+    /* We've already got a file of some kind or another */
+    if (r->proxyreq || (r->finfo.st_mode != 0))
+	return DECLINED;
+
+    /* This is a sub request - don't mess with it */
+    if (r->main)
+	return DECLINED;
+
+    /* The request should end up looking like this:
+     * r->uri: /correct-url/mispelling/more
+     * r->filename: /correct-file/mispelling r->path_info: /more
+     *
+     * So we do this in steps. First break r->filename into two peices
+     */
+
+    filoc = rind(r->filename, '/');
+    if (filoc == -1)
+	return DECLINED;
+
+    /* good = /correct-file */
+    good = pstrndup(r->pool, r->filename, filoc);
+    /* bad = mispelling */
+    bad = pstrdup(r->pool, r->filename + filoc + 1);
+    /* postgood = mispelling/more */
+    postgood = pstrcat(r->pool, bad, r->path_info, NULL);
+
+    urlen = strlen(r->uri);
+    pglen = strlen(postgood);
+
+    /* Check to see if the URL pieces add up */
+    if (strcmp(postgood, r->uri + (urlen - pglen)))
+	return DECLINED;
+
+    /* url = /correct-url */
+    url = pstrndup(r->pool, r->uri, (urlen - pglen));
+
+    /* Now open the directory and do ourselves a check... */
+    dirp = opendir(good);
+    if (dirp == NULL)		/* Oops, not a directory... */
+	return DECLINED;
+
+    candidates = make_array(r->pool, 2, sizeof(misspelled_file));
+
+    dotloc = ind(bad, '.');
+    if (dotloc == -1)
+	dotloc = strlen(bad);
+
+    while ((dir_entry = readdir(dirp))) {
+	sp_reason q;
+
+	/* If we end up with a "fixed" URL which is identical to the
+	 * requested one, we must have found a broken symlink or some such.
+	 * Do _not_ try to redirect this, it causes a loop!
+	 */
+	if (strcmp(bad, dir_entry->d_name) == 0)
+	{
+	    closedir(dirp);
+	    return OK;
+	}
+	/*
+	 * miscapitalization errors are checked first
+	 * (like, e.g., lower case file, upper case request)
+	 */
+	else if (strcasecmp(bad, dir_entry->d_name) == 0) {
+	    misspelled_file *sp_new = (misspelled_file *) push_array(candidates);
+	    sp_new->name = pstrdup(r->pool, dir_entry->d_name);
+	    sp_new->quality = SP_MISCAPITALIZED;
+	}
+	/*
+	 * simple typing errors are checked next
+	 * (like, e.g., missing/extra/transposed char)
+	 */
+	else if ((q = spdist(bad, dir_entry->d_name)) != SP_VERYDIFFERENT) {
+	    misspelled_file *sp_new = (misspelled_file *) push_array(candidates);
+	    sp_new->name = pstrdup(r->pool, dir_entry->d_name);
+	    sp_new->quality = q;
+	}
+	/* The spdist() should have found the majority of the misspelled requests.
+	 * it is of questionable use to continue looking for files with the same
+	 * base name, but potentially of totally wrong type (index.html <-> index.db)
+	 * I would propose to not set the WANT_BASENAME_MATCH define.
+	 *      08-Aug-1997 <Martin.Kraemer@Mch.SNI.De>
+	 *
+	 * However, Alexei replied giving some reasons to add it anyway:
+	 * > Oh, by the way, I remembered why having the
+	 * > extension-stripping-and-matching stuff is a good idea:
+	 * >
+	 * > If you're using MultiViews, and have a file named foobar.html, which you
+	 * > refer to as "foobar", and someone tried to access "Foobar", mod_speling
+	 * > won't find it, because it won't find anything matching that
+	 * > spelling. With the extension-munging, it would locate "foobar.html". Not
+	 * > perfect, but I ran into that problem when I first wrote the module.
+	 */
+	else {
+#ifdef WANT_BASENAME_MATCH
+	    /* Okay... we didn't find anything. Now we take out the hard-core
+	     * power tools. There are several cases here. Someone might have
+	     * entered a wrong extension (.htm instead of .html or vice versa)
+	     * or the document could be negotiated. At any rate, now we just compare
+	     * stuff before the first dot. If it matches, we figure we got us a
+	     * match. This can result in wrong things if there are files of
+	     * different content types but the same prefix (e.g. foo.gif and foo.html)
+	     * This code will pick the first one it finds. Better than a Not Found,
+	     * though.
+	     */
+	    int entloc = ind(dir_entry->d_name, '.');
+	    if (entloc == -1)
+		entloc = strlen(dir_entry->d_name);
+
+	    if ((dotloc == entloc)
+		&& !strncasecmp(bad, dir_entry->d_name, dotloc)) {
+		misspelled_file *sp_new = (misspelled_file *) push_array(candidates);
+		sp_new->name = pstrdup(r->pool, dir_entry->d_name);
+		sp_new->quality = SP_VERYDIFFERENT;
+	    }
+#endif
+	}
+    }
+    closedir(dirp);
+
+    if (candidates->nelts != 0) {
+	/* Wow... we found us a mispelling. Construct a fixed url */
+	char *nuri, *ref;
+	misspelled_file *variant = (misspelled_file *) candidates->elts;
+	int i;
+
+	ref = table_get(r->headers_in, "Referer");
+
+	qsort((void *) candidates->elts, candidates->nelts,
+	      sizeof(misspelled_file), sort_by_quality);
+
+	/*
+	 * Conditions for immediate redirection:
+	 *     a) the first candidate was not found by stripping the suffix
+	 * AND b) there exists only one candidate OR the best match is not ambigous
+	 *
+	 * Otherwise, a "[300] Multiple Choices" list with the variants is returned.
+	 */
+	if (variant[0].quality != SP_VERYDIFFERENT &&
+	    (candidates->nelts == 1 || variant[0].quality != variant[1].quality)) {
+
+	    nuri = pstrcat(r->pool, url, variant[0].name,
+			   r->path_info, NULL);
+
+	    table_set(r->headers_out, "Location",
+		      construct_url(r->pool, nuri, r->server));
+
+	    aplog_error(APLOG_MARK, APLOG_ERR, r->server,
+			ref ? "Fixed spelling: %s to %s from %s"
+			: "Fixed spelling: %s to %s",
+			r->uri, nuri, ref);
+
+	    return HTTP_MOVED_PERMANENTLY;
+	}
+	/*
+	 * Otherwise, a "[300] Multiple Choices" list with the variants is returned.
+	 */
+	else {
+	    char *t;
+	    pool *pool;
+	    table *notes;
+
+	    if (r->main == NULL) {
+		pool = r->pool;
+		notes = r->notes;
+	    }
+	    else {
+		pool = r->main->pool;
+		notes = r->main->notes;
+	    }
+
+	    /* Generate the reponse text. */
+	    t = pstrcat(pool, "The document name you requested (<code>",
+		     r->uri, "</code>) could not be found on this server.\n"
+			"However, we found documents with names similar to the one you requested.<p>"
+			"Available documents:\n<ul>\n", NULL);
+
+	    for (i = 0; i < candidates->nelts; ++i) {
+
+		/* The format isn't very neat... */
+		t = pstrcat(pool, t, "<li><a href=\"", variant[i].name, "\">",
+			    variant[i].name, "</a> (",
+			    sp_reason_str[(int) (variant[i].quality)], ")\n", NULL);
+
+		/* when we have printed the "close matches" and there
+		 * are more "distant matches" (matched by stripping the
+		 * suffix), then we insert an additional separator text
+		 * to suggest that the user LOOK CLOSELY whether these
+		 * are really the files she wanted.
+		 */
+		if (i > 0 && i < candidates->nelts - 1
+		    && variant[i].quality != SP_VERYDIFFERENT
+		    && variant[i + 1].quality == SP_VERYDIFFERENT) {
+		    t = pstrcat(pool, t, "</ul>\nFurthermore, the following related documents were found:\n<ul>\n", NULL);
+		}
+	    }
+	    t = pstrcat(pool, t, "</ul>\n", NULL);
+
+	    /* If we know there was a referring page, add a note: */
+	    if (ref != NULL)
+		t = pstrcat(pool, t, "Please consider informing the owner of the <a href=\"",
+		ref, "\">referring page</a> about the broken link.\n", NULL);
+
+	    /* Pass our table to http_protocol.c (see mod_negotiation): */
+	    table_set(notes, "variant-list", t);
+
+	    aplog_error(APLOG_MARK, APLOG_WARNING, r->server,
+			ref ? "Spelling fix: %s: %d candidates from %s"
+			: "Spelling fix: %s: %d candidates",
+			r->uri, candidates->nelts, ref);
+
+	    return HTTP_MULTIPLE_CHOICES;
+	}
+    }
+
+    return OK;
+}
+
+module MODULE_VAR_EXPORT speling_module = {
+   STANDARD_MODULE_STUFF,
+   NULL,                        /* initializer */
+   NULL,                        /* create per-dir config */
+   NULL,                        /* merge per-dir config */
+   create_speling_config,       /* server config */
+   NULL,                        /* merge server config */
+   speling_cmds,                /* command table */
+   NULL,                        /* handlers */
+   NULL,                        /* filename translation */
+   NULL,                        /* check_user_id */
+   NULL,                        /* check auth */
+   NULL,                        /* check access */
+   NULL,                        /* type_checker */
+   check_speling,               /* fixups */
+   NULL,                        /* logger */
+   NULL,                        /* header parser */
+   NULL,                        /* child_init */
+   NULL,                        /* child_exit */
+   NULL                         /* post read-request */
+};


-- 
====================================================================
      Jim Jagielski            |       jaguNET Access Services
     jim@jaguNET.com           |       http://www.jaguNET.com/
            "Look at me! I'm wearing a cardboard belt!"

Mime
View raw message