httpd-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Dean Gaudet <dgau...@hyperreal.org>
Subject cvs commit: apache/src CHANGES http_conf_globals.h http_config.c http_main.c http_protocol.c httpd.h
Date Tue, 05 Aug 1997 06:02:48 GMT
dgaudet     97/08/04 23:02:47

  Modified:    htdocs/manual  vhosts-in-depth.html
               src       CHANGES http_conf_globals.h http_config.c
                        http_main.c  http_protocol.c httpd.h
  Log:
  Hashed ip-vhosts, including some semantic changes to vhosts in general
  which should improve the vhost situation overall.
  
  Revision  Changes    Path
  1.13      +4 -0      apache/htdocs/manual/vhosts-in-depth.html
  
  Index: vhosts-in-depth.html
  ===================================================================
  RCS file: /export/home/cvs/apache/htdocs/manual/vhosts-in-depth.html,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -r1.12 -r1.13
  --- vhosts-in-depth.html	1997/07/06 17:18:57	1.12
  +++ vhosts-in-depth.html	1997/08/05 06:02:38	1.13
  @@ -186,6 +186,10 @@
   
   <h3>Vhost Matching</h3>
   
  +
  +<p><strong>Apache 1.3 differs from what is documented
  +here, and documentation still has to be written.</strong>
  +
   <p>
   The server determines which vhost to use for a request as follows:
   
  
  
  
  1.382     +14 -0     apache/src/CHANGES
  
  Index: CHANGES
  ===================================================================
  RCS file: /export/home/cvs/apache/src/CHANGES,v
  retrieving revision 1.381
  retrieving revision 1.382
  diff -u -r1.381 -r1.382
  --- CHANGES	1997/08/02 22:43:50	1.381
  +++ CHANGES	1997/08/05 06:02:39	1.382
  @@ -1,5 +1,19 @@
   Changes with Apache 1.3a2
   
  +  *) ip-based vhosts are stored and queried using a hashing function, which
  +     has been shown to improve performance on servers with many ip-vhosts.
  +     Some other changes had to be made to accommodate this:
  +	- the * address for vhosts now behaves like _default_
  +	- the matching process now is:
  +	    - match an ip-vhost directly via hash (possibly matches main
  +		server)
  +	    - if that fails, just pretend it matched the main server
  +	    - if so far only the main server has been matched, perform
  +		name-based lookups (ServerName, ServerAlias, ServerPath)
  +		*only on name-based vhosts*
  +	    - if they fail, look for _default_ vhosts
  +     [Dean Gaudet, Dave Hankins <dhankins@sugarat.net>]
  +
     *) dbmmanage overhaul:
        - merge dbmmanage and dbmmanage.new functionality, remove dbmmanage.new 
        - tie() to AnyDBM_File which will use one of DB_File, NDBM_File or
  
  
  
  1.17      +2 -1      apache/src/http_conf_globals.h
  
  Index: http_conf_globals.h
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_conf_globals.h,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- http_conf_globals.h	1997/08/03 20:29:18	1.16
  +++ http_conf_globals.h	1997/08/05 06:02:40	1.17
  @@ -87,8 +87,9 @@
   extern char server_root[MAX_STRING_LEN];
   extern char server_confname[MAX_STRING_LEN];
   
  +extern server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
  +
   /* We want this to have the least chance of being corrupted if there
    * is some memory corruption, so we allocate it statically.
    */
   extern char coredump_dir[MAX_STRING_LEN];
  -
  
  
  
  1.70      +5 -0      apache/src/http_config.c
  
  Index: http_config.c
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_config.c,v
  retrieving revision 1.69
  retrieving revision 1.70
  diff -u -r1.69 -r1.70
  --- http_config.c	1997/08/03 20:29:18	1.69
  +++ http_config.c	1997/08/05 06:02:40	1.70
  @@ -1154,6 +1154,11 @@
       bind_address.s_addr = htonl(INADDR_ANY);
       listeners = NULL;
       listenbacklog = DEFAULT_LISTENBACKLOG;
  +
  +    /* Global virtual host hash bucket pointers.  Init to null. */
  +    memset (vhash_table, 0,
  +	(VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP) * sizeof (vhash_table[0]));
  +
       strncpy(coredump_dir, server_root, sizeof(coredump_dir)-1);
       coredump_dir[sizeof(coredump_dir)-1] = '\0';
   }
  
  
  
  1.197     +160 -39   apache/src/http_main.c
  
  Index: http_main.c
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_main.c,v
  retrieving revision 1.196
  retrieving revision 1.197
  diff -u -r1.196 -r1.197
  --- http_main.c	1997/08/04 09:21:16	1.196
  +++ http_main.c	1997/08/05 06:02:41	1.197
  @@ -191,6 +191,23 @@
   listen_rec *listeners;
   static listen_rec *head_listener;
   
  +/* A (n) bucket hash table, each entry has a pointer to a server rec and
  + * a pointer to the other entries in that bucket.  Each individual address,
  + * even for virtualhosts with multiple addresses, has an entry in this hash
  + * table.  There are extra buckets for _default_, and name-vhost entries.
  + *
  + * The main_server's addresses appear in the main part of this table.
  + * They're differentiated from real vhosts by server->is_virtual == 0.
  + *
  + * The VHASH_DEFAULT_BUCKET is a list of all the _default_ server_addr_recs.
  + *
  + * The VHASH_MAIN_BUCKET is a list of one server_addr_rec from each name
  + * based vhost.  At the moment none of the name based vhost code is hashed,
  + * and it's just more convenient to have a list of all the name-based vhosts
  + * rather than a list of all the names of name-based vhosts.
  + */
  +server_rec_chain *vhash_table[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
  +
   char server_root[MAX_STRING_LEN];
   char server_confname[MAX_STRING_LEN];
   char coredump_dir[MAX_STRING_LEN];
  @@ -1694,50 +1711,128 @@
       return (suexec_enabled);
   }
   
  +/* This hashing function is designed to get good distribution in the cases
  + * where the server is handling entire "networks" of servers.  i.e. a
  + * whack of /24s.  This is probably the most common configuration for
  + * ISPs with large virtual servers.
  + *
  + * Hash function provided by David Hankins.
  + */
  +static inline unsigned hash_inaddr( unsigned key )
  +{
  +    key ^= (key >> 16);
  +    return ((key >> 8) ^ key) % VHASH_TABLE_SIZE;
  +}
   
   static server_rec *find_virtual_server (struct in_addr server_ip,
   				unsigned port, server_rec *server)
   {
  -    server_rec *virt;
       server_addr_rec *sar;
  -    server_rec *def;
  +    server_rec_chain *trav;
  +    unsigned buk;
   
  -    def = server;
  -    for (virt = server->next; virt; virt = virt->next) {
  -	for (sar = virt->addrs; sar; sar = sar->next) {
  -	    if ((virt->is_virtual == 1) &&	/* VirtualHost */
  -		(sar->host_addr.s_addr == htonl(INADDR_ANY) ||
  -		sar->host_addr.s_addr == server_ip.s_addr) &&
  -		(sar->host_port == 0 || sar->host_port == port)) {
  -		return virt;
  -	    } else if ( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR
  -		&& (sar->host_port == 0 || sar->host_port == port)) {
  -		/* this is so that you can build a server that is the
  -		    "default" for any interface which isn't explicitly
  -		    specified.  So that you can implement "deny anything
  -		    which isn't expressly permitted" -djg */
  -		def = virt;
  +    /* scan the hash table for an exact match first */
  +    buk = hash_inaddr( server_ip.s_addr );
  +    for (trav = vhash_table[buk]; trav; trav = trav->next) {
  +	sar = trav->sar;
  +	if ((sar->host_addr.s_addr == server_ip.s_addr)
  +	    && (sar->host_port == 0 || sar->host_port == port)) {
  +	    if (trav->server->is_virtual) {
  +		return trav->server;
   	    }
  +	    /* otherwise it's the "main server address", and we need
  +	     * to do _default_ handling
  +	     */
  +	    break;
   	}
       }
   
  -    return def;
  +    /* return the main server for now, might switch to a _default_ later */
  +    return server_conf;
   }
   
  -void default_server_hostnames(server_rec *s)
  +
  +static void add_to_vhash_bucket (unsigned buk, server_rec *s,
  +    server_addr_rec *sar)
  +{
  +    server_rec_chain *hashme;
  +
  +    hashme = palloc (pconf, sizeof (*hashme));
  +    hashme->server = s;
  +    hashme->sar = sar;
  +    hashme->next = vhash_table[buk];
  +    vhash_table[buk] = hashme;
  +}
  +
  +
  +/* hash table statistics, keep this in here for the beta period so
  + * we can find out if the hash function is ok
  + */
  +#define VHASH_STATISTICS
  +#ifdef VHASH_STATISTICS
  +static int vhash_compare (const void *a, const void *b)
  +{
  +    return (*(const int *)b - *(const int *)a);
  +}
  +
  +static void dump_vhash_statistics (void)
  +{
  +    unsigned count[VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP];
  +    int i;
  +    server_rec_chain *src;
  +    unsigned total;
  +    char buf[HUGE_STRING_LEN];
  +    char *p;
  +
  +    total = 0;
  +    for (i = 0; i < VHASH_TABLE_SIZE + VHASH_EXTRA_SLOP; ++i) {
  +	count[i] = 0;
  +	for (src = vhash_table[i]; src; src = src->next) {
  +	    ++count[i];
  +	    if (i < VHASH_TABLE_SIZE) {
  +		/* don't count the slop buckets in the total */
  +		++total;
  +	    }
  +	}
  +    }
  +    qsort (count, VHASH_TABLE_SIZE, sizeof (count[0]), vhash_compare);
  +    p = buf + ap_snprintf (buf, sizeof (buf),
  +	"vhash: total hashed = %u, avg chain = %u, #default = %u, "
  +	"#name-vhost = %u, chain lengths (count x len):",
  +	total, total / VHASH_TABLE_SIZE, count [VHASH_DEFAULT_BUCKET],
  +	count [VHASH_MAIN_BUCKET]);
  +    total = 1;
  +    for (i = 1; i < VHASH_TABLE_SIZE; ++i) {
  +	if (count[i-1] != count[i]) {
  +	    p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u",
  +		total, count[i-1]);
  +	    total = 1;
  +	} else {
  +	    ++total;
  +	}
  +    }
  +    p += ap_snprintf (p, sizeof (buf) - (p - buf), " %ux%u",
  +	total, count[VHASH_TABLE_SIZE-1]);
  +    log_error(buf, server_conf);
  +}
  +#endif
  +
  +
  +void default_server_hostnames(server_rec *main_s)
   {
       struct hostent *h;
  -    struct in_addr *main_addr;
  -    int num_addr;
       char *def_hostname;
       int n;
       server_addr_rec *sar;
  +    server_addr_rec *main_sar;
       int has_default_vhost_addr;
  -    unsigned mainport = s->port;
       int from_local=0;  
  +    server_rec *s;
  +    int is_namevhost;
   
       /* Main host first */
  -    
  +    s = main_s;
  +
       if (!s->server_hostname) {
   	s->server_hostname = get_local_host(pconf);
   	from_local = 1;
  @@ -1756,31 +1851,53 @@
   	};
   	exit(1);
       }
  -    /* we need to use gethostbyaddr below... and since it shares a static
  -    	area with gethostbyname it'd clobber the value we just got.  So
  -    	we need to make a copy.  -djg */
  -    for (num_addr = 0; h->h_addr_list[num_addr] != NULL; num_addr++) {
  -    	/* nop */
  -    }
  -    main_addr = palloc( pconf, sizeof( *main_addr ) * num_addr );
  -    for (n = 0; n < num_addr; n++) {
  -    	main_addr[n] = *(struct in_addr *)h->h_addr_list[n];
  +
  +    /* we fill in s->addrs for two reasons.  One so that we have
  +     * server_addr_recs for the hash table.  And also because gethostbyname
  +     * and gethostbyaddr share a static data area and our result would be
  +     * clobbered here if we didn't copy it somewhere. -djg
  +     */
  +    for (n = 0; h->h_addr_list[n] != NULL; n++) {
  +    	main_sar = pcalloc (pconf, sizeof (*main_sar));
  +	main_sar->host_addr = *(struct in_addr *)h->h_addr_list[n];
  +	main_sar->host_port = 0;	/* we want this to match all ports */
  +	main_sar->virthost = s->server_hostname;
  +	main_sar->next = s->addrs;
  +	s->addrs = main_sar;
  +	add_to_vhash_bucket (hash_inaddr (main_sar->host_addr.s_addr),
  +	    s, main_sar);
       }
   
       /* Then virtual hosts */
  -    
  +
       for (s = s->next; s; s = s->next) {
   	/* Check to see if we might be a HTTP/1.1 virtual host - same IP */
   	has_default_vhost_addr = 0;
  -	for (n = 0; n < num_addr; n++) {
  -	    for(sar = s->addrs; sar; sar = sar->next) {
  -		if (sar->host_addr.s_addr == main_addr[n].s_addr &&
  -		    s->port == mainport)
  +	for(sar = s->addrs; sar; sar = sar->next) {
  +	    is_namevhost = 0; /* guess addr doesn't match main server */
  +	    for (main_sar = main_s->addrs; main_sar; main_sar=main_sar->next) {
  +		if (sar->host_addr.s_addr == main_sar->host_addr.s_addr
  +		    && s->port == main_s->port) {
  +		    add_to_vhash_bucket (VHASH_MAIN_BUCKET, s, sar);
  +		    /* XXX: only add it to the main bucket once since we're
  +		     * not optimizing name-vhosts yet */
   		    s->is_virtual = 2;
  -		if( sar->host_addr.s_addr == DEFAULT_VHOST_ADDR ) {
  -		    has_default_vhost_addr = 1;
  +		    is_namevhost = 1;
  +		    break;
   		}
   	    }
  +	    if (sar->host_addr.s_addr == DEFAULT_VHOST_ADDR
  +		|| sar->host_addr.s_addr == INADDR_ANY) {
  +		/* XXX: this probably isn't the best handling of INADDR_ANY */
  +		/* add it to default bucket for each appropriate sar
  +		 * since we need to do a port test
  +		 */
  +		has_default_vhost_addr = 1;
  +		add_to_vhash_bucket (VHASH_DEFAULT_BUCKET, s, sar);
  +	    } else if (!is_namevhost) {
  +		add_to_vhash_bucket (hash_inaddr (sar->host_addr.s_addr),
  +		    s, sar);
  +	    }
   	}
   
   	/* FIXME: some of this decision doesn't make a lot of sense in
  @@ -1819,6 +1936,10 @@
   	    }
   	}
       }
  +
  +#ifdef VHASH_STATISTICS
  +    dump_vhash_statistics ();
  +#endif
   }
   
   conn_rec *new_connection (pool *p, server_rec *server, BUFF *inout,
  
  
  
  1.150     +43 -13    apache/src/http_protocol.c
  
  Index: http_protocol.c
  ===================================================================
  RCS file: /export/home/cvs/apache/src/http_protocol.c,v
  retrieving revision 1.149
  retrieving revision 1.150
  diff -u -r1.149 -r1.150
  --- http_protocol.c	1997/08/04 02:55:11	1.149
  +++ http_protocol.c	1997/08/05 06:02:42	1.150
  @@ -68,6 +68,7 @@
   				 */
   #include "util_date.h"          /* For parseHTTPdate and BAD_DATE */
   #include <stdarg.h>
  +#include "http_conf_globals.h"
   
   #define SET_BYTES_SENT(r) \
     do { if (r->sent_bodyct) \
  @@ -692,6 +693,7 @@
     unsigned port = (*hostname) ? atoi(hostname) : 80;
     server_rec *s;
     int l;
  +  server_rec_chain *src;
   
     if (port && (port != r->server->port))
       return;
  @@ -703,15 +705,17 @@
   
     r->hostname = host;
   
  -  for (s = r->server->next; s; s = s->next) {
  +  for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) {
       const char *names;
       server_addr_rec *sar;
   
  -    if (s->addrs == NULL) {
  -	/* this server has been disabled because of DNS screwups during
  -	    configuration */
  -	continue;
  -    }
  +    s = src->server;
  +
  +    /* s->addrs != NULL because it's in a hash bucket */
  +
  +    /* Note that default_server_hostnames has ensured that each name-vhost
  +     * appears only once in the VHASH_MAIN_BUCKET.
  +     */
   
       if ((!strcasecmp(host, s->server_hostname)) && (port == s->port)) {
         r->server = r->connection->server = s;
  @@ -754,13 +758,15 @@
   
   void check_serverpath (request_rec *r) {
     server_rec *s;
  +  server_rec_chain *src;
   
     /* This is in conjunction with the ServerPath code in
      * http_core, so we get the right host attached to a non-
      * Host-sending request.
      */
   
  -  for (s = r->server->next; s; s = s->next) {
  +  for (src = vhash_table[VHASH_MAIN_BUCKET]; src; src = src->next) {
  +    s = src->server;
       if (s->addrs && s->path && !strncmp(r->uri, s->path, s->pathlen) &&
   	(s->path[s->pathlen - 1] == '/' ||
   	 r->uri[s->pathlen] == '/' ||
  @@ -769,6 +775,24 @@
     }
   }
   
  +
  +static void check_default_server (request_rec *r)
  +{
  +    server_addr_rec *sar;
  +    server_rec_chain *trav;
  +    unsigned port;
  +
  +    port = ntohs (r->connection->local_addr.sin_port);
  +    for (trav = vhash_table[VHASH_DEFAULT_BUCKET]; trav; trav = trav->next) {
  +	sar = trav->sar;
  +	if (sar->host_port == 0 || sar->host_port == port) {
  +	    /* match! */
  +	    r->server = r->connection->server = trav->server;
  +	    return;
  +	}
  +    }
  +}
  +
   request_rec *read_request (conn_rec *conn)
   {
       request_rec *r = (request_rec *)pcalloc (conn->pool, sizeof(request_rec));
  @@ -815,12 +839,18 @@
   
       r->status = HTTP_OK;                /* Until further notice. */
   
  -    /* handle Host header here, to get virtual server */
  -
  -    if (r->hostname || (r->hostname = table_get(r->headers_in, "Host")))
  -      check_hostalias(r);
  -    else
  -      check_serverpath(r);
  +    /* if it's the main server so far, we have to do name-vhost style lookups */
  +    if (r->server->is_virtual == 0) {
  +	if (r->hostname || (r->hostname = table_get(r->headers_in, "Host")))
  +	    check_hostalias(r);
  +	else
  +	    check_serverpath(r);
  +	/* if that failed, then look for a default server */
  +	if (r->server->is_virtual == 0) {
  +	    check_default_server (r);
  +	}
  +    }
  +    /* we have finished the search for a vhost */
       
       /* we may have switched to another server */
       r->per_dir_config = r->server->lookup_defaults;
  
  
  
  1.137     +23 -1     apache/src/httpd.h
  
  Index: httpd.h
  ===================================================================
  RCS file: /export/home/cvs/apache/src/httpd.h,v
  retrieving revision 1.136
  retrieving revision 1.137
  diff -u -r1.136 -r1.137
  --- httpd.h	1997/08/03 20:30:57	1.136
  +++ httpd.h	1997/08/05 06:02:43	1.137
  @@ -293,6 +293,19 @@
   #define SCOREBOARD_MAINTENANCE_INTERVAL 1000000
   #endif
   
  +/* This defines the size of the hash table used for hashing ip addresses
  + * of virtual hosts.  It must be a power of two.
  + */
  +#ifndef VHASH_TABLE_SIZE
  +#define VHASH_TABLE_SIZE 256
  +#endif
  +/* bucket where _default_ entries are stored */
  +#define VHASH_DEFAULT_BUCKET	(VHASH_TABLE_SIZE)
  +/* bucket where name-vhosts are stored */
  +#define VHASH_MAIN_BUCKET	((VHASH_TABLE_SIZE)+1)
  +/* number of magic buckets */
  +#define VHASH_EXTRA_SLOP	2
  +
   /* Number of requests to try to handle in a single process.  If <= 0,
    * the children don't die off.  That's the default here, since I'm still
    * interested in finding and stanching leaks.
  @@ -498,7 +511,6 @@
       const struct htaccess_result *next;
   };
   
  -
   typedef struct conn_rec conn_rec;
   typedef struct server_rec server_rec;
   typedef struct request_rec request_rec;
  @@ -677,6 +689,16 @@
       char *virthost;		/* The name given in <VirtualHost> */
   };
   
  +/* Meta linear list for hashes.  Each server_rec can be in possibly multiple
  + * hash chains since it can have multiple ips
  + */
  +typedef struct server_rec_chain server_rec_chain;
  +struct server_rec_chain {
  +    server_rec_chain *next;
  +    server_rec *server;
  +    server_addr_rec *sar;	/* the record causing it to be in
  +    				 * this chain */
  +};
   
   struct server_rec {
   
  
  
  

Mime
View raw message