Return-Path: Delivered-To: new-httpd-archive@hyperreal.org Received: (qmail 2611 invoked by uid 6000); 28 May 1998 16:57:27 -0000 Received: (qmail 2592 invoked from network); 28 May 1998 16:57:25 -0000 Received: from twinlark.arctic.org (204.62.130.91) by taz.hyperreal.org with SMTP; 28 May 1998 16:57:25 -0000 Received: (qmail 14133 invoked by uid 500); 28 May 1998 16:58:46 -0000 Date: Thu, 28 May 1998 09:58:46 -0700 (PDT) From: Dean Gaudet To: new-httpd@apache.org Subject: Re: [PATCH] Proxy Garbage Collection fixes In-Reply-To: Message-ID: X-Comment: Visit http://www.arctic.org/~dgaudet/legal for information regarding copyright and disclaimer. MIME-Version: 1.0 Content-Type: MULTIPART/MIXED; BOUNDARY=uAKRQypu60I7Lcqm Content-ID: Sender: new-httpd-owner@apache.org Precedence: bulk Reply-To: new-httpd@apache.org This message is in MIME format. The first part should be readable text, while the remaining parts are likely unreadable without MIME-aware tools. Send mail to mime@docserver.cac.washington.edu for more info. --uAKRQypu60I7Lcqm Content-Type: TEXT/PLAIN; CHARSET=us-ascii Content-ID: Oh duh, I should have just read your patch first, you did all this research already :) Dean On Thu, 28 May 1998, Dean Gaudet wrote: > > > On Thu, 28 May 1998, Martin Kraemer wrote: > > > Furthermore, it contains the following fixes: > > * "real world" operating systems usually allocate files on block > > granularity of 512 bytes or bigger. The old algorithm did not > > account for that, and would think 1023 files with 1 byte each would > > take up less than 1 kB ;-) > > The block size is currently initialized to 512 bytes, but should > > probably be configurable. But with this fix, mod_proxy's idea of > > disk usage is only "10% off" the OS's idea (it used to be >25% off). 
> > On Solaris, struct stat contains: > > long st_blksize; /* Preferred I/O block size */ > blkcnt_t st_blocks; /* Number of 512 byte blocks allocated*/ > > st_blksize > A hint as to the "best" unit size for I/O > operations. This field is not defined for block > special or character special files. > > st_blocks The total number of physical blocks of size 512 > bytes actually allocated on disk. This field is > not defined for block special or character special > files. > > i.e. you could use st_blocks. > > On Linux, struct stat contains: > > unsigned long st_blksize; /* blocksize for filesystem I/O */ > unsigned long st_blocks; /* number of blocks allocated */ > > And there's this humorous note: > > Note that st_blocks may not always be in terms of blocks of size > st_blksize, and that st_blksize may instead provide a notion of > the "preferred" blocksize for efficient file system I/O. > > i.e. they're useless as documented... looking in the kernel they actually > have the same meaning as on Solaris. > > But if you read the code in GNU fileutils/src/system.h, search for > st_blocks, you'll see it is a hopeless mess of incompatibility. > > Note: st_blocks, when it's useful, appears to also contain the number > of indirect blocks in the inode. I forget when a file needs them, > is it 48k and above? 
> > Dean > --uAKRQypu60I7Lcqm Content-Type: TEXT/PLAIN; CHARSET=us-ascii Content-ID: Content-Description: Detached Garbage Collection plus other GC fixes diff -u /home/martin/apachen/X/apache-1.3/src/modules/proxy/mod_proxy.h proxy/mod_proxy.h --- /home/martin/apachen/X/apache-1.3/src/modules/proxy/mod_proxy.h Thu May 28 01:57:59 1998 +++ proxy/mod_proxy.h Thu May 28 13:21:01 1998 @@ -188,11 +188,11 @@ /* static information about the local cache */ struct cache_conf { const char *root; /* the location of the cache directory */ - int space; /* Maximum cache size (in 1024 bytes) */ - int maxexpire; /* Maximum time to keep cached files in secs */ - int defaultexpire; /* default time to keep cached file in secs */ + off_t space; /* Maximum cache size (in 1024 bytes) */ + time_t maxexpire; /* Maximum time to keep cached files in secs */ + time_t defaultexpire; /* default time to keep cached file in secs */ double lmfactor; /* factor for estimating expires date */ - int gcinterval; /* garbage collection interval, in seconds */ + time_t gcinterval; /* garbage collection interval, in seconds */ int dirlevels; /* Number of levels of subdirectories */ int dirlength; /* Length of subdirectory names */ }; @@ -207,7 +207,7 @@ array_header *nocaches; char *domain; /* domain name to use in absence of a domain name in the request */ int req; /* true if proxy requests are enabled */ - int recv_buffer_size; + size_t recv_buffer_size; } proxy_server_conf; struct hdr_entry { @@ -229,7 +229,7 @@ time_t lmod; /* last-modified date of cached entity */ time_t date; /* the date the cached file was last touched */ int version; /* update count of the file */ - unsigned int len; /* content length */ + off_t len; /* content length */ char *protocol; /* Protocol, and major/minor number, e.g. 
HTTP/1.1 */ int status; /* the status of the cached file */ char *resp_line; /* the whole status like (protocol, code + message) */ diff -u /home/martin/apachen/X/apache-1.3/src/modules/proxy/proxy_cache.c proxy/proxy_cache.c --- /home/martin/apachen/X/apache-1.3/src/modules/proxy/proxy_cache.c Thu May 28 01:58:10 1998 +++ proxy/proxy_cache.c Thu May 28 13:21:02 1998 @@ -71,33 +71,37 @@ DEF_Explain -#ifndef abs -#define abs(c) ((c) >= 0 ? (c) : -(c)) -#endif - struct gc_ent { unsigned long int len; time_t expire; char file[HASH_LEN + 1]; - }; -static int gcdiff(const void *ap, const void *bp) -{ - const struct gc_ent *a = *(const struct gc_ent * const *) ap; - const struct gc_ent *b = *(const struct gc_ent * const *) bp; - - if (a->expire > b->expire) - return 1; - else if (a->expire < b->expire) - return -1; - else - return 0; -} - -static int curbytes, cachesize, every; -static unsigned long int curblocks; -static time_t garbage_now, garbage_expire; +/* Poor man's 61 bit arithmetic */ +typedef struct { + long lower; /* lower 30 bits of result */ + long upper; /* upper 31 bits of result */ +} long61_t; + +/* FIXME: The block size can be different on a `per file system' base. + * This would make automatic detection highly OS specific. + * In the GNU fileutils code for du(1), you can see how complicated it can + * become to detect the block size. And, with BSD-4.x fragments, it + * it even more difficult to get precise results. + * As a compromise (and to improve on the incorrect counting of cache + * size on byte level, omitting directory sizes entirely, which was + * used up to apache-1.3b7) we're rounding to multiples of 512 here. + * Your file system may be using larger blocks (I certainly hope so!) + * but it will hardly use smaller blocks. + * (So this approximation is still closer to reality than the old behavior). + * The best solution would be automatic detection, the next best solution + * IMHO is a sensible default and the possibility to override it. 
+ */ + +#define ROUNDUP2BLOCKS(_bytes) (((_bytes)+block_size-1) & ~(block_size-1)) +static long block_size = 512; /* this must be a power of 2 */ +static long61_t curbytes, cachesize; +static time_t every, garbage_now, garbage_expire; static char *filename; static mutex *garbage_mutex = NULL; @@ -114,6 +118,10 @@ static int sub_garbage_coll(request_rec *r, array_header *files, const char *cachedir, const char *cachesubdir); static void help_proxy_garbage_coll(request_rec *r); +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__) +static void detached_proxy_garbage_coll(request_rec *r); +#endif + void ap_proxy_garbage_coll(request_rec *r) { @@ -128,7 +136,13 @@ inside = 1; (void) ap_release_mutex(garbage_mutex); + ap_block_alarms(); /* avoid SIGALRM on big cache cleanup */ +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__) + detached_proxy_garbage_coll(r); +#else help_proxy_garbage_coll(r); +#endif + ap_unblock_alarms(); (void) ap_acquire_mutex(garbage_mutex); inside = 0; @@ -136,6 +150,115 @@ } +static void +add_long61 (long61_t *accu, long val) +{ + /* Add in lower 30 bits */ + accu->lower += (val & 0x3FFFFFFFL); + /* add in upper bits, and carry */ + accu->upper += (val >> 30) + ((accu->lower & ~0x3FFFFFFFL) != 0L); + /* Clear carry */ + accu->lower &= 0x3FFFFFFFL; +} + +static void +sub_long61 (long61_t *accu, long val) +{ + int carry = (val & 0x3FFFFFFFL) > accu->lower; + /* Subtract lower 30 bits */ + accu->lower = accu->lower - (val & 0x3FFFFFFFL) + ((carry) ? 0x40000000 : 0); + /* add in upper bits, and carry */ + accu->upper -= (val >> 30) + carry; +} + +/* Compare two long61's: + * return <0 when left < right + * return 0 when left == right + * return >0 when left > right + */ +static long +cmp_long61 (long61_t *left, long61_t *right) +{ + return (left->upper == right->upper) ? 
(left->lower - right->lower) + : (left->upper - right->upper); +} + +/* Compare two gc_ent's, sort them by expiration date */ +static int gcdiff(const void *ap, const void *bp) +{ + const struct gc_ent *a = (const struct gc_ent * const) ap; + const struct gc_ent *b = (const struct gc_ent * const) bp; + + if (a->expire > b->expire) + return 1; + else if (a->expire < b->expire) + return -1; + else + return 0; +} + +#if !defined(WIN32) && !defined(MPE) && !defined(__EMX__) +static void detached_proxy_garbage_coll(request_rec *r) +{ + pid_t pid; + int status; + pid_t pgrp; + + switch (pid = fork()) { + case -1: + ap_log_error(APLOG_MARK, APLOG_ERR, r->server, + "proxy: fork() for cache cleanup failed"); + return; + + case 0: /* Child */ + + /*@@@ FIXME: Should we set the socket fd to CLOEXEC here? */ + + /* Fork twice to disassociate from the child */ + switch (pid = fork()) { + case -1: + ap_log_error(APLOG_MARK, APLOG_ERR, r->server, + "proxy: fork(2nd) for cache cleanup failed"); + exit(1); + + case 0: /* Child */ + /* The setpgrp() stuff was snarfed from http_main.c */ +#ifndef NO_SETSID + if ((pgrp = setsid()) == -1) { + perror("setsid"); + fprintf(stderr, "httpd: setsid failed\n"); + exit(1); + } +#elif defined(NEXT) || defined(NEWSOS) + if (setpgrp(0, getpid()) == -1 || (pgrp = getpgrp(0)) == -1) { + perror("setpgrp"); + fprintf(stderr, "httpd: setpgrp or getpgrp failed\n"); + exit(1); + } +#else + if ((pgrp = setpgrp(getpid(), 0)) == -1) { + perror("setpgrp"); + fprintf(stderr, "httpd: setpgrp failed\n"); + exit(1); + } +#endif + help_proxy_garbage_coll(r); + exit(0); + + default: /* Father */ + /* After grandson has been forked off, */ + /* there's nothing else to do. 
*/ + exit(0); + } + default: + /* Wait until grandson has been forked off */ + /* (without wait we'd leave a zombie) */ + waitpid(pid, &status, 0); + return; + } +} +#endif /* ndef WIN32 */ + static void help_proxy_garbage_coll(request_rec *r) { const char *cachedir; @@ -145,17 +268,24 @@ const struct cache_conf *conf = &pconf->cache; array_header *files; struct stat buf; - struct gc_ent *fent, **elts; + struct gc_ent *fent; int i, timefd; - static time_t lastcheck = BAD_DATE; /* static data!!! */ + static time_t lastcheck = BAD_DATE; /* static (per-process) data!!! */ cachedir = conf->root; - cachesize = conf->space; + /* configured size is given in kB. Make it bytes, convert to long61_t: */ + cachesize.lower = cachesize.upper = 0; + add_long61(&cachesize, conf->space << 10); every = conf->gcinterval; if (cachedir == NULL || every == -1) return; garbage_now = time(NULL); + /* Usually, the modification time of /.time can only increase. + * Thus, even with several child processes having their own copy of + * lastcheck, if time(NULL) still < lastcheck then it's not time + * for GC yet. 
+ */ if (garbage_now != -1 && lastcheck != BAD_DATE && garbage_now < lastcheck + every) return; @@ -176,7 +306,7 @@ ap_log_error(APLOG_MARK, APLOG_ERR, r->server, "proxy: creat(%s)", filename); else - lastcheck = abs(garbage_now); /* someone else got in there */ + lastcheck = garbage_now; /* someone else got in there */ ap_unblock_alarms(); return; } @@ -192,22 +322,24 @@ ap_log_error(APLOG_MARK, APLOG_ERR, r->server, "proxy: utimes(%s)", filename); } - files = ap_make_array(r->pool, 100, sizeof(struct gc_ent *)); - curblocks = 0; - curbytes = 0; + files = ap_make_array(r->pool, 100, sizeof(struct gc_ent)); + curbytes.upper = curbytes.lower = 0L; sub_garbage_coll(r, files, cachedir, "/"); - if (curblocks < cachesize || curblocks + curbytes <= cachesize) { + if (cmp_long61(&curbytes, &cachesize) < 0L) { + ap_log_error(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, r->server, + "proxy GC: Cache is %ld%% full (nothing deleted)", + ((curbytes.upper<<20)|(curbytes.lower>>10))*100/conf->space); ap_unblock_alarms(); return; } - qsort(files->elts, files->nelts, sizeof(struct gc_ent *), gcdiff); + /* sort the files we found by expiration date */ + qsort(files->elts, files->nelts, sizeof(struct gc_ent), gcdiff); - elts = (struct gc_ent **) files->elts; for (i = 0; i < files->nelts; i++) { - fent = elts[i]; + fent = &((struct gc_ent *) files->elts)[i]; sprintf(filename, "%s%s", cachedir, fent->file); Explain3("GC Unlinking %s (expiry %ld, garbage_now %ld)", filename, fent->expire, garbage_now); #if TESTING @@ -221,16 +353,15 @@ else #endif { - curblocks -= fent->len >> 10; - curbytes -= fent->len & 0x3FF; - if (curbytes < 0) { - curbytes += 1024; - curblocks--; - } - if (curblocks < cachesize || curblocks + curbytes <= cachesize) + sub_long61(&curbytes, ROUNDUP2BLOCKS(fent->len)); + if (cmp_long61(&curbytes, &cachesize) < 0) break; } } + + ap_log_error(APLOG_MARK, APLOG_DEBUG|APLOG_NOERRNO, r->server, + "proxy GC: Cache is %ld%% full (%d deleted)", + 
((curbytes.upper<<20)|(curbytes.lower>>10))*100/conf->space, i); ap_unblock_alarms(); } @@ -342,6 +473,9 @@ rmdir(newcachedir); #endif --nfiles; + } else { + /* Directory is not empty. Account for its size: */ + add_long61(&curbytes, ROUNDUP2BLOCKS(buf.st_size)); } continue; } @@ -378,22 +512,14 @@ * file. * */ - /* FIXME: We should make the array an array of gc_ents, not gc_ent *s - */ - fent = ap_palloc(r->pool, sizeof(struct gc_ent)); + fent = (struct gc_ent *) ap_push_array(files); fent->len = buf.st_size; fent->expire = garbage_expire; strcpy(fent->file, cachesubdir); strcat(fent->file, ent->d_name); - *(struct gc_ent **) ap_push_array(files) = fent; /* accumulate in blocks, to cope with directories > 4Gb */ - curblocks += buf.st_size >> 10; /* Kbytes */ - curbytes += buf.st_size & 0x3FF; - if (curbytes >= 1024) { - curbytes -= 1024; - curblocks++; - } + add_long61(&curbytes, ROUNDUP2BLOCKS(buf.st_size)); } closedir(dir); @@ -467,7 +593,7 @@ q = ap_proxy_get_header(c->hdrs, "Content-Length"); if (q == NULL) { strp = ap_palloc(p, 15); - ap_snprintf(strp, 15, "%u", c->len); + ap_snprintf(strp, 15, "%lu", c->len); ap_proxy_add_header(c->hdrs, "Content-Length", strp, HDR_REP); } } @@ -590,7 +716,7 @@ } ap_pclosef(r->pool, cachefp->fd); Explain0("Use local copy, cached file hasn't changed"); - return USE_LOCAL_COPY; + return HTTP_NOT_MODIFIED; } /* Ok, has been modified */ @@ -735,7 +861,7 @@ /* no date header! */ /* add one; N.B. 
use the time _now_ rather than when we were checking the cache */ - date = abs(now); + date = now; p = ap_gm_timestr_822(r->pool, now); dates = ap_proxy_add_header(resp_hdrs, "Date", p, HDR_REP); Explain0("Added date header"); @@ -775,10 +901,10 @@ double maxex = conf->cache.maxexpire; if (x > maxex) x = maxex; - expc = abs(now) + (int) x; + expc = now + (int) x; } else - expc = abs(now) + conf->cache.defaultexpire; + expc = now + conf->cache.defaultexpire; Explain1("Expiry date calculated %ld", expc); } @@ -820,7 +946,7 @@ ap_pclosef(r->pool, c->fp->fd); Explain0("Remote document not modified, use local copy"); /* CHECKME: Is this right? Shouldn't we check IMS again here? */ - return USE_LOCAL_COPY; + return HTTP_NOT_MODIFIED; } else { /* return the whole document */ --uAKRQypu60I7Lcqm--