httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Andreas Steinmetz <...@domdv.de>
Subject Re: mod_disk_cache directives naming convention
Date Fri, 08 Oct 2004 16:35:00 GMT
Justin Erenkrantz wrote:
> Feel free to submit a patch that efficiently allows the constraint of 
> the cache size.  I just don't see a way to do that as mod_disk_cache 
> does not have any indexing.
> 
> IMHO, instead of making a false promise, we should remove it.  If we 
> were to add such a feature later, we can add such directives 
> accordingly.  -- justin

After looking at the code and thinking about the performance hits involved
I do believe Justin is right. Therefore I created a little "htcacheclean"
utility that does the job of cache cleaning outside of apache itself.
The utility is right now not complete and intended to be a base for further
discussion. Please see below for the code (based on 2.0.52).
-- 
Andreas Steinmetz                       SPAMmers use robotrap@domdv.de


/* Copyright 2001-2004 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

/*
  * htcacheclean.c: simple program for cleaning of
  * the disk cache of the Apache HTTP server
  *
  * Contributed by Andreas Steinmetz <ast@domdv.de>
  * 8 Oct 2004
  */

#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"

/*
 * Dry-run switch: while DEBUG is defined, delete_file() and delete_entry()
 * only print "would delete <path>" to errfile instead of calling
 * apr_file_remove().
 */
#define DEBUG

/* mod_disk_cache.c extract start */

/*
 * Fixed-size record that process_dir() reads, as raw bytes, from the
 * beginning of every ".header" file. Because it is filled by a plain
 * binary read, the order and types of the fields must not be changed here.
 * Entries whose "format" differs from DISK_FORMAT_VERSION are not used.
 * NOTE(review): presumably this has to match the definition in the
 * mod_disk_cache.c that wrote the cache -- confirm when updating.
 */
#define DISK_FORMAT_VERSION 0
typedef struct {
     /* Indicates the format of the header struct stored on-disk. */
     int format;
     /* The HTTP status code returned for this response.  */
     int status;
     /* The size of the entity name that follows. */
     apr_size_t name_len;
     /* The number of times we've cached this entity. */
     apr_size_t entity_version;
     /* Miscellaneous time values. */
     apr_time_t date;
     apr_time_t expire;
     apr_time_t request_time;
     apr_time_t response_time;
} disk_cache_info_t;

/* mod_disk_cache.c extract end */

#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_NAME)

#define HEADER 1
#define DATA 2
#define TEMP 4
#define HEADERDATA (HEADER|DATA)

/*
 * Bookkeeping record used by process_dir() while it scans one directory.
 * Records are kept in a hash table so that the ".header" and ".data"
 * files belonging to the same name end up in the same record.
 */
typedef struct
{
     char *basename;   /* path below the cache root; ".header"/".data" removed */
     int type;         /* combination of HEADER, DATA and TEMP */
     apr_time_t htime; /* modification time of the ".header" file */
     apr_time_t dtime; /* modification time of the ".data" file */
     apr_off_t hsize;  /* size of the ".header" file */
     apr_off_t dsize;  /* size of the ".data" file */
} DIRENTRY;

/*
 * One complete cache object (header and data file both present and the
 * header readable) as collected by process_dir(). All entries are chained
 * into a singly linked list starting at "anchor" and later examined by
 * purge().
 *
 * The structure is allocated with extra room behind it for the name:
 * sizeof(ENTRY) + strlen(name) + 1 bytes. purge() marks an entry as
 * already deleted by setting basename[0] to '\0'.
 */
typedef struct _entry
{
     struct _entry *next;      /* next entry in the list, NULL at the end */
     apr_time_t expire;        /* "expire" value read from the header file */
     apr_time_t response_time; /* "response_time" read from the header file */
     apr_time_t htime;         /* modification time of the ".header" file */
     apr_time_t dtime;         /* modification time of the ".data" file */
     apr_off_t hsize;          /* size of the ".header" file */
     apr_off_t dsize;          /* size of the ".data" file */
     char basename[];          /* path below the cache root, no suffix */
} ENTRY;

/* Set by -r: process_dir() also removes unusable, orphaned and temp files. */
static int realclean;
/* Set by -v: printstats() reports the cache size before and after. */
static int verbose;
/* Head of the list of cache objects collected by process_dir(). */
static ENTRY *anchor;
/* Time at program start, used by purge() for all time comparisons. */
static apr_time_t now;
/* APR file handle for standard error, opened in main(). */
static apr_file_t *errfile;

/*
 * Remove the single file "<path>/<basename>".
 *
 * path      directory prefix
 * basename  name appended to path after a "/"
 * p         pool used for the temporary path string (and by APR)
 *
 * In a DEBUG build nothing is removed; the path is only reported on
 * errfile.
 */
static void delete_file(char *path, char *basename, apr_pool_t *p)
{
     char *target = apr_pstrcat(p, path, "/", basename, NULL);

#ifdef DEBUG
     apr_file_printf(errfile, "would delete %s\n", target);
#else
     apr_file_remove(target, p);
#endif
}

/*
 * Remove both files of a cache object: "<path>/<basename>.header" first,
 * then "<path>/<basename>.data".
 *
 * path      directory prefix
 * basename  object name without suffix, appended to path after a "/"
 * p         pool used for the temporary path strings (and by APR)
 *
 * In a DEBUG build nothing is removed; the paths are only reported on
 * errfile.
 */
static void delete_entry(char *path, char *basename, apr_pool_t *p)
{
     static const char *const suffixes[] = { ".header", ".data" };
     char *target;
     int n;

     for (n = 0; n < 2; n++) {
         target = apr_pstrcat(p, path, "/", basename, suffixes[n], NULL);
#ifdef DEBUG
         apr_file_printf(errfile, "would delete %s\n", target);
#else
         apr_file_remove(target, p);
#endif
     }
}

static int process_dir(char *path, int baselen, apr_pool_t *pool)
{
     apr_dir_t *dir;
     apr_pool_t *p;
     apr_hash_t *h;
     apr_hash_index_t *i;
     apr_file_t *fd;
     apr_finfo_t info;
     apr_size_t len;
     char *nextpath;
     char *ext;
     DIRENTRY *d;
     ENTRY *e;
     int skip;
     disk_cache_info_t disk_info;


     if (apr_dir_open(&dir, path, pool) != APR_SUCCESS)
         return 1;

     apr_pool_create(&p, pool);
     h = apr_hash_make(p);
     fd = NULL;

     skip = baselen;
     if (path[baselen])
         skip++;

     while (apr_dir_read(&info, DIRINFO, dir) == APR_SUCCESS) {
         if (info.filetype == APR_DIR) {
             if (info.name[0] == '.')
                 continue;
             nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
             if (process_dir(nextpath, baselen, pool))
                 return 1;
             continue;
         }

         if (info.filetype != APR_REG)
             continue;

         ext = strrchr(info.name, '.');
         if (!ext) {
             if (!strncasecmp(info.name, "aptmp", 5)) {
                 d = apr_pcalloc(p, sizeof(DIRENTRY));
                 d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                 d->type = TEMP;
                 apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
             }
             continue;
         }

         if (!strcasecmp(ext,".header")) {
             *ext = '\0';
             nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
             d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
             if (!d) {
                 d = apr_pcalloc(p, sizeof(DIRENTRY));
                 d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                 d->type = HEADER;
             }
             else
                 d->type |= HEADER;
             d->htime = info.mtime;
             d->hsize = info.size;
             apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
             continue;
         }

         if (!strcasecmp(ext, ".data")) {
             *ext = '\0';
             nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
             d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
             if (!d) {
                 d = apr_pcalloc(p, sizeof(DIRENTRY));
                 d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                 d->type = DATA;
             }
             else
                 d->type |= DATA;
             d->dtime = info.mtime;
             d->dsize = info.size;
             apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
             continue;
         }
     }

     apr_dir_close(dir);

     path[baselen] = '\0';

     for (i = apr_hash_first(p, h); i; i = apr_hash_next(i)) {
         apr_hash_this(i, NULL, NULL, (void **)(&d));
         if (d->type == HEADERDATA) {
             nextpath = apr_pstrcat(p, path, "/", d->basename, ".header", NULL);
             if (apr_file_open(&fd, nextpath, APR_READ, APR_OS_DEFAULT, p)
               == APR_SUCCESS) {
                 len = sizeof(disk_cache_info_t);
                 if (apr_file_read_full(fd, &disk_info, len, &len)
                   == APR_SUCCESS) {
                     apr_file_close(fd);
                     if (disk_info.format == DISK_FORMAT_VERSION ) {
                         e = apr_palloc(pool, sizeof(ENTRY) +
                             strlen(d->basename) + 1);
                         e->next = anchor;
                         anchor= e;
                         e->expire = disk_info.expire;
                         e->response_time = disk_info.response_time;
                         e->htime = d->htime;
                         e->dtime = d->dtime;
                         e->hsize = d->hsize;
                         e->dsize = d->dsize;
                         strcpy(e->basename, d->basename);
                         continue;
                     }
                 }
                 else
                     apr_file_close(fd);
             }
             if(realclean)
                 delete_entry(path, d->basename, p);
         } else if (realclean) {
             if (d->type == HEADER || d->type == DATA)
                 delete_entry(path, d->basename, p);
             else
                 delete_file(path, d->basename, p);
         }
     }

     apr_pool_destroy(p);

     return 0;
}

/*
 * Report the cache size before and after cleaning together with the
 * limit. Prints nothing unless verbose is set.
 *
 * total  size in bytes before cleaning
 * sum    size in bytes after cleaning
 * max    size limit in bytes
 *
 * All three values are printed in units of 1024 bytes. They are printed
 * with their full apr_off_t width so that very large caches are not
 * truncated to the range of an int.
 */
static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max)
{
     if (!verbose)
         return;
     apr_file_printf(errfile,
         "Statistics: total was %" APR_OFF_T_FMT "K, total now %"
         APR_OFF_T_FMT "K, limit %" APR_OFF_T_FMT "K\n",
         (apr_off_t)(total / 1024), (apr_off_t)(sum / 1024),
         (apr_off_t)(max / 1024));
}

/*
 * Delete cache objects from the "anchor" list until the sum of their
 * header and data file sizes is no larger than max.
 *
 * path  cache root, prefix for all names in the list
 * pool  parent for the temporary pools used while deleting
 * max   size limit in bytes
 *
 * Nothing is deleted if the cache already fits. Otherwise up to three
 * passes are made:
 *   1. objects whose response time or file modification times lie in the
 *      future (relative to "now") are deleted;
 *   2. if still too large, all objects whose expire time has passed are
 *      deleted;
 *   3. if still too large, the object with the oldest data file is
 *      deleted repeatedly until the limit is met.
 * A deleted object stays in the list with basename[0] set to '\0'.
 * Statistics are printed through printstats() before returning.
 */
static void purge(char *path, apr_pool_t *pool, apr_off_t max)
{
     apr_pool_t *p;
     apr_off_t sum;
     apr_off_t total;
     ENTRY *e;
     ENTRY *oldest;

     sum = 0;
     for (e = anchor; e; e = e->next) {
         sum += e->hsize;
         sum += e->dsize;
     }
     total = sum;

     if (sum <= max) {
         printstats(total, sum, max);
         return;
     }

     /* Pass 1: objects carrying a time stamp from the future. */
     apr_pool_create(&p, pool);

     for (e = anchor; e; e = e->next) {
         if (e->response_time > now || e->htime > now || e->dtime > now) {
             delete_entry(path, e->basename, p);
             sum -= e->hsize;
             sum -= e->dsize;
             e->basename[0] = '\0';
         }
     }

     apr_pool_destroy(p);

     if (sum <= max) {
         printstats(total, sum, max);
         return;
     }

#ifdef DEBUG
     /* Diagnostic only; the values are apr_off_t and need the matching
      * format. */
     apr_file_printf(errfile,
         "sum %" APR_OFF_T_FMT ", max %" APR_OFF_T_FMT "\n", sum, max);
#endif

     /* Pass 2: expired objects that were not already removed. */
     apr_pool_create(&p, pool);

     for (e = anchor; e; e = e->next) {
         if (e->expire < now && e->basename[0]) {
             delete_entry(path, e->basename, p);
             sum -= e->hsize;
             sum -= e->dsize;
             e->basename[0] = '\0';
         }
     }

     apr_pool_destroy(p);

     /* Pass 3: oldest data file first. */
     apr_pool_create(&p, pool);

     while (sum > max) {
         oldest = NULL;
         for (e = anchor; e; e = e->next) {
             if (!e->basename[0])
                 continue;
             if (!oldest || e->dtime < oldest->dtime)
                 oldest = e;
         }

         /* Nothing left to delete; stop instead of dereferencing NULL. */
         if (!oldest)
             break;

         delete_entry(path, oldest->basename, p);
         sum -= oldest->hsize;
         sum -= oldest->dsize;
         oldest->basename[0] = '\0';
     }

     apr_pool_destroy(p);

     printstats(total, sum, max);
}

/*
 * Write the command line help to stderr and terminate the program with
 * exit status 1. Does not return.
 */
static void usage(void)
{
     static const char *const lines[] = {
         "htcacheclean -- program for cleaning the disk cache.\n",
         "Usage: htcacheclean [-vr] -pPATH -lLIMIT\n",
         "Options:\n",
         "   -v   Be verbose and print statistics.\n",
         "   -r   Clean thoroughly. This assumes that the Apache "
             "web server\n        is not running.\n",
         "   -p   Specify PATH as the root of the disk cache.\n",
         "   -l   Specify LIMIT as the total disk cache size "
             "limit in KBytes.\n"
     };
     unsigned int n;

     for (n = 0; n < sizeof(lines) / sizeof(lines[0]); n++)
         fputs(lines[n], stderr);

     exit(1);
}

int main(int argc, const char * const argv[])
{
     apr_off_t max;
     int i;
     const char *arg;
     apr_pool_t *pool;
     char *proxypath;
     char *path;

     max = 0;
     verbose = 0;
     realclean = 0;
     proxypath = NULL;
     anchor = NULL;

     apr_app_initialize(&argc, &argv, NULL);
     apr_pool_create(&pool, NULL);
     apr_file_open_stderr(&errfile, pool);
     now = apr_time_now();

     for (i = 1; i < argc; i++) {
         arg = argv[i];
         if (*arg != '-')
             break;
         while (*++arg != '\0') {
             switch (*arg) {
             case 'v':
                 verbose = 1;
                 break;
             case 'r':
                 realclean = 1;
                 break;
             case 'l':
                 max = apr_atoi64(++arg);
                 while (*arg != '\0')
                     ++arg;
                 --arg;
                 break;
             case 'p':
                 proxypath = apr_pstrdup(pool, ++arg);
                 apr_filepath_set(proxypath, pool);
                 while (*arg != '\0')
                     ++arg;
                 --arg;
                 break;
             }
         }
     }

     if (!proxypath || max<=0) {
         usage();
     }

     apr_filepath_get(&path, 0, pool);

     if (!process_dir(path, strlen(path), pool)) {
         purge(path, pool, max*1024);
     } else {
         apr_file_printf(errfile,
             "An error occurred, cache cleaning aborted.\n");
         apr_pool_destroy(pool);
         return 1;
     }

     apr_pool_destroy(pool);
     return 0;
}

Mime
View raw message