httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Andreas Steinmetz <...@domdv.de>
Subject Re: mod_disk_cache directives naming convention
Date Fri, 08 Oct 2004 23:44:24 GMT
htcacheclean, take two:

Code cleanups, more APR-style coding, presumably feature complete, and now
built against the Apache 2.1 CVS tree. Needs further testing and, especially,
niceness tuning. See code below. Comments welcome.
-- 
Andreas Steinmetz                       SPAMmers use robotrap@domdv.de


/* Copyright 2001-2004 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

/*
  * htcacheclean.c: simple program for cleaning of
  * the disk cache of the Apache HTTP server
  *
  * Contributed by Andreas Steinmetz <ast@domdv.de>
  * 8 Oct 2004
  */

#include "apr.h"
#include "apr_lib.h"
#include "apr_strings.h"
#include "apr_file_io.h"
#include "apr_file_info.h"
#include "apr_pools.h"
#include "apr_hash.h"
#include "apr_thread_proc.h"
#include "apr_signal.h"
#include "apr_getopt.h"
#include "apr_ring.h"

#if APR_HAVE_SIGNAL_H
#include <signal.h>
#endif


/*
 * Dry-run switch. While DEBUG is defined, delete_file() and delete_entry()
 * only print "would delete ..." to errfile instead of removing anything,
 * and main() does not detach from the terminal in daemon mode.
 */
#define DEBUG

/* mod_disk_cache.c extract start */

/*
 * Header record that process_dir() reads from the start of every ".header"
 * file. Only `format`, `expire` and `response_time` are consulted by this
 * program; a record whose `format` differs from DISK_FORMAT_VERSION is
 * treated as unusable.
 * NOTE(review): this is a copy of the mod_disk_cache.c definition and is
 * presumably required to stay in sync with it -- confirm on update.
 */
#define DISK_FORMAT_VERSION 0
typedef struct {
     /* Indicates the format of the header struct stored on-disk. */
     int format;
     /* The HTTP status code returned for this response.  */
     int status;
     /* The size of the entity name that follows. */
     apr_size_t name_len;
     /* The number of times we've cached this entity. */
     apr_size_t entity_version;
     /* Miscellaneous time values. */
     apr_time_t date;
     apr_time_t expire;
     apr_time_t request_time;
     apr_time_t response_time;
} disk_cache_info_t;

/* mod_disk_cache.c extract end */

/* mod_disk_cache.c related definitions start */

/* Name prefix that marks an extension-less file as a cache temp file. */
#define TEMPFILE    "aptmp"
/* Suffix of the file holding a cache entry's header record. */
#define HEADEREXT   ".header"
/* Suffix of the file holding a cache entry's body. */
#define DATAEXT     ".data"

/* mod_disk_cache.c related definitions end */

#define NICE_DELAY  100000
#define DELETE_NICE 250
#define HEADER      1
#define DATA        2
#define TEMP        4
#define HEADERDATA  (HEADER|DATA)

#define DIRINFO (APR_FINFO_MTIME|APR_FINFO_SIZE|APR_FINFO_TYPE|APR_FINFO_NAME)

/*
 * Per-directory bookkeeping record built by process_dir() while scanning
 * one directory; it collects what is known about the files that share one
 * cache entry name.
 */
typedef struct
{
     char *basename;      /* entry path relative to the cache root, no suffix */
     int type;            /* combination of HEADER, DATA and TEMP */
     apr_time_t htime;    /* mtime of the header file */
     apr_time_t dtime;    /* mtime of the data file */
     apr_off_t hsize;     /* size of the header file */
     apr_off_t dsize;     /* size of the data file */
} DIRENTRY;

/*
 * One complete cache entry (header and data file both present, header
 * readable) as linked into the global ring `root` by process_dir() and
 * consumed by purge().
 */
typedef struct _entry
{
     APR_RING_ENTRY(_entry) link;   /* ring linkage */
     apr_time_t expire;             /* expiry time from the header record */
     apr_time_t response_time;      /* response time from the header record */
     apr_time_t htime;              /* mtime of the header file */
     apr_time_t dtime;              /* mtime of the data file */
     apr_off_t hsize;               /* size of the header file */
     apr_off_t dsize;               /* size of the data file */
     /* Entry name, stored inline; allocations add strlen(name) + 1 bytes.
      * NOTE(review): a zero-length array is a compiler extension, not ISO C;
      * consider a C99 flexible array member or a char pointer. */
     char basename[0];
} ENTRY;

/* Number of deletions since the last niceness pause. */
static int delcount;
/*
 * Set to 1 by the SIGINT/SIGTERM handler and polled by the scanning and
 * purging loops. It is written asynchronously from a signal handler, so it
 * must be a volatile sig_atomic_t for the access to be well defined.
 */
static volatile sig_atomic_t interrupted;
/* Non-zero when -r was given: also remove incomplete or unreadable entries. */
static int realclean;
/* Non-zero when -v was given: print statistics. */
static int verbose;
/* Non-zero when -n was given: sleep periodically in favour of other work. */
static int benice;
/* Time stamp taken at the start of the current cleaning pass. */
static apr_time_t now;
/* Standard error, used for all diagnostics. */
static apr_file_t *errfile;
/* Anchor of the ring of ENTRY records; re-initialised for every pass. */
static APR_RING_ENTRY(_entry) root;

/*
 * Signal handler for SIGINT and SIGTERM.
 *
 * It only records the request by setting `interrupted`; the main code polls
 * that flag and winds down on its own. Nothing else is done here because
 * formatted output (such as apr_file_printf) is not async-signal-safe.
 *
 * unused: the signal number, ignored.
 */

static void setterm(int unused)
{
    (void)unused;
    interrupted = 1;
}

/*
 * Print purge statistics to errfile when running verbosely.
 *
 * total: cache size in bytes before purging
 * sum:   cache size in bytes after purging
 * max:   configured size limit in bytes
 *
 * All three values are reported in KBytes. They are printed with
 * APR_OFF_T_FMT so that large caches are not truncated by a cast to int.
 */

static void printstats(apr_off_t total, apr_off_t sum, apr_off_t max)
{
    if (!verbose) {
        return;
    }

    apr_file_printf(errfile,
        "Statistics: total was %" APR_OFF_T_FMT "K, total now %"
        APR_OFF_T_FMT "K, limit %" APR_OFF_T_FMT "K\n",
        total / 1024, sum / 1024, max / 1024);
}

/*
 * Remove one file, "<path>/<basename>".
 *
 * path:     cache root directory
 * basename: file name relative to path
 * p:        pool used for the temporary path string
 *
 * With DEBUG defined the file is left alone and only reported on errfile.
 * In nice mode every DELETE_NICE-th deletion is followed by a NICE_DELAY
 * pause.
 */

static void delete_file(char *path, char *basename, apr_pool_t *p)
{
    char *victim = apr_pstrcat(p, path, "/", basename, NULL);

#ifdef DEBUG
    apr_file_printf(errfile, "would delete %s\n", victim);
#else
    apr_file_remove(victim, p);
#endif

    if (!benice) {
        return;
    }

    delcount++;
    if (delcount < DELETE_NICE) {
        return;
    }

    apr_sleep(NICE_DELAY);
    delcount = 0;
}

/*
 * Remove both files of a cache entry: "<path>/<basename>.header" followed
 * by "<path>/<basename>.data".
 *
 * path:     cache root directory
 * basename: entry name relative to path, without suffix
 * p:        pool used for the temporary path strings
 *
 * With DEBUG defined the files are left alone and only reported on errfile.
 * In nice mode the pair counts as two deletions towards DELETE_NICE.
 */

static void delete_entry(char *path, char *basename, apr_pool_t *p)
{
    static const char *const suffixes[] = { HEADEREXT, DATAEXT };
    apr_size_t k;

    for (k = 0; k < sizeof(suffixes) / sizeof(suffixes[0]); k++) {
        char *victim = apr_pstrcat(p, path, "/", basename, suffixes[k], NULL);

#ifdef DEBUG
        apr_file_printf(errfile, "would delete %s\n", victim);
#else
        apr_file_remove(victim, p);
#endif
    }

    if (!benice) {
        return;
    }

    delcount += 2;
    if (delcount < DELETE_NICE) {
        return;
    }

    apr_sleep(NICE_DELAY);
    delcount = 0;
}

/*
  * walk the cache directory tree
  */

static int process_dir(char *path, int baselen, apr_pool_t *pool)
{
     apr_dir_t *dir;
     apr_pool_t *p;
     apr_hash_t *h;
     apr_hash_index_t *i;
     apr_file_t *fd;
     apr_finfo_t info;
     apr_size_t len;
     char *nextpath;
     char *ext;
     DIRENTRY *d;
     ENTRY *e;
     int skip;
     disk_cache_info_t disk_info;


     if (apr_dir_open(&dir, path, pool) != APR_SUCCESS)
         return 1;

     apr_pool_create(&p, pool);
     h = apr_hash_make(p);
     fd = NULL;

     skip = baselen;
     if (path[baselen])
         skip++;

     while (apr_dir_read(&info, DIRINFO, dir) == APR_SUCCESS && !interrupted)
{
         if (info.filetype == APR_DIR) {
             if (info.name[0] == '.')
                 continue;
             nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
             if (process_dir(nextpath, baselen, pool))
                 return 1;
             continue;
         }

         if (info.filetype != APR_REG)
             continue;

         ext = strrchr(info.name, '.');
         if (!ext) {
             if (!strncasecmp(info.name, TEMPFILE, strlen(TEMPFILE))) {
                 d = apr_pcalloc(p, sizeof(DIRENTRY));
                 d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                 d->type = TEMP;
                 apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
             }
             continue;
         }

         if (!strcasecmp(ext, HEADEREXT)) {
             *ext = '\0';
             nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
             d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
             if (!d) {
                 d = apr_pcalloc(p, sizeof(DIRENTRY));
                 d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                 d->type = HEADER;
             }
             else
                 d->type |= HEADER;
             d->htime = info.mtime;
             d->hsize = info.size;
             apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
             continue;
         }

         if (!strcasecmp(ext, DATAEXT)) {
             *ext = '\0';
             nextpath = apr_pstrcat(p, path, "/", info.name, NULL);
             d = apr_hash_get(h, nextpath + skip, APR_HASH_KEY_STRING);
             if (!d) {
                 d = apr_pcalloc(p, sizeof(DIRENTRY));
                 d->basename = apr_pstrcat(p, path + skip, "/", info.name, NULL);
                 d->type = DATA;
             }
             else
                 d->type |= DATA;
             d->dtime = info.mtime;
             d->dsize = info.size;
             apr_hash_set(h, nextpath + skip, APR_HASH_KEY_STRING, d);
             continue;
         }
     }

     apr_dir_close(dir);

     if (interrupted)
         return 1;

     path[baselen] = '\0';

     for (i = apr_hash_first(p, h); i && !interrupted; i = apr_hash_next(i)) {
         apr_hash_this(i, NULL, NULL, (void **)(&d));
         if (d->type == HEADERDATA) {
             nextpath = apr_pstrcat(p, path, "/", d->basename, HEADEREXT, NULL);
             if (apr_file_open(&fd, nextpath, APR_READ, APR_OS_DEFAULT, p)
               == APR_SUCCESS) {
                 len = sizeof(disk_cache_info_t);
                 if (apr_file_read_full(fd, &disk_info, len, &len)
                   == APR_SUCCESS) {
                     apr_file_close(fd);
                     if (disk_info.format == DISK_FORMAT_VERSION ) {
                         e = apr_palloc(pool, sizeof(ENTRY) +
                             strlen(d->basename) + 1);
                         APR_RING_INSERT_TAIL(&root, e, _entry, link);
                         e->expire = disk_info.expire;
                         e->response_time = disk_info.response_time;
                         e->htime = d->htime;
                         e->dtime = d->dtime;
                         e->hsize = d->hsize;
                         e->dsize = d->dsize;
                         strcpy(e->basename, d->basename);
                         continue;
                     }
                 }
                 else
                     apr_file_close(fd);
             }
             if(realclean)
                 delete_entry(path, d->basename, p);
         } else if (realclean) {
             if (d->type == HEADER || d->type == DATA)
                 delete_entry(path, d->basename, p);
             else
                 delete_file(path, d->basename, p);
         }
     }

     apr_pool_destroy(p);

     if (interrupted)
         return 1;

     if (benice)
         apr_sleep(NICE_DELAY);

     if (interrupted)
         return 1;

     return 0;
}

/*
 * Purge cache entries until the total size fits the limit.
 *
 * Works on the ring `root` filled by process_dir(). If the summed header
 * and data sizes exceed `max`, entries are deleted in three stages:
 *   1. entries whose response time or file times lie in the future,
 *   2. entries whose expiry time has passed,
 *   3. the entry with the oldest data file time, repeatedly, until the
 *      total is within the limit.
 * Statistics are printed (when verbose) unless the run was interrupted.
 *
 * path: cache root directory
 * pool: parent pool for the temporary sub-pools
 * max:  size limit in bytes
 */

static void purge(char *path, apr_pool_t *pool, apr_off_t max)
{
    apr_pool_t *p;
    apr_off_t sum;
    apr_off_t total;
    ENTRY *e;
    ENTRY *n;
    ENTRY *oldest;

    sum = 0;

    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);
         e = APR_RING_NEXT(e, link)) {
        sum += e->hsize;
        sum += e->dsize;
    }

    total = sum;

    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    /* stage 1: entries with time stamps in the future */
    apr_pool_create(&p, pool);

    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);) {
        /* fetch the successor first, e may be unlinked below */
        n = APR_RING_NEXT(e, link);
        if (e->response_time > now || e->htime > now || e->dtime > now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            APR_RING_REMOVE(e, link);
        }
        e = n;
    }

    apr_pool_destroy(p);

    if (sum <= max) {
        printstats(total, sum, max);
        return;
    }

    if (interrupted)
        return;

    /* stage 2: expired entries */
    apr_pool_create(&p, pool);

    for (e = APR_RING_FIRST(&root);
         e != APR_RING_SENTINEL(&root, _entry, link);) {
        n = APR_RING_NEXT(e, link);
        if (e->expire < now) {
            delete_entry(path, e->basename, p);
            sum -= e->hsize;
            sum -= e->dsize;
            APR_RING_REMOVE(e, link);
        }
        e = n;
    }

    apr_pool_destroy(p);

    if (interrupted)
        return;

    /* stage 3: oldest entries first until the limit is met */
    apr_pool_create(&p, pool);

    while (sum > max && !interrupted) {
        oldest = NULL;
        for (e = APR_RING_FIRST(&root);
             e != APR_RING_SENTINEL(&root, _entry, link);
             e = APR_RING_NEXT(e, link)) {
            if (!oldest || e->dtime < oldest->dtime)
                oldest = e;
        }

        /* nothing left to delete; never dereference a missing entry */
        if (!oldest)
            break;

        delete_entry(path, oldest->basename, p);
        sum -= oldest->hsize;
        sum -= oldest->dsize;
        APR_RING_REMOVE(oldest, link);
    }

    apr_pool_destroy(p);

    printstats(total, sum, max);
}

/*
  * usage info
  */

static void usage(void)
{
     apr_file_printf(errfile, "htcacheclean -- program for cleaning the "
                              "disk cache.\n");
     apr_file_printf(errfile, "Usage: htcacheclean [-vrn] -pPATH -lLIMIT\n");
     apr_file_printf(errfile, "Usage: htcacheclean [-n] -dINTERVAL -pPATH "
                              "-lLIMIT\n");
     apr_file_printf(errfile, "Options:\n");
     apr_file_printf(errfile, "   -d   Daemonize and repeat cache cleaning "
                              "every INTERVAL minutes. This\n"
                              "        option is mutually exclusive with "
                              "the -v and the -r options.\n");
     apr_file_printf(errfile, "   -v   Be verbose and print statistics. "
                              "This option is mutually exclusive\n"
                              "        with the -d option.\n");
     apr_file_printf(errfile, "   -r   Clean thoroughly. This assumes that "
                              "the Apache web server\n"
                              "        is not running. This option is "
                              "mutually exclusive with the -d option.\n");
     apr_file_printf(errfile, "   -n   Be nice. This causes longer processing "
                              "in favour of other processes.\n");
     apr_file_printf(errfile, "   -p   Specify PATH as the root of the "
                              "disk cache.\n");
     apr_file_printf(errfile, "   -l   Specify LIMIT as the total disk cache "
                              "size limit in KBytes.\n");
     exit(1);
}

/*
 * Program entry point.
 *
 * Parses the options (-n, -v, -r, -dINTERVAL, -lLIMIT, -pPATH), changes
 * into the cache root and then runs one cleaning pass (scan the tree, then
 * purge down to the limit). With -d the pass is repeated every INTERVAL
 * minutes until SIGINT or SIGTERM arrives.
 *
 * Returns 0 on normal termination and 1 if APR could not be initialised or
 * a single-shot pass failed. Invalid options, an unusable -p path, or
 * missing -p/-l print the usage text and exit with status 1.
 */

int main(int argc, const char * const argv[])
{
    apr_off_t max;
    apr_time_t current;
    apr_time_t repeat;
    apr_time_t delay;
    apr_status_t status;
    int repeat_found;
    apr_pool_t *pool;
    apr_pool_t *instance;
    apr_getopt_t *o;
    char opt;
    const char *arg;
    char *proxypath;
    char *path;

    interrupted = 0;
    repeat = 0;
    repeat_found = 0;
    max = 0;
    verbose = 0;
    realclean = 0;
    benice = 0;
    proxypath = NULL;

    if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS)
        return 1;
    atexit(apr_terminate);

    if (apr_pool_create(&pool, NULL) != APR_SUCCESS)
        return 1;
    apr_file_open_stderr(&errfile, pool);
    apr_signal(SIGINT, setterm);
    apr_signal(SIGTERM, setterm);

    apr_getopt_init(&o, pool, argc, argv);

    while (1) {
        status = apr_getopt(o, "nvrd:l:p:", &opt, &arg);
        if (status == APR_EOF)
            break;
        if (status != APR_SUCCESS)
            usage();

        switch (opt) {
        case 'n':
            benice = 1;
            break;
        case 'v':
            verbose = 1;
            break;
        case 'r':
            realclean = 1;
            break;
        case 'd':
            repeat_found = 1;
            repeat = apr_atoi64(arg);
            repeat *= 60000000;    /* minutes to microseconds */
            break;
        case 'l':
            max = apr_atoi64(arg);
            break;
        case 'p':
            proxypath = apr_pstrdup(pool, arg);
            /* never go on to clean whatever directory we happen to be
             * in when the requested cache root cannot be entered */
            if (apr_filepath_set(proxypath, pool) != APR_SUCCESS)
                usage();
            break;
        }
    }

    if (repeat_found && (repeat <= 0 || verbose || realclean))
        usage();

    if (!proxypath || max <= 0)
        usage();

    if (apr_filepath_get(&path, 0, pool) != APR_SUCCESS)
        usage();

#ifndef DEBUG
    if (repeat_found) {
        apr_file_close(errfile);
        apr_proc_detach(APR_PROC_DETACH_DAEMONIZE);
    }
#endif

    do {
        now = apr_time_now();
        apr_pool_create(&instance, pool);
        APR_RING_INIT(&root, _entry, link);
        delcount = 0;

        if (!process_dir(path, (int)strlen(path), instance)) {
            purge(path, instance, max * 1024);    /* limit is in KBytes */
        }
        else if (!repeat_found) {
            apr_file_printf(errfile,
                "An error occurred, cache cleaning aborted.\n");
            return 1;
        }

        /* release the per-pass data before sleeping; the ring is set up
         * afresh at the start of the next pass */
        apr_pool_destroy(instance);

        if (repeat_found && !interrupted) {
            current = apr_time_now();
            /* sleep for the rest of the interval; if the clock went
             * backwards or the pass overran, wait a full interval */
            if (current < now || now + repeat <= current)
                delay = repeat;
            else
                delay = now + repeat - current;
            apr_sleep(delay);
        }
    } while (repeat_found && !interrupted);

    return 0;
}

Mime
View raw message