httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Justin Erenkrantz <jus...@erenkrantz.com>
Subject Re: [PATCH] mod_disk_cache: Handling of Varied Content
Date Sun, 12 Jun 2005 09:49:08 GMT
On Fri, Jun 10, 2005 at 06:14:09PM -0700, Paul Querna wrote:

Comments inline.

> Index: modules/cache/mod_disk_cache.c
> ===================================================================
> --- modules/cache/mod_disk_cache.c	(revision 190047)
> +++ modules/cache/mod_disk_cache.c	(working copy)
> @@ -59,10 +59,13 @@
>      int dirlevels;              /* Number of levels of subdirectories */
>      int dirlength;            /* Length of subdirectory names */
>  #endif
> +    int varied;
> +    char *varyfile;          /* name of file where the vary info will go */
>      char *datafile;          /* name of file where the data will go */
>      char *hdrsfile;          /* name of file where the hdrs will go */
>      char *hashfile;          /* Computed hash key for this URI */
>      char *name;
> +    char *key;
>      apr_file_t *fd;          /* data file */
>      apr_file_t *hfd;         /* headers file */
>      apr_file_t *tfd;         /* temporary file for data */

I don't think you're ever using the varied field, correct?

And I think we may want to clarify name / key a bit.  My suggestion:

 char *name;  /* Requested URI without vary bits - suitable for mortals. */
 char *key;   /* Requested URI with Vary bits (if present); on-disk prefix. */

> @@ -92,18 +95,37 @@
>  
>  module AP_MODULE_DECLARE_DATA disk_cache_module;
>  
> +#ifndef DISK_CACHE_DEBUG
> +#define DISK_CACHE_DEBUG 0
> +#endif
> +

Do we really need an #ifdef?  The debug verbosity log level should be fine.
These's no need for more #ifdefs - it clutters up the code too much.

>  /* Forward declarations */
>  static int remove_entity(cache_handle_t *h);
>  static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
>  static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade
*b);
>  static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
>  static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade
*bb);
> +static apr_status_t read_array(request_rec *r, apr_array_header_t* arr, 
> +                               apr_file_t *file);
>  
>  /*
>   * Local static functions
>   */
> +#define CACHE_VARY_SUFFIX   ".vary"
>  #define CACHE_HEADER_SUFFIX ".header"
>  #define CACHE_DATA_SUFFIX   ".data"
> +
> +static char *vary_file(apr_pool_t *p, disk_cache_conf *conf,
> +                         disk_cache_object_t *dobj, const char *name)
> +{
> +    if (!dobj->hashfile) {
> +        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels, 
> +                                                conf->dirlength, name);
> +    }
> +    return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
> +                       CACHE_VARY_SUFFIX, NULL);
> +}
> +
>  static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
>                           disk_cache_object_t *dobj, const char *name)
>  {
> @@ -242,6 +264,59 @@
>      return APR_SUCCESS;
>  }
>  
> +static char* regen_key(apr_pool_t* p, apr_table_t* headers,
> +                       apr_array_header_t* varray, const char *oldkey)
> +{
> +    struct iovec *iov;
> +    int i,k;
> +    int nvec;
> +    const char *header;
> +    const char **elts;
> +
> +    nvec = (varray->nelts * 2) + 2;
> +    iov = apr_palloc(p, sizeof(struct iovec) * nvec);
> +    elts = (const char **) varray->elts;
> +    for(i=0, k=0; i < varray->nelts; i++) {
> +        header = apr_table_get(headers, elts[i]);
> +        if (!header) {
> +            header = "";
> +        }
> +        iov[k].iov_base = (char*) elts[i];
> +        iov[k].iov_len = strlen(elts[i]);
> +        k++;
> +        iov[k].iov_base = (char*) header;
> +        iov[k].iov_len = strlen(header);
> +        k++;
> +    }
> +    iov[k].iov_base = (char*)"\001";
> +    iov[k].iov_len = 1;
> +    k++;
> +    iov[k].iov_base = (char*) oldkey;
> +    iov[k].iov_len = strlen(oldkey);
> +    k++;
> +
> +    return apr_pstrcatv(p, iov, k, NULL);
> +}

Um, what's the '\001' for?  If I'm reading this right, it'd be creating a new
hash value that uses the following as the seed:

header name (from stored response) + header value (from req. or "") + 
... repeat ...  + \001 + original key.

I think the 001 is unnecessary here or it hasn't been suitably explained...

BTW, a comment at the top of mod_disk_cache.c that explains this vary file and
the indirection would be really nice, too.  See where the description of the
header format currently is.  There's no need to force people to guess what the
format is.  In fact, as seen below, I think I'm confused as to how far the
indirection goes (your emails aren't clear, either).

BTW, I bet we could (should?) make this return const char *.

> +
> +static int array_alphasort(const void *fn1, const void *fn2)
> +{
> +    return strcmp(*(char**)fn1, *(char**)fn2);
> +}
> +
> +static void tokens_to_array(apr_pool_t *p, const char *data,
> +                            apr_array_header_t *arr)
> +{
> +    char *token;
> +
> +    while ((token = ap_get_list_item(p, &data)) != NULL) {
> +        *((const char **) apr_array_push(arr)) = token;
> +    }
> +
> +    /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
> +    qsort((void *) arr->elts, arr->nelts,
> +         sizeof(char *), array_alphasort);
> +}

I'm not sure if qsort() isn't overkill here...

Plus, there needs to be some way to delineate case-insensitivity.  A request
with AccEpt-Encoding: gzip should be treated identically to Accept-Encoding:
gzip.  14.44 says:

   The field-names given are not limited to the set of standard
   request-header fields defined by this specification. Field names are
   case-insensitive.

So, the field names need to be lower-cased both here and in regen_key (at the
least).

> +
>  /*
>   * Hook and mod_cache callback functions
>   */
> @@ -254,6 +329,8 @@
>      disk_cache_object_t *dobj;
>  
>      if (conf->cache_root == NULL) {
> +            ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
> +                         "disk_cache,create_entity: Cannot cache files to disk without
a CacheRoot specified.");
>          return DECLINED;
>      }

The log message doesn't really relate to this change; but your comment could
be a lot clearer, too.

> @@ -264,6 +341,8 @@
>      obj->key = apr_pstrdup(r->pool, key);
>  
>      dobj->name = obj->key;
> +    dobj->varied = 0;
> +    dobj->varyfile = vary_file(r->pool, conf, dobj, key);
>      dobj->datafile = data_file(r->pool, conf, dobj, key);
>      dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
>      dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
> @@ -273,6 +352,8 @@
>  
>  static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
>  {
> +    char *nkey;
> +    apr_file_t *vfd;
>      apr_status_t rc;
>      static int error_logged = 0;
>      disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
> @@ -300,10 +381,38 @@
>      obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
>  
>      info = &(obj->info);
> -    obj->key = (char *) key;
> -    dobj->name = (char *) key;
> -    dobj->datafile = data_file(r->pool, conf, dobj, key);
> -    dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
> +
> +    /* Attempt to open the varied headers file */
> +    flags = APR_READ|APR_BINARY|APR_BUFFERED;
> +    dobj->varyfile = vary_file(r->pool, conf, dobj, key);
> +    dobj->varied = 0;
> +    dobj->name = (char*)key;
> +
> +    rc = apr_file_open(&vfd, dobj->varyfile, flags, 0, r->pool);
> +
> +    if (rc == APR_SUCCESS) {
> +        apr_array_header_t* varray;
> +
> +        varray = apr_array_make(r->pool, 5, sizeof(char*));
> +        rc = read_array(r, varray, vfd);
> +        if (rc != APR_SUCCESS) {
> +            ap_log_error(APLOG_MARK, APLOG_ERR, rc, r->server,
> +                         "disk_cache: Cannot parse vary header file: %s", 
> +                         dobj->varyfile);
> +            return DECLINED;
> +        }
> +        nkey = regen_key(r->pool, r->headers_in, varray, dobj->name);
> +        dobj->hashfile = NULL;
> +        dobj->varied = 1;
> +    }
> +    else {
> +        nkey = apr_pstrdup(r->pool, key);
> +    }

I'm fairly certain that the strdup is needless.  We should also close vfd in
the if block section before continuing on.  No need to hold open the fd.

Oh, wait.  What are you doing here?  Does the presence of the vary file
indicate that there is no header and data file as well?  Much like the
type-map file for mod_negotiation?

I'm concerned that we're going to add an extra open() that will fail in the
mainline non-Vary case.  That's sort of worrisome.

Let me restate what I think your file format is doing - please correct me if
I'm wrong:

Incoming client requests /foo/bar/baz
Generate <hash> off /foo/bar/baz
open <hash>.vary
if present:
  read in .vary file which contains header names separated by CRLF
  Use each header name (from .vary) with our request values (headers_in) to
    regenerate <hash> using /foo/bar/baz+HeaderName+HeaderValue+...(see above)
read in <hash>.data and <hash>.header files

One possibility is to morph our .header file - one 'variant' (no pun intended)
has what we have now - the other is essentially your .vary file - bump our
format field and add a sub-type field.

For example, it could work something like this:

Incoming client requests /foo/bar/baz
Generate <hash> off /foo/bar/baz
open <hash>.header
read in <hash>.header file (may contain Format #1 or Format #2)
if format #2 (your .vary):
  Use each header name (from .header) with our request values (headers_in) to
    regenerate <hash> using /foo/bar/baz+HeaderName+HeaderValue+...(see above)
  re-read in <hash>.header (setting 1st .header file to .varyfile)
read in <hash>.data

This would allow one open call that would work in both cases.  In the vary
case, it needs to open a 2nd file.  I'm just concerned that opening a file
that doesn't exist in a common case can hurt performance.

What do you think?

> +
> +    obj->key = nkey;
> +    dobj->key = nkey;
> +    dobj->datafile = data_file(r->pool, conf, dobj, nkey);
> +    dobj->hdrsfile = header_file(r->pool, conf, dobj, nkey);
>      dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
>  
>      /* Open the data file */
> @@ -356,6 +465,72 @@
>      return OK;
>  }
>  
> +static apr_status_t read_array(request_rec *r, apr_array_header_t* arr, 
> +                               apr_file_t *file)
> +{
> +    char w[MAX_STRING_LEN];
> +    int p;
> +    apr_status_t rv;
> +
> +    while (1) {
> +        rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
> +        if (rv != APR_SUCCESS) {
> +            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
> +                          "Premature end of vary array.");
> +            return rv;
> +        }
> +
> +        p = strlen(w);
> +        if (p > 0 && w[p - 1] == '\n') {
> +            if (p > 1 && w[p - 2] == CR) {
> +                w[p - 2] = '\0';
> +            }
> +            else {
> +                w[p - 1] = '\0';
> +            }
> +        }
> +
> +        /* If we've finished reading the array, break out of the loop. */
> +        if (w[0] == '\0') {
> +            break;
> +        }
> +
> +       *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
> +    }
> +
> +    return APR_SUCCESS;
> +}

I shudder at the strlen and CRLF reads; but I realize that read_table and
store_table both do the same thing.  Ugh.

> +
> +static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
> +{
> +    int i;
> +    apr_status_t rv;
> +    struct iovec iov[2];
> +    apr_size_t amt;
> +    const char **elts;
> +
> +    elts = (const char **) arr->elts;
> +
> +    for (i = 0; i < arr->nelts; i++) {
> +        iov[0].iov_base = (char*) elts[i];
> +        iov[0].iov_len = strlen(elts[i]);
> +        iov[1].iov_base = CRLF;
> +        iov[1].iov_len = sizeof(CRLF) - 1;
> +
> +        rv = apr_file_writev(fd, (const struct iovec *) &iov, 2,
> +                             &amt);
> +        if (rv != APR_SUCCESS) {
> +            return rv;
> +        }
> +    }
> +
> +    iov[0].iov_base = CRLF;
> +    iov[0].iov_len = sizeof(CRLF) - 1;
> +
> +    return apr_file_writev(fd, (const struct iovec *) &iov, 1,
> +                         &amt);
> +}
> +
>  static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
>                                 apr_table_t *table, apr_file_t *file)
>  {
> @@ -531,6 +706,60 @@
>      /* This is flaky... we need to manage the cache_info differently */
>      h->cache_obj->info = *info;
>  
> +#if DISK_CACHE_DEBUG
> +    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
> +                 "disk_cache: Storing headers for URL %s",  dobj->name);
> +#endif

Ick on the #ifdef.

> +
> +    if (r->headers_out) {
> +        const char *tmp;
> +
> +        tmp = apr_table_get(r->headers_out, "Vary");
> +
> +        if (tmp) {
> +            apr_array_header_t* varray;
> +
> +            mkdir_structure(conf, dobj->varyfile, r->pool);

This probably should call apr_file_remove first.

> +
> +            rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
> +                             APR_CREATE | APR_WRITE | APR_BINARY |
> +                             APR_BUFFERED | APR_EXCL, r->pool);
> +
> +            if (rv != APR_SUCCESS) {
> +                return rv;
> +            }
> +
> +            varray = apr_array_make(r->pool, 6, sizeof(char*));
> +            tokens_to_array(r->pool, tmp, varray);
> +
> +#if DISK_CACHE_DEBUG
> +            ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
> +                 "disk_cache: varry array contents: %s", apr_array_pstrcat(r->pool,
varray, ','));
> +#endif
> +            store_array(dobj->tfd, varray);
> +
> +            apr_file_close(dobj->tfd);
> +
> +            dobj->tfd = NULL;
> +
> +            rv = apr_file_rename(dobj->tempfile, dobj->varyfile, r->pool);
> +            if (rv != APR_SUCCESS) {
> +                ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
> +                    "disk_cache: rename tempfile to varyfile failed: %s -> %s",
> +                    dobj->tempfile, dobj->varyfile);
> +                return rv;
> +            }
> +
> +            dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE,
NULL);
> +            tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
> +            dobj->varied = 1;
> +            dobj->hashfile = NULL;
> +            dobj->datafile = data_file(r->pool, conf, dobj, tmp);
> +            dobj->hdrsfile = header_file(r->pool, conf, dobj, tmp);

There is also a degenerate case here: a response wasn't previously using Vary
but now is or vice versa.  We should see about clearing out any datafile and
hdrsfile (or varyfile if no longer present) before continuing on.

The presence of a stale Vary file would do all sorts of fun things.  =)

> +        }
> +    }
> +
> +
>      rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
>                           APR_CREATE | APR_WRITE | APR_BINARY |
>                           APR_BUFFERED | APR_EXCL, r->pool);
> @@ -616,8 +845,10 @@
>  
>      dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
>  
> +#if DISK_CACHE_DEBUG
>      ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
>                   "disk_cache: Stored headers for URL %s",  dobj->name);
> +#endif
>      return APR_SUCCESS;
>  }

Why an #if here?  -- justin

Mime
View raw message