httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Davi Arnaut <d...@haxent.com.br>
Subject [PATCH] revamped mod_disk_cache directory structure
Date Thu, 20 Jul 2006 14:58:01 GMT
Hi,

This patch converts the mod_disk_cache cache directory structure to a
uniformly distributed two-level hierarchy. The admin specifies the number
of level-1 and level-2 directories (as the two arguments of CacheDirLevels;
CacheDirLength is removed), and the cache files are scattered across the
level-2 directories.

Also, with this patch it is possible to place individual cache directories
on separate partitions, because the temporary files are now created in the
destination directory instead of in the cache root.

For example, this is the resulting distribution when running Apache as a
caching proxy (proxy+cache) for a small network:

[root@cache1 cache]# sh files-per-directory.sh 
dir: 00/ subs: 139 files: 632 size: 4.8M
dir: 01/ subs: 156 files: 765 size: 5.7M
dir: 02/ subs: 144 files: 626 size: 4.8M
dir: 03/ subs: 160 files: 714 size: 6.1M
dir: 04/ subs: 169 files: 820 size: 5.9M
dir: 05/ subs: 131 files: 590 size: 4.1M
dir: 06/ subs: 148 files: 677 size: 5.3M
dir: 07/ subs: 142 files: 644 size: 5.8M
dir: 08/ subs: 148 files: 749 size: 5.8M
dir: 09/ subs: 158 files: 711 size: 6.3M
dir: 0A/ subs: 146 files: 666 size: 5.1M
dir: 0B/ subs: 157 files: 701 size: 5.1M
dir: 0C/ subs: 157 files: 671 size: 5.2M
dir: 0D/ subs: 157 files: 711 size: 5.7M
dir: 0E/ subs: 149 files: 704 size: 5.6M
dir: 0F/ subs: 158 files: 742 size: 5.8M

--
Davi Arnaut

Index: modules/cache/cache_util.c
===================================================================
--- modules/cache/cache_util.c	(revision 423984)
+++ modules/cache/cache_util.c	(working copy)
@@ -19,6 +19,7 @@
 #include "mod_cache.h"
 
 #include <ap_provider.h>
+#include <util_md5.h>
 
 /* -------------------------------------------------------------- */
 
@@ -489,54 +490,31 @@
     y[sizeof(j) * 2] = '\0';
 }
 
-static void cache_hash(const char *it, char *val, int ndepth, int nlength)
+static unsigned int cdb_string_hash(const char *str)
 {
-    apr_md5_ctx_t context;
-    unsigned char digest[16];
-    char tmp[22];
-    int i, k, d;
-    unsigned int x;
-    static const char enc_table[64] =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
+    unsigned int hash = 5381;
 
-    apr_md5_init(&context);
-    apr_md5_update(&context, (const unsigned char *) it, strlen(it));
-    apr_md5_final(digest, &context);
+    while (*str)
+        hash = 33 * hash + *str++;
 
-    /* encode 128 bits as 22 characters, using a modified uuencoding
-     * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
-     * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
-     */
-    for (i = 0, k = 0; i < 15; i += 3) {
-        x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
-        tmp[k++] = enc_table[x >> 18];
-        tmp[k++] = enc_table[(x >> 12) & 0x3f];
-        tmp[k++] = enc_table[(x >> 6) & 0x3f];
-        tmp[k++] = enc_table[x & 0x3f];
-    }
-
-    /* one byte left */
-    x = digest[15];
-    tmp[k++] = enc_table[x >> 2];    /* use up 6 bits */
-    tmp[k++] = enc_table[(x << 4) & 0x3f];
-
-    /* now split into directory levels */
-    for (i = k = d = 0; d < ndepth; ++d) {
-        memcpy(&val[i], &tmp[k], nlength);
-        k += nlength;
-        val[i + nlength] = '/';
-        i += nlength + 1;
-    }
-    memcpy(&val[i], &tmp[k], 22 - k);
-    val[i + 22 - k] = '\0';
+    return hash;
 }
 
-CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, int dirlevels,
-                                            int dirlength, const char *name)
+CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, unsigned int L1,
+                                            unsigned int L2, const char *name)
 {
-    char hashfile[66];
-    cache_hash(name, hashfile, dirlevels, dirlength);
-    return apr_pstrdup(p, hashfile);
+    char *key;
+    char *md5_hash;
+    unsigned int cdb_hash;
+
+    md5_hash = ap_md5_binary(p, (unsigned char *) name, (int) strlen(name));
+
+    cdb_hash = cdb_string_hash(md5_hash) / L2;
+
+    key = apr_psprintf(p, "%02X/%02X/%s", (cdb_hash / L2) % L1,
+                       cdb_hash % L2, md5_hash);
+
+    return key;
 }
 
 /* Create a new table consisting of those elements from an input
Index: modules/cache/mod_cache.h
===================================================================
--- modules/cache/mod_cache.h	(revision 423984)
+++ modules/cache/mod_cache.h	(working copy)
@@ -274,8 +274,8 @@
 
 CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x);
 CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y);
-CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, int dirlevels, 
-                                             int dirlength, 
+CACHE_DECLARE(char *) ap_cache_generate_name(apr_pool_t *p, unsigned int L1,
+                                             unsigned int L2,
                                              const char *name);
 CACHE_DECLARE(cache_provider_list *)ap_cache_get_providers(request_rec *r, cache_server_conf *conf, apr_uri_t uri);
 CACHE_DECLARE(int) ap_cache_liststr(apr_pool_t *p, const char *list,
Index: modules/cache/mod_disk_cache.c
===================================================================
--- modules/cache/mod_disk_cache.c	(revision 423984)
+++ modules/cache/mod_disk_cache.c	(working copy)
@@ -66,17 +66,38 @@
  * Local static functions
  */
 
+static apr_status_t disk_mktemp(apr_file_t **fp, const char *dest, char **tempfile,
+                                apr_int32_t flags, apr_size_t cache_root_len,
+                                apr_pool_t *p)
+{
+    apr_status_t rv;
+    struct iovec iov[2];
+
+    iov[0].iov_base = (char *) dest;
+    iov[0].iov_len  = cache_root_len + DIR_LEVELS_LEN;
+
+    iov[1].iov_base = AP_TEMPFILE;
+    iov[1].iov_len  = sizeof AP_TEMPFILE;
+
+    *tempfile = apr_pstrcatv(p, iov, 2, NULL);
+
+    rv = apr_file_mktemp(fp, *tempfile, flags, p);
+
+    return rv;
+}
+
 static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
                          disk_cache_object_t *dobj, const char *name)
 {
     if (!dobj->hashfile) {
-        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
-                                                conf->dirlength, name);
+        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1,
+                                                conf->dirlevel2, name);
     }
 
     if (dobj->prefix) {
         return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
-                           dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
+                           dobj->hashfile + DIR_LEVELS_LEN,
+                           CACHE_HEADER_SUFFIX, NULL);
      }
      else {
         return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -88,13 +109,14 @@
                        disk_cache_object_t *dobj, const char *name)
 {
     if (!dobj->hashfile) {
-        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
-                                                conf->dirlength, name);
+        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevel1,
+                                                conf->dirlevel2, name);
     }
 
     if (dobj->prefix) {
         return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX, "/",
-                           dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
+                           dobj->hashfile + DIR_LEVELS_LEN,
+                           CACHE_DATA_SUFFIX, NULL);
      }
      else {
         return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
@@ -359,7 +381,6 @@
     dobj->root_len = conf->cache_root_len;
     dobj->datafile = data_file(r->pool, conf, dobj, key);
     dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
 
     return OK;
 }
@@ -467,7 +488,6 @@
     dobj->key = nkey;
     dobj->name = key;
     dobj->datafile = data_file(r->pool, conf, dobj, nkey);
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
 
     /* Open the data file */
     flags = APR_READ|APR_BINARY;
@@ -843,9 +863,9 @@
 
             mkdir_structure(conf, dobj->hdrsfile, r->pool);
 
-            rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
-                                 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
-                                 r->pool);
+            rv = disk_mktemp(&dobj->tfd, dobj->hdrsfile, &dobj->tempfile,
+                             APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
+                             conf->cache_root_len, r->pool);
 
             if (rv != APR_SUCCESS) {
                 return rv;
@@ -876,7 +896,6 @@
                 return rv;
             }
 
-            dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
             tmp = regen_key(r->pool, r->headers_in, varray, dobj->name);
             dobj->prefix = dobj->hdrsfile;
             dobj->hashfile = NULL;
@@ -885,11 +904,10 @@
         }
     }
 
+    rv = disk_mktemp(&dobj->hfd, dobj->hdrsfile, &dobj->tempfile,
+                     APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED |
+                     APR_EXCL, conf->cache_root_len, r->pool);
 
-    rv = apr_file_mktemp(&dobj->hfd, dobj->tempfile,
-                         APR_CREATE | APR_WRITE | APR_BINARY |
-                         APR_BUFFERED | APR_EXCL, r->pool);
-
     if (rv != APR_SUCCESS) {
         return rv;
     }
@@ -969,8 +987,6 @@
         return rv;
     }
 
-    dobj->tempfile = apr_pstrcat(r->pool, conf->cache_root, AP_TEMPFILE, NULL);
-
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                  "disk_cache: Stored headers for URL %s",  dobj->name);
     return APR_SUCCESS;
@@ -989,9 +1005,10 @@
      * in file_cache_el_final().
      */
     if (!dobj->tfd) {
-        rv = apr_file_mktemp(&dobj->tfd, dobj->tempfile,
-                             APR_CREATE | APR_WRITE | APR_BINARY |
-                             APR_BUFFERED | APR_EXCL, r->pool);
+        rv = disk_mktemp(&dobj->tfd, dobj->datafile, &dobj->tempfile,
+                         APR_CREATE | APR_WRITE | APR_BINARY |
+                         APR_BUFFERED | APR_EXCL, conf->cache_root_len,
+                         r->pool);
         if (rv != APR_SUCCESS) {
             return rv;
         }
@@ -1072,8 +1089,8 @@
     disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
 
     /* XXX: Set default values */
-    conf->dirlevels = DEFAULT_DIRLEVELS;
-    conf->dirlength = DEFAULT_DIRLENGTH;
+    conf->dirlevel1 = DEFAULT_DIRLEVEL1;
+    conf->dirlevel2 = DEFAULT_DIRLEVEL2;
     conf->maxfs = DEFAULT_MAX_FILE_SIZE;
     conf->minfs = DEFAULT_MIN_FILE_SIZE;
 
@@ -1105,33 +1122,22 @@
  * filename = "/key % prime1 /key %prime2/key %prime3"
  */
 static const char
-*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
+*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg1,
+                     const char *arg2)
 {
     disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
                                                  &disk_cache_module);
-    int val = atoi(arg);
-    if (val < 1)
+    int val1 = atoi(arg1);
+    int val2 = atoi(arg2);
+
+    if (val1 < 1 || val2 < 1)
         return "CacheDirLevels value must be an integer greater than 0";
-    if (val * conf->dirlength > CACHEFILE_LEN)
-        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
-    conf->dirlevels = val;
-    return NULL;
-}
-static const char
-*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
-{
-    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
-                                                 &disk_cache_module);
-    int val = atoi(arg);
-    if (val < 1)
-        return "CacheDirLength value must be an integer greater than 0";
-    if (val * conf->dirlevels > CACHEFILE_LEN)
-        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
 
-    conf->dirlength = val;
+    conf->dirlevel1 = val1;
+    conf->dirlevel2 = val2;
+
     return NULL;
 }
-
 static const char
 *set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
 {
@@ -1153,10 +1159,8 @@
 {
     AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
                  "The directory to store cache files"),
-    AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
+    AP_INIT_TAKE2("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
                   "The number of levels of subdirectories in the cache"),
-    AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
-                  "The number of characters in subdirectory names"),
     AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF,
                   "The minimum file size to cache a document"),
     AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF,
Index: modules/cache/mod_disk_cache.h
===================================================================
--- modules/cache/mod_disk_cache.h	(revision 423984)
+++ modules/cache/mod_disk_cache.h	(working copy)
@@ -24,6 +24,8 @@
 #define VARY_FORMAT_VERSION 3
 #define DISK_FORMAT_VERSION 4
 
+#define DIR_LEVELS_LEN      6
+
 #define CACHE_HEADER_SUFFIX ".header"
 #define CACHE_DATA_SUFFIX   ".data"
 #define CACHE_VDIR_SUFFIX   ".vary"
@@ -78,16 +80,16 @@
  */
 /* TODO: Make defaults OS specific */
 #define CACHEFILE_LEN 20        /* must be less than HASH_LEN/2 */
-#define DEFAULT_DIRLEVELS 3
-#define DEFAULT_DIRLENGTH 2
+#define DEFAULT_DIRLEVEL1 16
+#define DEFAULT_DIRLEVEL2 256
 #define DEFAULT_MIN_FILE_SIZE 1
 #define DEFAULT_MAX_FILE_SIZE 1000000
 
 typedef struct {
     const char* cache_root;
     apr_size_t cache_root_len;
-    int dirlevels;               /* Number of levels of subdirectories */
-    int dirlength;               /* Length of subdirectory names */
+    unsigned int dirlevel1;      /* Number of level 1 directories      */
+    unsigned int dirlevel2;      /* Number of level 2 subdirectories   */
     apr_size_t minfs;            /* minumum file size for cached files */
     apr_size_t maxfs;            /* maximum file size for cached files */
 } disk_cache_conf;

Mime
View raw message