httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Brian Pane <bp...@pacbell.net>
Subject [PATCH] Re: directory_walk performance
Date Thu, 02 Aug 2001 08:54:33 GMT
William A. Rowe, Jr. wrote:

>Brian,
>
>  see my commit to core/request.c ... I've dropped in the new directory_walk
>code.
>
>  I'd like us to optimize it for things we _know_ (e.g., it's given as a
><Directory > so skip the stat, unless FollowSymLinks isn't set, and assume
>for the remainder of the request that it _is_ a directory) and then introduce
>your optimizations.  Hopefully both branches in the same source help us keep 
>running while we keep developing.  Focus on the second directory_walk function
>since the first will be deprecated and removed when we've got it right :)
>
>Bill
>

Here's a patch that implements my pre-merge optimization to
reduce (and in some cases completely eliminate) the calls to
ap_merge_per_dir_configs.  Its impacts on directory_walk are
minor; all the real work happens in a new post-config-phase
function.

I was able to generalize the pre-merge technique to support
even wildcard directories (something that my original prototype
couldn't handle correctly).  The comments in core.c describe the
algorithm.

--Brian

Index: include/http_core.h
===================================================================
RCS file: /home/cvspublic/httpd-2.0/include/http_core.h,v
retrieving revision 1.46
diff -u -r1.46 http_core.h
--- include/http_core.h    2001/07/30 18:51:57    1.46
+++ include/http_core.h    2001/08/02 08:39:09
@@ -492,6 +492,14 @@
     char *access_name;
     apr_array_header_t *sec;
     apr_array_header_t *sec_url;
+
+    /* Pre-merged per-dir configs */
+#define NUM_PRE_MERGE_DIRS 6
+    struct {
+    const char *dirname;
+    ap_conf_vector_t *conf;
+    } pre_merged_dir_conf[1 << (NUM_PRE_MERGE_DIRS - 1)];
+
 } core_server_config;
 
 /* for http_config.c */
Index: server/core.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/server/core.c,v
retrieving revision 1.32
diff -u -r1.32 core.c
--- server/core.c    2001/08/01 19:15:22    1.32
+++ server/core.c    2001/08/02 08:39:11
@@ -3295,6 +3295,166 @@
     ap_set_version(pconf);
 }
 
+
+static int is_possible_predecessor(const char *dir1, const char *dir2)
+{
+    char c1, c2;
+    while ((c1 = *dir1++)) {
+    c2 = *dir2++;
+    if (c1 != c2) {
+        if ((c1 == '*') || (c2 == '*')) {
+        while ((c1 = *dir1) && (c1 != '/'))
+            dir1++;
+        while ((c2 = *dir2) && (c2 != '/'))
+            dir2++;
+        }
+        else {
+        return 0;
+        }
+    }
+    }
+    return 1;
+}
+
+
+/* Pre-merge the per-directory configurations, to avoid the
+ * overhead of doing this for each request (see directory_walk
+ * in request.c).
+ *
+ * Background:
+ *
+ * There are 'n' per-directory configurations.  The directory_walk()
+ * function scans through them from i=0 through i=n-1 and either merges
+ * 'per-dir-config[i]' into its pending request configuration or
+ * leaves that config out (depending on whether the directory name
+ * associated with per-dir-config[i] matches the requested file
+ * or not).
+ *
+ * It's possible to represent the set of merges done for a request
+ * as a vector of bits, V[0] through V[n-1].  V[i] is 1 if
+ * per-dir-config[i] is used for the requests, or 0 if it isn't.
+ * Conceptually, pre_merge_per_dir_configs() is responsible for
+ * pre-building the final configuration associated with each
+ * possible value of this bit vector.  Given this pre-merged
+ * config structure, directory_walk() can just look up the
+ * end result instead of actually doing all of the merging at
+ * request time.
+ *
+ * In the general case, it's not feasible to precompute all
+ * permutations of V, because there are O(2^n) possibilities.
+ * Thus pre_merge_per_dir_configs() computes the pre-merge
+ * for the first NUM_PRE_MERGE_DIRS directories.  It still
+ * uses O(2^m) storage, but m is constrained to a small value.
+ * As an additional space optimization, the function detects
+ * impossible permutations (e.g., if you have <Directory>
+ * blocks for /foo and /bar/*, their configs will never actually
+ * be merged) and doesn't compute the merged configs for these.
+ */
+static void pre_merge_per_dir_configs(apr_pool_t *p, server_rec *s)
+{
+    core_server_config *sconf;
+    apr_array_header_t *sec;
+    int nelts;
+    ap_conf_vector_t **elts;
+    int num_bits;
+    int i;
+    int max;
+    int mask;
+    int threshold;
+    int j;
+
+    sconf = ap_get_module_config(s->module_config, &core_module);
+    sec = sconf->sec;
+    nelts = sec->nelts;
+    elts = (ap_conf_vector_t **)sec->elts;
+
+    num_bits =
+    (sec->nelts < NUM_PRE_MERGE_DIRS) ? sec->nelts : NUM_PRE_MERGE_DIRS;
+    max = (1 << (num_bits - 1));
+
+    sconf->pre_merged_dir_conf[0].conf = s->lookup_defaults;
+    sconf->pre_merged_dir_conf[0].dirname = "";
+    mask = 1;
+    threshold = 2;
+    j = 0;
+    /* i holds the value of the vector 'V' in the
+     * algorithm described in the comments at the top
+     * of this function.  To make the code simpler, the
+     * bits are 'backwards'; the low order bit of i is V[0].
+     * sconf->pre_merged_dir_conf is a vector of 2^NUM_PRE_MERGE_DIRS
+     * elements; for any permutation P of the bits in V,
+     * sconf->pre_merged_dir_conf[P] contains the merged
+     * per-dir configuration corresponding to P.
+     */
+    for (i = 1; i < max; i++) {
+    int parent;
+    if (i == threshold) {
+        mask = threshold;
+        threshold <<= 1;
+        j++;
+    }
+
+    /* The bit vector 'V' can be thought of as a tree;
+     * the configuration for a node can be computed
+     * incrementally by merging with its parent
+     */
+    parent = i & (mask - 1);
+
+    if (sconf->pre_merged_dir_conf[parent].conf == NULL) {
+        /* If the parent node represents an unreachable
+         * state, so does the child, so don't bother
+         * computing it
+         */
+        continue;
+    }
+    if (i & mask) {
+        /* A 1 value in the new high-order bit corresponds to
+         * a permutation in which the j'th per-directory config
+         * is a match against the requested filename.  Thus we
+         * should compute the merge of the j'th config with
+         * the config stored in the parent node.  However,
+         * if the j'th config corresponds to a <Directory>
+         * path that can't possibly match a filename that
+         * matched the parent node, then we needn't waste
+         * time or memory computing it
+         */
+        core_dir_config *core_elt = ap_get_module_config(elts[j],
+                                 &core_module);
+        const char *dirname = core_elt->d;
+        const char *parent_dirname =
+        sconf->pre_merged_dir_conf[parent].dirname;
+        if (is_possible_predecessor(parent_dirname, dirname)) {
+        sconf->pre_merged_dir_conf[i].conf =
+            ap_merge_per_dir_configs(p,
+             sconf->pre_merged_dir_conf[parent].conf, elts[j]);
+        sconf->pre_merged_dir_conf[i].dirname = dirname;
+        }
+    }
+    else {
+        /* A 0 value in the new high-order bit corresponds to
+         * a permutation in which the j'th per-directory config
+         * is not matched.  Thus the merged configuration for
+         * this node is the same as that for its parent, and
+         * we can copy the pointer rather than making another
+         * copy of the merged config
+         */
+        sconf->pre_merged_dir_conf[i] = sconf->pre_merged_dir_conf[parent];
+    }
+    }
+}
+
+static void core_post_config_merge(apr_pool_t *p, apr_pool_t *plog,
+                   apr_pool_t *ptemp, server_rec *s)
+{
+    server_rec *virt;
+
+    for (virt = s->next; virt; virt = virt->next) {
+    pre_merge_per_dir_configs(p, virt);
+    }
+    pre_merge_per_dir_configs(p, s);
+}
+
+
 static void core_open_logs(apr_pool_t *pconf, apr_pool_t *plog, 
apr_pool_t *ptemp, server_rec *s)
 {
     ap_open_logs(s, pconf);
@@ -3339,6 +3499,7 @@
 static void register_hooks(apr_pool_t *p)
 {
     ap_hook_post_config(core_post_config,NULL,NULL,APR_HOOK_REALLY_FIRST);
+    
ap_hook_post_config(core_post_config_merge,NULL,NULL,APR_HOOK_REALLY_LAST);
     
ap_hook_translate_name(ap_core_translate,NULL,NULL,APR_HOOK_REALLY_LAST);
     ap_hook_open_logs(core_open_logs,NULL,NULL,APR_HOOK_MIDDLE);
     ap_hook_handler(default_handler,NULL,NULL,APR_HOOK_REALLY_LAST);
Index: server/request.c
===================================================================
RCS file: /home/cvspublic/httpd-2.0/server/request.c,v
retrieving revision 1.18
diff -u -r1.18 request.c
--- server/request.c    2001/08/01 11:59:55    1.18
+++ server/request.c    2001/08/02 08:39:12
@@ -717,6 +717,7 @@
     int num_sec = sconf->sec->nelts;
     int sec;
     unsigned int seg;
+    unsigned int bitmask;
     int res;
     ap_conf_vector_t *entry_config;
     ap_conf_vector_t *this_conf;
@@ -814,9 +815,12 @@
      * seg keeps track of which segment we've copied.
      * sec keeps track of which section we're on, since sections are
      *     ordered by number of segments. See core_reorder_directories
+     * bitmask keeps track of which per-dir configs we've merged...
+     *     given a list of 'n'
      */
     seg = 1;
     sec = 0;
+    bitmask = 0;
     do {
         int overrides_here;
         core_dir_config *core_dir = ap_get_module_config(per_dir_defaults,
@@ -852,9 +856,23 @@
                 this_conf = entry_config;
 
             if (this_conf) {
-                per_dir_defaults = ap_merge_per_dir_configs(r->pool,
-                                                            
per_dir_defaults,
-                                                            this_conf);
+        if (sec < NUM_PRE_MERGE_DIRS) {
+            bitmask |= (1 << sec);
+            if (sconf->pre_merged_dir_conf[bitmask].conf) {
+            per_dir_defaults =
+                sconf->pre_merged_dir_conf[bitmask].conf;
+            }
+            else {
+            per_dir_defaults =
+                ap_merge_per_dir_configs(r->pool, per_dir_defaults,
+                             this_conf);
+            }
+        }
+        else {
+            per_dir_defaults =
+            ap_merge_per_dir_configs(r->pool, per_dir_defaults,
+                         this_conf);
+        }
                 core_dir = ap_get_module_config(per_dir_defaults,
                                                 &core_module);
             }
@@ -1008,9 +1026,23 @@
 
         if (entry_core->r) {
             if (!ap_regexec(entry_core->r, r->filename, 0, NULL, 
REG_NOTEOL)) {
-                per_dir_defaults = ap_merge_per_dir_configs(r->pool,
-                                                            
per_dir_defaults,
-                                                            entry_config);
+        if (sec < NUM_PRE_MERGE_DIRS) {
+            bitmask |= (1 << sec);
+            if (sconf->pre_merged_dir_conf[bitmask].conf) {
+            per_dir_defaults =
+                sconf->pre_merged_dir_conf[bitmask].conf;
+            }
+            else {
+            per_dir_defaults =
+                ap_merge_per_dir_configs(r->pool, per_dir_defaults,
+                             entry_config);
+            }
+        }
+        else {
+            per_dir_defaults =
+            ap_merge_per_dir_configs(r->pool, per_dir_defaults,
+                         entry_config);
+        }
             }
         }
     }


Mime
View raw message