httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Randy Terbush <ra...@zyzzyva.com>
Subject Re: [PATCH] directory_walk and relatives performance improvements
Date Fri, 07 Feb 1997 04:39:20 GMT

Nice work Dean. +1


> I decided to do some gprofs of the server.  I'm pretty sure that when
> Randy did it before (it was Randy right?) we got bogus results because
> when a fork() happens the child inherits all the parent's current profile,
> so lots of stuff get overcounted.  So for my profiling I chose just
> to run with -X.  My test set was 1000 requests from the access log on
> www.arctic.org ... which is mostly static files with a few SSI.
> 
> The profile before optimizing:
> <http://www.arctic.org/~dgaudet/apache/gprof.pre-optimize>
> 
> Notice the heavy use of pstrcat() by make_full_path().  A quick look at
> directory_walk() shows that the call to make_full_path() is to initialize
> an auto variable... a variable which is only used in one obscure if()
> much further down into the function.  auto initializers are evil!
> I cleaned this one, and a few others while I was at it.
> 
> Notice the heavy use of is_matchexp() inside directory_walk() (and its
> relatives).  It's being called on a string that is static after config
> time, so I added the member d_is_matchexp to core_dir_config and set it
> appropriately at config time.
> 
> Then I generated another profile:
> <http://www.arctic.org/~dgaudet/apache/gprof.optimize>
> 
> Massive time spent in merge_core_dir_configs().  After eyeballing it I
> noticed the section that merges the response_code_strings[].  There are now
> 38 response codes (there were only 10 in 1.1), so this is expensive.  I'm
> guessing that most people don't modify the response codes on a per
> directory basis... so I optimized it as follows:
> 
>     char *response_code_strings[RESPONSE_CODES];
> 
> became:
> 
>     char **response_code_strings;
> 
> And it isn't allocated until one of them is set.  This let me speed up
> the merge.
> 
> The final profile is at:
> <http://www.arctic.org/~dgaudet/apache/gprof.optimize2>
> 
> Some areas I looked at but didn't touch include
> 
> - tweaking directory_walk() to avoid repeated calls to make_dirstr()
>     (and its associated palloc()s) -- do one allocation at the beginning
>     and screw with the string as you go
> 
> - implementing a simple hash for tables -- no buckets, just a bit vector.
>     If the bit is 0 then you can short circuit the scan through the
>     table when adding/merging.
> 
> - instead of scanning the array of modules each time a handler needs to be
>     invoked, build linked lists of only the non-NULL handlers at config time.
>     There are an awful lot of NULLs in the array which are a waste of time.
>     run_method is up there in the final profile... so this should be a win.
> 
> I wish I had real profiling tools... like something that would give me
> sampling information on a line basis.  (but linux tools suck)
> 
> At any rate, here is the patch.
> 
> Dean
> 
> Index: http_core.c
> ===================================================================
> RCS file: /export/home/cvs/apache/src/http_core.c,v
> retrieving revision 1.62
> diff -c -3 -r1.62 http_core.c
> *** http_core.c	1997/02/01 22:03:36	1.62
> --- http_core.c	1997/02/02 08:16:45
> ***************
> *** 83,88 ****
> --- 83,90 ----
>     
>       if (!dir || dir[strlen(dir) - 1] == '/') conf->d = dir;
>       else conf->d = pstrcat (a, dir, "/", NULL);
> +     conf->d_is_matchexp = conf->d ? is_matchexp( conf->d ) : 0;
> + 
>   
>       conf->opts = dir ? OPT_UNSET : OPT_ALL;
>       conf->opts_add = conf->opts_remove = OPT_NONE;
> ***************
> *** 120,125 ****
> --- 122,128 ----
>       memcpy ((char *)conf, (const char *)base, sizeof(core_dir_config));
>       
>       conf->d = new->d;
> +     conf->d_is_matchexp = new->d_is_matchexp;
>       conf->r = new->r;
>       
>       if (new->opts != OPT_UNSET) conf->opts = new->opts;
> ***************
> *** 133,141 ****
>       if (new->auth_name) conf->auth_name = new->auth_name;
>       if (new->requires) conf->requires = new->requires;
>   
> !     for (i = 0; i < RESPONSE_CODES; ++i)
> !         if (new->response_code_strings[i] != NULL)
> ! 	   conf->response_code_strings[i] = new->response_code_strings[i];
>       if (new->hostname_lookups != 2)
>   	conf->hostname_lookups = new->hostname_lookups;
>       if ((new->do_rfc1413 & 2) == 0) conf->do_rfc1413 = new->do_rfc1413;
> --- 136,153 ----
>       if (new->auth_name) conf->auth_name = new->auth_name;
>       if (new->requires) conf->requires = new->requires;
>   
> !     if( new->response_code_strings ) {
> ! 	if( conf->response_code_strings == NULL ) {
> ! 	    conf->response_code_strings = palloc(a,
> ! 		sizeof(*conf->response_code_strings) * RESPONSE_CODES );
> ! 	    memcpy( conf->response_code_strings, new->response_code_strings,
> ! 		sizeof(*conf->response_code_strings) * RESPONSE_CODES );
> ! 	} else {
> ! 	    for (i = 0; i < RESPONSE_CODES; ++i)
> ! 		if (new->response_code_strings[i] != NULL)
> ! 		conf->response_code_strings[i] = new->response_code_strings[i];
> ! 	}
> !     }
>       if (new->hostname_lookups != 2)
>   	conf->hostname_lookups = new->hostname_lookups;
>       if ((new->do_rfc1413 & 2) == 0) conf->do_rfc1413 = new->do_rfc1413;
> ***************
> *** 299,304 ****
> --- 311,319 ----
>       core_dir_config *conf = 
>         (core_dir_config *)get_module_config(r->per_dir_config, &core_module);

>   
> +     if( conf->response_code_strings == NULL ) {
> + 	return NULL;
> +     }
>       return conf->response_code_strings[error_index];
>   }
>   
> ***************
> *** 435,440 ****
> --- 450,459 ----
>                   
>       /* Store it... */
>   
> +     if( conf->response_code_strings == NULL ) {
> + 	conf->response_code_strings = pcalloc(cmd->pool,
> + 	    sizeof(*conf->response_code_strings) * RESPONSE_CODES );
> +     }
>       conf->response_code_strings[index_number] = pstrdup (cmd->pool, line);
>   
>       return NULL;
> ***************
> *** 664,669 ****
> --- 683,689 ----
>   
>       conf = (core_dir_config *)get_module_config(new_url_conf, &core_module);
>       conf->d = pstrdup(cmd->pool, cmd->path);	/* No mangling, please */
> +     conf->d_is_matchexp = is_matchexp( conf->d );
>       conf->r = r;
>   
>       add_per_url_conf (cmd->server, new_url_conf);
> ***************
> *** 714,719 ****
> --- 734,740 ----
>   
>       conf = (core_dir_config *)get_module_config(new_file_conf, &core_module);
>       conf->d = pstrdup(cmd->pool, cmd->path);
> +     conf->d_is_matchexp = is_matchexp( conf->d );
>       conf->r = r;
>   
>       add_file_conf (c, new_file_conf);
> Index: http_core.h
> ===================================================================
> RCS file: /export/home/cvs/apache/src/http_core.h,v
> retrieving revision 1.18
> diff -c -3 -r1.18 http_core.h
> *** http_core.h	1997/01/01 18:10:18	1.18
> --- http_core.h	1997/02/02 08:16:45
> ***************
> *** 128,133 ****
> --- 128,139 ----
>   
>   typedef struct {
>       char *d;
> +     /* since is_matchexp(conf->d) was being called so frequently in
> +      * directory_walk() and its relatives, this field was created and
> +      * is set to the result of that call.
> +      */
> +     int d_is_matchexp;
> + 
>       allow_options_t opts;
>       allow_options_t opts_add;
>       allow_options_t opts_remove;
> ***************
> *** 150,158 ****
>       int content_md5;
>       
>       /* Custom response config. These can contain text or a URL to redirect to.
>        */
>     
> !     char *response_code_strings[RESPONSE_CODES];
>   
>       /* Hostname resolution etc */
>       int hostname_lookups;
> --- 156,167 ----
>       int content_md5;
>       
>       /* Custom response config. These can contain text or a URL to redirect to.
> +      * if response_code_strings is NULL then there are none in the config,
> +      * if it's not null then it's allocated to sizeof(char*)*RESPONSE_CODES.
> +      * This lets us do quick merges in merge_core_dir_configs().
>        */
>     
> !     char **response_code_strings;
>   
>       /* Hostname resolution etc */
>       int hostname_lookups;
> Index: http_request.c
> ===================================================================
> RCS file: /export/home/cvs/apache/src/http_request.c,v
> retrieving revision 1.40
> diff -c -3 -r1.40 http_request.c
> *** http_request.c	1997/01/25 15:44:39	1.40
> --- http_request.c	1997/02/02 08:16:46
> ***************
> *** 286,292 ****
>   		if (!regexec(entry_core->r, test_filename, 0, NULL, 0))
>   		    this_conf = entry_config;
>   	    }
> ! 	    else if (is_matchexp(entry_dir)) {
>   		if (!strcmp_match(test_filename, entry_dir))
>   		    this_conf = entry_config;
>   	    }
> --- 286,292 ----
>   		if (!regexec(entry_core->r, test_filename, 0, NULL, 0))
>   		    this_conf = entry_config;
>   	    }
> ! 	    else if (entry_core->d_is_matchexp) {
>   		if (!strcmp_match(test_filename, entry_dir))
>   		    this_conf = entry_config;
>   	    }
> ***************
> *** 319,337 ****
>       for (i = 1; i <= num_dirs; ++i) {
>           core_dir_config *core_dir =
>   	  (core_dir_config *)get_module_config(per_dir_defaults, &core_module);
> ! 	int allowed_here = core_dir->opts;
> ! 	int overrides_here = core_dir->override;
>           void *this_conf = NULL, *htaccess_conf = NULL;
>   	char *this_dir = make_dirstr (r->pool, test_filename, i);
> - 	char *config_name = make_full_path(r->pool, this_dir,
> - 					   sconf->access_name);
>   	int j;
>         
>   	/* Do symlink checks first, because they are done with the
>   	 * permissions appropriate to the *parent* directory...
>   	 */
>   	
> ! 	if ((res = check_symlinks (this_dir, allowed_here)))
>   	{
>   	    log_reason("Symbolic link not allowed", this_dir, r);
>   	    return res;
> --- 319,334 ----
>       for (i = 1; i <= num_dirs; ++i) {
>           core_dir_config *core_dir =
>   	  (core_dir_config *)get_module_config(per_dir_defaults, &core_module);
> ! 	int overrides_here;
>           void *this_conf = NULL, *htaccess_conf = NULL;
>   	char *this_dir = make_dirstr (r->pool, test_filename, i);
>   	int j;
>         
>   	/* Do symlink checks first, because they are done with the
>   	 * permissions appropriate to the *parent* directory...
>   	 */
>   	
> ! 	if ((res = check_symlinks (this_dir, core_dir->opts)))
>   	{
>   	    log_reason("Symbolic link not allowed", this_dir, r);
>   	    return res;
> ***************
> *** 363,390 ****
>   		    this_conf = entry_config;
>   		}
>   	    }
> ! 	    else if (is_matchexp(entry_dir) &&
>   		     !strcmp_match(this_dir, entry_dir)) {
>   		sec[j] = NULL;	
>   	        this_conf = entry_config;
>   	    }
>   	    else if (!strcmp (this_dir, entry_dir))
>   	        this_conf = entry_config;
> - 	}
>   
> ! 	if (this_conf)
> ! 	{
> ! 	    per_dir_defaults =
> ! 	        merge_per_dir_configs (r->pool, per_dir_defaults, this_conf);
> ! 	    core_dir =(core_dir_config *)get_module_config(per_dir_defaults,
>   							   &core_module);
>   	}
>   	overrides_here = core_dir->override;
>   
>   	/* If .htaccess files are enabled, check for one.
>   	 */
>   	
>   	if (overrides_here) {
>   	    res = parse_htaccess (&htaccess_conf, r, overrides_here,
>   				  this_dir, config_name);
>   	    if (res) return res;
> --- 360,390 ----
>   		    this_conf = entry_config;
>   		}
>   	    }
> ! 	    else if (entry_core->d_is_matchexp &&
>   		     !strcmp_match(this_dir, entry_dir)) {
>   		sec[j] = NULL;	
>   	        this_conf = entry_config;
>   	    }
>   	    else if (!strcmp (this_dir, entry_dir))
>   	        this_conf = entry_config;
>   
> !           if (this_conf)
> !           {
> !               per_dir_defaults =
> !                   merge_per_dir_configs (r->pool, per_dir_defaults, this_conf);
> !               core_dir =(core_dir_config *)get_module_config(per_dir_defaults,
>   							   &core_module);
> +           }
>   	}
> + 
>   	overrides_here = core_dir->override;
>   
>   	/* If .htaccess files are enabled, check for one.
>   	 */
>   	
>   	if (overrides_here) {
> + 	    char *config_name = make_full_path(r->pool, this_dir,
> + 					   sconf->access_name);
>   	    res = parse_htaccess (&htaccess_conf, r, overrides_here,
>   				  this_dir, config_name);
>   	    if (res) return res;
> ***************
> *** 456,462 ****
>   		if (!regexec(entry_core->r, test_location, 0, NULL, 0))
>   		    this_conf = entry_config;
>   	    }
> ! 	    else if (is_matchexp(entry_url)) {
>   		if (!strcmp_match(test_location, entry_url))
>   		    this_conf = entry_config;
>   	    }
> --- 456,462 ----
>   		if (!regexec(entry_core->r, test_location, 0, NULL, 0))
>   		    this_conf = entry_config;
>   	    }
> ! 	    else if( entry_core->d_is_matchexp ) {
>   		if (!strcmp_match(test_location, entry_url))
>   		    this_conf = entry_config;
>   	    }
> ***************
> *** 518,524 ****
>   		if (!regexec(entry_core->r, test_file, 0, NULL, 0))
>   		    this_conf = entry_config;
>   	    }
> ! 	    else if (is_matchexp(entry_file)) {
>   		if (!strcmp_match(test_file, entry_file))
>   		    this_conf = entry_config;
>   	    }
> --- 518,524 ----
>   		if (!regexec(entry_core->r, test_file, 0, NULL, 0))
>   		    this_conf = entry_config;
>   	    }
> ! 	    else if ( entry_core->d_is_matchexp ) {
>   		if (!strcmp_match(test_file, entry_file))
>   		    this_conf = entry_config;
>   	    }




Mime
View raw message