httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Dean Gaudet <dgau...@arctic.org>
Subject [PATCH] less than 20 syscalls for a static request
Date Thu, 14 Aug 1997 09:53:18 GMT
Ok yeah it's me, performance freak, again.  Remember the last traces I
sent around?  The best was 29 syscalls for a static request.  But a lot of
them were calls to alarm()... which really sucks because the majority of
requests don't ever time out, so it's a waste to be telling the system to
change your alarm.  I have a way around it. 

(To get to this point you need "Options +FollowSymLinks", "HostnameLookups
Off", "AllowOverride none", "Rule STATUS=no", mod_mmap, and the latest code.) 

I define the notion of "virtual time".  A child moves forward in virtual
time whenever it updates its scoreboard entry, or when it changes its
timeout (via hard/soft/kill_timeout).  Instead of calling alarm() to set a
timeout, the child just notes the timeout_len in its scoreboard.

Every second, during scoreboard maintenance, the parent compares the child's
current vtime against the last vtime it observed.  If it has changed then the
parent stores time(0) into the child's scoreboard entry.  If there is no change
the parent compares the time of the last change + timeout_len against
the current time ... and sends a SIGALRM if the timeout has expired.

That is to say, if the child doesn't go forward in vtime in timeout_len
real seconds +/- scoreboard maintenance interval then it is sent a SIGALRM.
Which is pretty much what alarm(timeout_len) would do.

This doesn't work at all with scoreboard files.  I was careful in the
ordering of various operations to ensure that even on SMP systems this
behaves well.  The data structure is just nice enough that multiple cpus
can whack on it without locking.

And without further ado, here is Apache in 22 syscalls:

accept(15, {sin_family=AF_INET, sin_port=htons(1361), sin_addr=inet_addr("127.0.0.1")}, [16])
= 3
flock(18, LOCK_UN)                      = 0
sigaction(SIGUSR1, {SIG_IGN}, {0x804c1b8, [], SA_INTERRUPT}) = 0
getsockname(3, {sin_family=AF_INET, sin_port=htons(8080), sin_addr=inet_addr("127.0.0.1")},
[16]) = 0
setsockopt(3, IPPROTO_TCP1, [1], 4)     = 0
read(3, "GET /apache_pb.gif HTTP/1.0\n\n\n"..., 4096) = 30
sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}) = 0
time(NULL)                              = 871551872
stat("/home/dgaudet/ap/apache3/htdocs/apache_pb.gif", {st_mode=S_IFREG|0664, st_size=2326,
...}) = 0
open("/home/dgaudet/ap/apache3/htdocs/apache_pb.gif", O_RDONLY) = 4
mmap(0, 2326, PROT_READ, MAP_PRIVATE, 4, 0) = 0x40008000
close(4)                                = 0
time(NULL)                              = 871551872
write(16, "127.0.0.1 - - [14/Aug/1997:02:44"..., 82) = 82
write(3, "HTTP/1.1 200 OK\r\nDate: Thu, 14"..., 2570) = 2570
shutdown(3, 1 /* send */)               = 0
oldselect(4, [3], NULL, [3], {2, 0})    = 1 (in [3], left {2, 0})
read(3, "", 2048)                       = 0
close(3)                                = 0
sigaction(SIGUSR1, {0x804c1b8, [], SA_INTERRUPT}, {SIG_IGN}) = 0
munmap(0x40008000, 2326)                = 0
flock(18, LOCK_EX)                      = 0

So I said I could get it in under 20 syscalls.  The way to do that is
to eliminate all the SIGUSR1 manipulations and replace them with a global
usr1 state variable, leaving the handler active at all times.  I haven't
done this yet because I'm not 100% sure that we've always required all of
the code to expect an EINTR ... code which runs under timeouts certainly
expects it.  But other code might not.

This is pretty much the minimum.  I suppose it could go down to 17
syscalls if mmap wasn't used, but mmap is a bigger win.  It could go down
to 16 if log writes were buffered.  Very recent linux kernel patches
implement SO_LINGER properly, removing another 3 syscalls (and adding
one more) yielding 15.  One of the time() calls could be eliminated if
the log were to use the request_time.  And the flocks could be removed
on some archs.  Yielding 12 syscalls.  But I think I'll be happy with 19 :)

Dean

Index: core/buff.c
===================================================================
RCS file: /export/home/cvs/apachen/src/core/buff.c,v
retrieving revision 1.43
diff -u -r1.43 buff.c
--- buff.c	1997/08/08 08:00:18	1.43
+++ buff.c	1997/08/14 09:31:13
@@ -474,7 +474,7 @@
     fd_set fds;
     struct timeval tv;
 
-    if (fb->incnt > 0) {
+    if (fb->incnt > 0 || fb->outcnt == 0) {
 	return;
     }
     /* test for a block */
Index: core/http_main.c
===================================================================
RCS file: /export/home/cvs/apachen/src/core/http_main.c,v
retrieving revision 1.199
diff -u -r1.199 http_main.c
--- http_main.c	1997/08/13 08:37:18	1.199
+++ http_main.c	1997/08/14 09:31:22
@@ -238,6 +238,11 @@
 pool *ptrans;			/* Pool for per-transaction stuff */
 
 int APACHE_TLS my_pid;		/* it seems silly to call getpid all the time */
+#ifndef MULTITHREAD
+static int my_child_num;
+#endif
+
+static scoreboard *scoreboard_image = NULL;
 
 /* small utility macros to make things easier to read */
 
@@ -542,7 +547,16 @@
     if(x) {
 	alarm_fn = fn;
     }
+#ifndef OPTIMIZE_TIMEOUTS
     old = alarm(x);
+#else
+    /* Just note the timeout in our scoreboard, no need to call the system.
+     * We also note that the virtual time has gone forward.
+     */
+    old = scoreboard_image->servers[my_child_num].timeout_len;
+    scoreboard_image->servers[my_child_num].timeout_len = x;
+    ++scoreboard_image->servers[my_child_num].cur_vtime;
+#endif
 #endif
     return(old);
 }
@@ -800,7 +814,6 @@
  */
 #undef HAVE_MMAP
 #define HAVE_MMAP 1
-static scoreboard *scoreboard_image = NULL;
 
 void reinit_scoreboard (pool *p)
 {
@@ -821,7 +834,6 @@
 
 #else /* MULTITHREAD */
 #if defined(HAVE_MMAP)
-static scoreboard *scoreboard_image=NULL;
 
 static void setup_shared_mem(void)
 {
@@ -927,7 +939,6 @@
 }
 
 #elif defined(HAVE_SHMGET)
-static scoreboard *scoreboard_image=NULL;
 static key_t shmkey = IPC_PRIVATE;
 static int shmid = -1;
 
@@ -1024,9 +1035,7 @@
 }
 
 #else
-#define SCOREBOARD_FILE
 static scoreboard _scoreboard_image;
-static scoreboard *scoreboard_image=&_scoreboard_image;
 static int scoreboard_fd;
 
 /* XXX: things are seriously screwed if we ever have to do a partial
@@ -1078,6 +1087,7 @@
     memset(scoreboard_image, 0, SCOREBOARD_SIZE);
     scoreboard_image->global.exit_generation=exit_gen;
 #else
+    scoreboard_image=&_scoreboard_image;
     scoreboard_fname = server_root_relative (p, scoreboard_fname);
 
     scoreboard_fd = popenf(p, scoreboard_fname, O_CREAT|O_BINARY|O_RDWR, 0644);
@@ -1164,10 +1174,7 @@
 static inline void put_scoreboard_info(int child_num,
     short_score *new_score_rec)
 { 
-#ifndef SCOREBOARD_FILE
-    memcpy(&scoreboard_image->servers[child_num], new_score_rec,
-	   sizeof(short_score));
-#else 
+#ifdef SCOREBOARD_FILE
     lseek(scoreboard_fd, (long)child_num * sizeof(short_score), 0);
     force_write(scoreboard_fd, new_score_rec, sizeof(short_score));
 #endif
@@ -1176,48 +1183,53 @@
 int update_child_status (int child_num, int status, request_rec *r)
 {
     int old_status;
-    short_score new_score_rec;
+    short_score *ss;
 
     if (child_num < 0)
 	return -1;
     
     sync_scoreboard_image();
-    new_score_rec = scoreboard_image->servers[child_num];
-    old_status = new_score_rec.status;
-    new_score_rec.x.pid = my_pid;
-    new_score_rec.status = status;
+    ss = &scoreboard_image->servers[child_num];
+    old_status = ss->status;
+    ss->x.pid = my_pid;
+    ss->status = status;
+    ++ss->cur_vtime;
 
 #if defined(STATUS)
-    new_score_rec.last_used=time(NULL);
+#ifdef OPTIMIZE_TIMEOUTS
+    ss->last_used = ss->last_rtime;	/* close enough */
+#else
+    ss->last_used=time(NULL);
+#endif
     if (status == SERVER_READY || status == SERVER_DEAD) {
 	/*
 	 * Reset individual counters
 	 */
 	if (status == SERVER_DEAD) {
-	    new_score_rec.my_access_count = 0L;
-	    new_score_rec.my_bytes_served = 0L;
+	    ss->my_access_count = 0L;
+	    ss->my_bytes_served = 0L;
 	}
-	new_score_rec.conn_count = (unsigned short)0;
-	new_score_rec.conn_bytes = (unsigned long)0;
+	ss->conn_count = (unsigned short)0;
+	ss->conn_bytes = (unsigned long)0;
     }
     if (r) {
 	int slot_size;
 	conn_rec *c = r->connection;
-	slot_size = sizeof(new_score_rec.client) - 1;
-	strncpy(new_score_rec.client, get_remote_host(c, r->per_dir_config,
+	slot_size = sizeof(ss->client) - 1;
+	strncpy(ss->client, get_remote_host(c, r->per_dir_config,
 	 REMOTE_NOLOOKUP), slot_size);
-	new_score_rec.client[slot_size] = '\0';
-	slot_size = sizeof(new_score_rec.request) - 1;
-	strncpy(new_score_rec.request, (r->the_request ? r->the_request :
+	ss->client[slot_size] = '\0';
+	slot_size = sizeof(ss->request) - 1;
+	strncpy(ss->request, (r->the_request ? r->the_request :
 	 "NULL"), slot_size);
-	new_score_rec.request[slot_size] = '\0';
-	slot_size = sizeof(new_score_rec.vhost) - 1;
-	strncpy(new_score_rec.vhost,r->server->server_hostname, slot_size);
-	new_score_rec.vhost[slot_size] = '\0';
+	ss->request[slot_size] = '\0';
+	slot_size = sizeof(ss->vhost) - 1;
+	strncpy(ss->vhost,r->server->server_hostname, slot_size);
+	ss->vhost[slot_size] = '\0';
     }
 #endif
 
-    put_scoreboard_info(child_num, &new_score_rec);
+    put_scoreboard_info(child_num, ss);
 
     return old_status;
 }
@@ -1240,7 +1252,7 @@
 #if defined(STATUS)
 void time_process_request (int child_num, int status)
 {
-    short_score new_score_rec;
+    short_score *ss;
 #if defined(NO_GETTIMEOFDAY)
     struct tms tms_blk;
 #endif
@@ -1249,56 +1261,55 @@
 	return ;
     
     sync_scoreboard_image();
-    new_score_rec = scoreboard_image->servers[child_num];
+    ss = &scoreboard_image->servers[child_num];
 
     if (status == START_PREQUEST) {
 #if defined(NO_GETTIMEOFDAY)
-	if ((new_score_rec.start_time = times(&tms_blk)) == -1)
-	    new_score_rec.start_time = (clock_t)0;
+	if ((ss->start_time = times(&tms_blk)) == -1)
+	    ss->start_time = (clock_t)0;
 #else
-	if (gettimeofday(&new_score_rec.start_time, (struct timezone *)0) < 0)
-	    new_score_rec.start_time.tv_sec =
-	    new_score_rec.start_time.tv_usec = 0L;
+	if (gettimeofday(&ss->start_time, (struct timezone *)0) < 0)
+	    ss->start_time.tv_sec =
+	    ss->start_time.tv_usec = 0L;
 #endif
     }
     else if (status == STOP_PREQUEST) {
 #if defined(NO_GETTIMEOFDAY)
-	if ((new_score_rec.stop_time = times(&tms_blk)) == -1)
-	    new_score_rec.stop_time = new_score_rec.start_time = (clock_t)0;
+	if ((ss->stop_time = times(&tms_blk)) == -1)
+	    ss->stop_time = ss->start_time = (clock_t)0;
 #else
-	if (gettimeofday(&new_score_rec.stop_time, (struct timezone *)0) < 0)
-	    new_score_rec.stop_time.tv_sec =
-	    new_score_rec.stop_time.tv_usec =
-	    new_score_rec.start_time.tv_sec =
-	    new_score_rec.start_time.tv_usec = 0L;
+	if (gettimeofday(&ss->stop_time, (struct timezone *)0) < 0)
+	    ss->stop_time.tv_sec =
+	    ss->stop_time.tv_usec =
+	    ss->start_time.tv_sec =
+	    ss->start_time.tv_usec = 0L;
 #endif
 
     }
 
-    put_scoreboard_info(child_num, &new_score_rec);
-
+    put_scoreboard_info(child_num, ss);
 }
 
 static void increment_counts (int child_num, request_rec *r)
 {
     long int bs=0;
-    short_score new_score_rec;
+    short_score *ss;
 
     sync_scoreboard_image();
-    new_score_rec = scoreboard_image->servers[child_num];
+    ss = &scoreboard_image->servers[child_num];
 
     if (r->sent_bodyct)
         bgetopt(r->connection->client, BO_BYTECT, &bs);
 
-    times(&new_score_rec.times);
-    new_score_rec.access_count ++;
-    new_score_rec.my_access_count ++;
-    new_score_rec.conn_count ++;
-    new_score_rec.bytes_served += (unsigned long)bs;
-    new_score_rec.my_bytes_served += (unsigned long)bs;
-    new_score_rec.conn_bytes += (unsigned long)bs;
+    times(&ss->times);
+    ss->access_count ++;
+    ss->my_access_count ++;
+    ss->conn_count ++;
+    ss->bytes_served += (unsigned long)bs;
+    ss->my_bytes_served += (unsigned long)bs;
+    ss->conn_bytes += (unsigned long)bs;
 
-    put_scoreboard_info(child_num, &new_score_rec); 
+    put_scoreboard_info(child_num, ss); 
 }
 #endif
 
@@ -2291,7 +2302,6 @@
 static int csd;
 static int dupped_csd;
 static int requests_this_child;
-static int child_num;
 static fd_set main_fds;
 
 void child_main(int child_num_arg)
@@ -2304,7 +2314,7 @@
     my_pid = getpid();
     csd = -1;
     dupped_csd = -1;
-    child_num = child_num_arg;
+    my_child_num = child_num_arg;
     requests_this_child = 0;
 
     /* needs to be done before we switch UIDs so we have permissions */
@@ -2330,7 +2340,7 @@
 
     child_init_modules(pconf, server_conf);
 
-    (void)update_child_status(child_num, SERVER_READY, (request_rec*)NULL);
+    (void)update_child_status(my_child_num, SERVER_READY, (request_rec*)NULL);
 
     /*
      * Setup the jump buffers so that we can return here after
@@ -2358,7 +2368,7 @@
          * (Re)initialize this child to a pre-connection state.
          */
 
-        alarm(0);		/* Cancel any outstanding alarms. */
+	kill_timeout(0);	/* Cancel any outstanding alarms. */
         timeout_req = NULL;	/* No request in progress */
 	current_conn = NULL;
     
@@ -2374,7 +2384,7 @@
 	    child_exit_modules(pconf, server_conf);
 	}
 
-	(void)update_child_status(child_num, SERVER_READY, (request_rec*)NULL);
+	(void)update_child_status(my_child_num, SERVER_READY, (request_rec*)NULL);
 
         /*
          * Wait for an acceptable connection to arrive.
@@ -2467,7 +2477,7 @@
 
 	sock_disable_nagle(csd);
 
-	(void)update_child_status(child_num, SERVER_BUSY_READ,
+	(void)update_child_status(my_child_num, SERVER_BUSY_READ,
 	                          (request_rec*)NULL);
 
 	conn_io = bcreate(ptrans, B_RDWR | B_SOCKET);
@@ -2495,7 +2505,7 @@
 	current_conn = new_connection (ptrans, server_conf, conn_io,
 				       (struct sockaddr_in *)&sa_client,
 				       (struct sockaddr_in *)&sa_server,
-				       child_num);
+				       my_child_num);
 
         /*
          * Read and process each request found on our connection
@@ -2508,19 +2518,19 @@
 	     * signal (SIGUSR1, SIG_IGN);
 	     */
 
-            (void)update_child_status(child_num, SERVER_BUSY_WRITE, r);
+            (void)update_child_status(my_child_num, SERVER_BUSY_WRITE, r);
 
             process_request(r);
 
 #if defined(STATUS)
-            increment_counts(child_num, r);
+            increment_counts(my_child_num, r);
 #endif
 
             if (!current_conn->keepalive || current_conn->aborted) 
                 break;
 
             destroy_pool(r->pool);
-            (void)update_child_status(child_num, SERVER_BUSY_KEEPALIVE,
+            (void)update_child_status(my_child_num, SERVER_BUSY_KEEPALIVE,
                                       (request_rec*)NULL);
 
             sync_scoreboard_image();
@@ -2667,6 +2677,10 @@
     int free_head;
     int *free_ptr;
     int free_length;
+    short_score *ss;
+#ifdef OPTIMIZE_TIMEOUTS
+    time_t now = time(0);
+#endif
 
     /* initialize the free_list */
     free_head = -1;
@@ -2678,7 +2692,8 @@
 
     sync_scoreboard_image ();
     for (i = 0; i < daemons_limit; ++i) {
-	switch (scoreboard_image->servers[i].status) {
+	ss = &scoreboard_image->servers[i];
+	switch (ss->status) {
 	case SERVER_READY:
 	    ++idle_count;
 	    /* always kill the highest numbered child if we have to...
@@ -2698,6 +2713,21 @@
 	    }
 	    break;
 	}
+#ifdef OPTIMIZE_TIMEOUTS
+	if (ss->status != SERVER_DEAD && ss->timeout_len) {
+	    /* if it's a live server, with a live timeout then start checking
+	     * its timeout */
+	    if (ss->cur_vtime != ss->last_vtime) {
+		/* it has made progress, so update its last_rtime, last_vtime */
+		ss->last_rtime = now;
+		ss->last_vtime = ss->cur_vtime;
+	    } else if (ss->last_rtime + ss->timeout_len < now) {
+		/* no progress, and the timeout length has been exceeded */
+		ss->timeout_len = 0;
+		kill (ss->x.pid, SIGALRM);
+	    }
+	}
+#endif
     }
     if (idle_count > daemons_max_free) {
 	/* kill off one child... we use SIGUSR1 because that'll cause it to
Index: core/httpd.h
===================================================================
RCS file: /export/home/cvs/apachen/src/core/httpd.h,v
retrieving revision 1.137
diff -u -r1.137 httpd.h
--- httpd.h	1997/08/05 06:02:43	1.137
+++ httpd.h	1997/08/14 09:31:24
@@ -873,3 +873,10 @@
  */
 API_EXPORT(void) log_assert(const char *szExp,const char *szFile,int nLine);
 #define ap_assert(exp) (void)( (exp) || (log_assert(#exp, __FILE__, __LINE__), 0) )
+
+/* The optimized timeout code only works if we're not MULTITHREAD and we're
+ * also not using a scoreboard file
+ */
+#if !defined (MULTITHREAD) && (defined (HAVE_MMAP) || defined (HAVE_SHMGET))
+#define OPTIMIZE_TIMEOUTS
+#endif
Index: core/scoreboard.h
===================================================================
RCS file: /export/home/cvs/apachen/src/core/scoreboard.h,v
retrieving revision 1.27
diff -u -r1.27 scoreboard.h
--- scoreboard.h	1997/07/24 04:32:30	1.27
+++ scoreboard.h	1997/08/14 09:31:24
@@ -75,12 +75,33 @@
 #define SERVER_BUSY_DNS 7       /* Looking up a hostname */
 #define SERVER_GRACEFUL 8	/* server is gracefully finishing request */
 
+/* A "virtual time" is simply a counter that indicates that a child is
+ * making progress.  The parent checks up on each child, and when they have
+ * made progress it resets the last_rtime element.  But when the child hasn't
+ * made progress in a time that's roughly timeout_len seconds long, it is
+ * sent a SIGALRM.
+ *
+ * vtime is an optimization that is used only when the scoreboard is in
+ * shared memory (it's not easy/feasible to do it in a scoreboard file).
+ * The essential observation is that timeouts rarely occur, the vast majority
+ * of hits finish before any timeout happens.  So it really sucks to have to
+ * ask the operating system to set up and destroy alarms many times during
+ * a request.
+ */
+typedef unsigned vtime_t;
+
 typedef struct {
     union {
 	pid_t pid;		/* if it's not DEAD then this is the pid */
 	int free_list;		/* otherwise this is scratch space */
     } x;
-    int status;
+#ifdef OPTIMIZE_TIMEOUTS
+    vtime_t last_vtime;		/* the last vtime the parent has seen */
+    time_t last_rtime;		/* time(0) of the last change */
+    vtime_t cur_vtime;		/* the child's current vtime */
+    unsigned short timeout_len;	/* length of the timeout */
+#endif
+    signed char status;
 #if defined(STATUS)
     unsigned long access_count;
     unsigned long bytes_served;


Mime
View raw message