httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Dean Gaudet <dgau...@arctic.org>
Subject graceful restarts, take 5
Date Tue, 22 Apr 1997 06:59:10 GMT
I'm really happy with this one. 

The restart handler (I merged the two) no longer does a longjmp.  I was
running into so many race conditions and problems dealing with reliable
versus unreliable signals, and crap like that, that I decided to just make
the handler set the volatile globals is_graceful and restart_pending.

I rearranged the main loop in standalone_main to check restart_pending at
the appropriate spot.  I systematically checked all the system calls it
uses (well I didn't dig through the config reading stuff, I *think* we're
ok there, but some modules might be screwed) to make sure they deal with
EINTR when they're supposed to.

I cleaned up a race condition in make_child related to writing the
child pid into the scoreboard.  Note that using restart_pending eliminates
the race condition that Marc discovered yesterday (child becoming parent). 

child_main now tries to serve at least one request on every connection
although there is a small race condition we can't eliminate.  It will let
itself die during keepalive even after possibly reading a request line,
but it tries to minimize that as well.  (I had to hack into
read_request_line to do that, ew.)  I think browsers supporting keepalive
will be far happier to restart a request that they tried to issue on a
"keptalive" connection.  After all, they don't know if the server timed out
on them or not.

My test case was to start viewing a 40 Mb .mov, and surf against the
server.  While doing that it was running "while 1 kill -USR1" achieving 25
or 30 restarts per second (this is a dual pentium 133 linux box).  I got
no broken images.  My mov happily kept playing.  I could surf /status and
see the G for the .mov and all the children being killed and regenerated.
I was amazed. 

The short list of features in this patch:

- working graceful restart ;)
- maintain scoreboard across graceful restart so that the various #child
  tuning parameters are strictly enforced
- elimination of many signal race conditions
- plugged memory leaks in mod_browser, mod_rewrite, and mod_include
- memory allocator tuning to avoid over-allocating blocks
- some cleanup in http_main.c

caveats:

- it has only been tested on Linux (2.0.30) with LINUX_TWEAK
- systems with file-only scoreboards still have race conditions, and
  probably don't work
- SIGHUP hasn't been tested at all
- it requires the latest CVS image including Roy's timeout patch.  You'll
  probably have to unapply take 4, apply Roy's patch, and then apply this.
- see the XXX: comments in the patch

Dean

Index: alloc.c
===================================================================
RCS file: /export/home/cvs/apache/src/alloc.c,v
retrieving revision 1.26
diff -c -3 -r1.26 alloc.c
*** alloc.c	1997/04/07 10:58:38	1.26
--- alloc.c	1997/04/22 06:44:05
***************
*** 185,194 ****
     * on the free list...
     */
  
-   min_size += BLOCK_MINFREE;
- 
    while (blok != NULL) {
!     if (min_size <= blok->h.endp - blok->h.first_avail) {
        *lastptr = blok->h.next;
        blok->h.next = NULL;
        return blok;
--- 185,192 ----
     * on the free list...
     */
  
    while (blok != NULL) {
!     if (min_size + BLOCK_MINFREE <= blok->h.endp - blok->h.first_avail) {
        *lastptr = blok->h.next;
        blok->h.next = NULL;
        return blok;
***************
*** 201,206 ****
--- 199,210 ----
  
    /* Nope. */
  
+   if (min_size + BLOCK_MINFREE < BLOCK_MINALLOC) {
+     min_size = BLOCK_MINALLOC;
+   } else {
+     min_size += BLOCK_MINALLOC;
+   }
+ 
    return malloc_block (min_size);
  }
  
***************
*** 896,901 ****
--- 900,914 ----
      register_cleanup (p, (void *)preg, regex_cleanup, regex_cleanup);
  
      return preg;
+ }
+ 
+ 
+ void pregfree(pool *p, regex_t *reg)
+ {
+     block_alarms();
+     regfree (reg);
+     kill_cleanup (p, (void *)reg, regex_cleanup);
+     unblock_alarms();
  }
  
  /*****************************************************************
Index: alloc.h
===================================================================
RCS file: /export/home/cvs/apache/src/alloc.h,v
retrieving revision 1.18
diff -c -3 -r1.18 alloc.h
*** alloc.h	1997/04/07 10:58:38	1.18
--- alloc.h	1997/04/22 06:44:05
***************
*** 210,216 ****
  void note_cleanups_for_fd (pool *, int);
  void kill_cleanups_for_fd (pool *p, int fd);
  
! regex_t *pregcomp(pool *p, const char *pattern, int cflags);
  
  /* routines to note closes... file descriptors are constrained enough
   * on some systems that we want to support this.
--- 210,217 ----
  void note_cleanups_for_fd (pool *, int);
  void kill_cleanups_for_fd (pool *p, int fd);
  
! regex_t *pregcomp (pool *p, const char *pattern, int cflags);
! void pregfree (pool *p, regex_t *reg);
  
  /* routines to note closes... file descriptors are constrained enough
   * on some systems that we want to support this.
***************
*** 236,244 ****
                   FILE **pipe_err);
  #define spawn_child(p,f,v,k,in,out) spawn_child_err(p,f,v,k,in,out,NULL)
  
! /* magic numbers --- only one so far, min free bytes in a new pool block */
  
! #define BLOCK_MINFREE 8192     
  
  /* Finally, some accounting */
  
--- 237,247 ----
                   FILE **pipe_err);
  #define spawn_child(p,f,v,k,in,out) spawn_child_err(p,f,v,k,in,out,NULL)
  
! /* magic numbers --- min free bytes to consider a free pool block useable,
!  * and the min amount to allocate if we have to go to malloc() */
  
! #define BLOCK_MINFREE 4096
! #define BLOCK_MINALLOC 8192
  
  /* Finally, some accounting */
  
Index: http_main.c
===================================================================
RCS file: /export/home/cvs/apache/src/http_main.c,v
retrieving revision 1.138
diff -c -3 -r1.138 http_main.c
*** http_main.c	1997/04/21 20:29:07	1.138
--- http_main.c	1997/04/22 06:44:06
***************
*** 1,3 ****
--- 1,8 ----
+ /* XXX: systems without HAVE_SHMGET or HAVE_MMAP do not reliably update
+  * the scoreboard because a received signal might interrupt the scoreboard
+  * calls.
+  */
+ 
  /* ====================================================================
   * Copyright (c) 1995-1997 The Apache Group.  All rights reserved.
   *
***************
*** 157,163 ****
  
  server_rec *server_conf;
  JMP_BUF jmpbuffer;
- JMP_BUF restart_buffer;
  int sd;
  static fd_set listenfds;
  static int listenmaxfd;
--- 162,167 ----
***************
*** 176,181 ****
--- 180,201 ----
  
  int one_process = 0;
  
+ /* small utility macros to make things easier to read */
+ 
+ #ifdef NO_KILLPG
+ #define ap_killpg(x, y)		(kill (-(x), (y)))
+ #else
+ #define ap_killpg(x, y)		(killpg ((x), (y)))
+ #endif
+ 
+ #if defined(USE_LONGJMP)
+ #define ap_longjmp(x, y)	(longjmp ((x), (y)))
+ #define ap_setjmp(x)		(setjmp (x))
+ #else
+ #define ap_longjmp(x, y)	(siglongjmp ((x), (y)))
+ #define ap_setjmp(x)		(sigsetjmp ((x), 1))
+ #endif
+ 
  #if defined(USE_FCNTL_SERIALIZED_ACCEPT)
  static struct flock lock_it = { F_WRLCK, 0, 0, 0 };
  static struct flock unlock_it = { F_UNLCK, 0, 0, 0 };
***************
*** 359,369 ****
      }
      
      if (!current_conn) {
! #if defined(USE_LONGJMP)
! 	longjmp(jmpbuffer,1);
! #else
! 	siglongjmp(jmpbuffer,1);
! #endif
      }
      
      if (timeout_req != NULL) dirconf = timeout_req->per_dir_config;
--- 379,385 ----
      }
      
      if (!current_conn) {
! 	ap_longjmp (jmpbuffer, 1);
      }
      
      if (timeout_req != NULL) dirconf = timeout_req->per_dir_config;
***************
*** 401,411 ****
  	bclose(timeout_req->connection->client);
      
  	if (!standalone) exit(0);
! #if defined(USE_LONGJMP)
! 	longjmp(jmpbuffer,1);
! #else
! 	siglongjmp(jmpbuffer,1);
! #endif
      }
      else {   /* abort the connection */
          bsetflag(current_conn->client, B_EOUT, 1);
--- 417,424 ----
  	bclose(timeout_req->connection->client);
      
  	if (!standalone) exit(0);
! 
! 	ap_longjmp (jmpbuffer, 1);
      }
      else {   /* abort the connection */
          bsetflag(current_conn->client, B_EOUT, 1);
***************
*** 538,548 ****
      }
      
      if (!current_conn) {
! #if defined(USE_LONGJMP)
! 	longjmp(jmpbuffer,1);
! #else
! 	siglongjmp(jmpbuffer,1);
! #endif
      }
      bsetflag(current_conn->client, B_EOUT, 1);
      current_conn->aborted = 1;
--- 551,557 ----
      }
      
      if (!current_conn) {
! 	ap_longjmp (jmpbuffer, 1);
      }
      bsetflag(current_conn->client, B_EOUT, 1);
      current_conn->aborted = 1;
***************
*** 1172,1266 ****
  in wait_or_timeout(). But this routine is still useful for systems with no
  waitpid().
  */
! int reap_children()
!     {
!     int status,n;
!     int ret=0;
  
!     for(n=0 ; n < HARD_SERVER_LIMIT ; ++n)
! 	if(scoreboard_image->servers[n].status != SERVER_DEAD
! 	   && waitpid(scoreboard_image->servers[n].pid,&status,WNOHANG) == -1
! 	   && errno == ECHILD)
! 	    {
! 	    sync_scoreboard_image();
! 	    update_child_status(n,SERVER_DEAD,NULL);
! 	    ret=1;
! 	    }
!     return ret;
      }
  #endif
  
  /* Finally, this routine is used by the caretaker process to wait for
   * a while...
   */
  
! #if 1
! 
! static int wait_or_timeout(int *status)
!     {
  #ifndef NEED_WAITPID
      int ret;
  
!     ret=waitpid(-1,status,WNOHANG);
!     if(ret <= 0)
! 	{
! 	sleep(1);
  	return -1;
! 	}
      return ret;
  #else
!     if(!reap_children())
  	sleep(1);
      return -1;
  #endif
!     }
  
- #else
  
! static JMP_BUF wait_timeout_buf;
  
! static void longjmp_out_of_alarm (int sig) {
! #if defined(USE_LONGJMP)
!     longjmp (wait_timeout_buf, 1);
! #else
!     siglongjmp (wait_timeout_buf, 1);
! #endif
  }
  
! int wait_or_timeout (int *status)
  {
!     int wait_or_timeout_retval = -1;
! #ifdef BROKEN_WAIT
!     static int ntimes;
! #endif
  
! #if defined(USE_LONGJMP)
!     if (setjmp(wait_timeout_buf) != 0) {
! #else 
!     if (sigsetjmp(wait_timeout_buf, 1) != 0) {
! #endif
! 	errno = ETIMEDOUT;
! 	return wait_or_timeout_retval;
      }
! #ifdef BROKEN_WAIT
!     if(++ntimes == 60)
! 	{
! 	reap_children();
! 	ntimes=0;
! 	}
! #endif
!     signal (SIGALRM, longjmp_out_of_alarm);
!     alarm(1);
! #if defined(NEXT)
!     wait_or_timeout_retval = wait((union wait *)status);
  #else
!     wait_or_timeout_retval = wait(status);
  #endif
-     alarm(0);
-     return wait_or_timeout_retval;
  }
  
- #endif
  
  /*****************************************************************
   * Here follows a long bunch of generic server bookkeeping stuff...
--- 1181,1338 ----
  in wait_or_timeout(). But this routine is still useful for systems with no
  waitpid().
  */
! int reap_children ()
! {
!     int status, n;
!     int ret = 0;
  
!     for (n = 0; n < HARD_SERVER_LIMIT; ++n) {
! 	if (scoreboard_image->servers[n].status != SERVER_DEAD
! 		&& waitpid (scoreboard_image->servers[n].pid, &status, WNOHANG)
! 		    == -1
! 		&& errno == ECHILD) {
! 	    sync_scoreboard_image ();
! 	    update_child_status (n, SERVER_DEAD, NULL);
! 	    ret = 1;
! 	}
      }
+     return ret;
+ }
  #endif
  
  /* Finally, this routine is used by the caretaker process to wait for
   * a while...
   */
  
! static int wait_or_timeout ()
! {
  #ifndef NEED_WAITPID
      int ret;
  
!     ret = waitpid (-1, NULL, WNOHANG);
!     if (ret == -1 && errno == EINTR) {
  	return -1;
!     }
!     if (ret <= 0) {
! 	sleep (1);
! 	return -1;
!     }
      return ret;
  #else
!     if (!reap_children ()) {
  	sleep(1);
+     }
      return -1;
  #endif
! }
  
  
! void sig_term() {
!     log_error("httpd: caught SIGTERM, shutting down", server_conf);
!     cleanup_scoreboard();
!     ap_killpg (pgrp, SIGKILL);
!     close(sd);
!     exit(1);
! }
  
! void bus_error(void) {
!     char emsg[256];
! 
!     ap_snprintf
! 	(
! 	    emsg,
! 	    sizeof(emsg) - 1,
! 	    "httpd: caught SIGBUS, attempting to dump core in %s",
! 	    server_root
! 	);
!     log_error(emsg, server_conf);
!     chdir(server_root);
!     abort();         
!     exit(1);
! }
! 
! void seg_fault() {
!     char emsg[256];
! 
!     ap_snprintf
! 	(
! 	    emsg,
! 	    sizeof(emsg) - 1,
! 	    "httpd: caught SIGSEGV, attempting to dump core in %s",
! 	    server_root
! 	);
!     log_error(emsg, server_conf);
!     chdir(server_root);
!     abort();
!     exit(1);
  }
  
! void just_die()			/* SIGHUP to child process??? */
  {
!     exit (0);
! }
  
! /* volatile just in case */
! static volatile int restart_pending;
! static volatile int is_graceful;
! static volatile int generation;
! 
! static void restart (int sig)
! {
!     is_graceful = (sig == SIGUSR1);
!     restart_pending = 1;
! }
! 
! 
! void set_signals()
! {
! #ifndef NO_USE_SIGACTION
!     struct sigaction sa;
! 
!     sigemptyset(&sa.sa_mask);
!     sa.sa_flags = 0;
! 
!     if (!one_process) {
! 	sa.sa_handler = (void (*)())seg_fault;
! 	if (sigaction (SIGSEGV, &sa, NULL) < 0)
! 	    log_unixerr ("sigaction(SIGSEGV)", NULL, NULL, server_conf);
! 	sa.sa_handler = (void (*)())bus_error;
! 	if (sigaction (SIGBUS, &sa, NULL) < 0)
! 	    log_unixerr ("sigaction(SIGBUS)", NULL, NULL, server_conf);
      }
!     sa.sa_handler = (void (*)())sig_term;
!     if (sigaction (SIGTERM, &sa, NULL) < 0)
! 	log_unixerr ("sigaction(SIGTERM)", NULL, NULL, server_conf);
! 
!     /* wait_or_timeout uses sleep() which could deliver a SIGALRM just as we're
!      * trying to process the restart requests.  That's not good.  restart
!      * cleans out the SIGALRM handler, but this totally avoids the race
!      * condition between when the restart request is made and when the handler
!      * is invoked.
!      *
!      * We also don't want to ignore HUPs and USR1 while we're busy processing
!      * one.
!      */
!     sigaddset (&sa.sa_mask, SIGALRM);
!     sigaddset (&sa.sa_mask, SIGHUP);
!     sigaddset (&sa.sa_mask, SIGUSR1);
!     sa.sa_handler = (void (*)())restart;
!     if (sigaction (SIGHUP, &sa, NULL) < 0)
! 	log_unixerr ("sigaction(SIGHUP)", NULL, NULL, server_conf);
!     if (sigaction (SIGUSR1, &sa, NULL) < 0)
! 	log_unixerr ("sigaction(SIGUSR1)", NULL, NULL, server_conf);
  #else
!     if(!one_process) {
! 	signal (SIGSEGV, (void (*)())seg_fault);
!     	signal (SIGBUS, (void (*)())bus_error);
!     }
! 
!     signal (SIGTERM, (void (*)())sig_term);
!     signal (SIGHUP, (void (*)())restart);
!     signal (SIGUSR1, (void (*)())restart);
  #endif
  }
  
  
  /*****************************************************************
   * Here follows a long bunch of generic server bookkeeping stuff...
***************
*** 1310,1364 ****
  #endif
  }
  
- void sig_term() {
-     log_error("httpd: caught SIGTERM, shutting down", server_conf);
-     cleanup_scoreboard();
- #ifndef NO_KILLPG
-     killpg(pgrp,SIGKILL);
- #else
-     kill(-pgrp,SIGKILL);
- #endif
-     close(sd);
-     exit(1);
- }
- 
- void bus_error(void) {
-     char emsg[256];
- 
-     ap_snprintf
- 	(
- 	    emsg,
- 	    sizeof(emsg) - 1,
- 	    "httpd: caught SIGBUS, attempting to dump core in %s",
- 	    server_root
- 	);
-     log_error(emsg, server_conf);
-     chdir(server_root);
-     abort();         
-     exit(1);
- }
- 
- void seg_fault() {
-     char emsg[256];
- 
-     ap_snprintf
- 	(
- 	    emsg,
- 	    sizeof(emsg) - 1,
- 	    "httpd: caught SIGSEGV, attempting to dump core in %s",
- 	    server_root
- 	);
-     log_error(emsg, server_conf);
-     chdir(server_root);
-     abort();
-     exit(1);
- }
- 
- void just_die()			/* SIGHUP to child process??? */
- {
-     exit (0);
- }
- 
  /* Reset group privileges, after rereading the config files
   * (our uid may have changed, and if so, we want the new perms).
   *
--- 1382,1387 ----
***************
*** 1429,1499 ****
      return (suexec_enabled);
  }
  
- static int is_graceful;
- static int generation;
- 
- void restart() {
-     signal (SIGALRM, SIG_IGN);
-     alarm (0);
-     is_graceful=0;
- #if defined(USE_LONGJMP)
-     longjmp(restart_buffer,1);
- #else
-     siglongjmp(restart_buffer,1);
- #endif
- }
- 
- void graceful_restart()
-     {
-     scoreboard_image->global.exit_generation=generation;
-     is_graceful=1;
-     update_scoreboard_global();
- #if defined(USE_LONGJMP)
-     longjmp(restart_buffer,1);
- #else
-     siglongjmp(restart_buffer,1);
- #endif
-     }
- 
- void set_signals()
- {
- #ifndef NO_USE_SIGACTION
-     struct sigaction sa;
- 
-     sigemptyset(&sa.sa_mask);
-     sa.sa_flags = 0;
- 
-     if (!one_process) {
- 	sa.sa_handler = (void (*)())seg_fault;
- 	if (sigaction(SIGSEGV, &sa, NULL) < 0)
- 	    log_unixerr("sigaction(SIGSEGV)", NULL, NULL, server_conf);
- 	sa.sa_handler = (void (*)())bus_error;
- 	if (sigaction(SIGBUS, &sa, NULL) < 0)
- 	    log_unixerr("sigaction(SIGBUS)", NULL, NULL, server_conf);
-     }
-     /* USE WITH EXTREME CAUTION. Graceful restarts are known to break */
-     /*  problems will be dealt with in a future release */
-     sa.sa_handler=(void (*)())sig_term;
-     if(sigaction(SIGTERM,&sa,NULL) < 0)
- 	log_unixerr("sigaction(SIGTERM)", NULL, NULL, server_conf);
-     sa.sa_handler=(void (*)())restart;
-     if(sigaction(SIGHUP,&sa,NULL) < 0)
- 	log_unixerr("sigaction(SIGHUP)", NULL, NULL, server_conf);
-     sa.sa_handler=(void (*)())graceful_restart;
-     if(sigaction(SIGUSR1,&sa,NULL) < 0)
- 	log_unixerr("sigaction(SIGUSR1)", NULL, NULL, server_conf);
- #else
-     if(!one_process) {
- 	signal(SIGSEGV,(void (*)())seg_fault);
-     	signal(SIGBUS,(void (*)())bus_error);
-     }
- 
-     signal(SIGTERM,(void (*)())sig_term);
-     signal(SIGHUP,(void (*)())restart);
-     signal(SIGUSR1,(void (*)())graceful_restart);
- #endif
- }
- 
  /*****************************************************************
   * Connection structures and accounting...
   * Should these be global?  Only to this file, at least...
--- 1452,1457 ----
***************
*** 1727,1737 ****
       * Setup the jump buffers so that we can return here after
       * a signal or a timeout (yeah, I know, same thing).
       */
! #if defined(USE_LONGJMP)
!     setjmp(jmpbuffer);
! #else
!     sigsetjmp(jmpbuffer,1);
! #endif
  #ifndef __EMX__
      signal(SIGURG, timeout);
  #endif    
--- 1685,1691 ----
       * Setup the jump buffers so that we can return here after
       * a signal or a timeout (yeah, I know, same thing).
       */
!     ap_setjmp (jmpbuffer);
  #ifndef __EMX__
      signal(SIGURG, timeout);
  #endif    
***************
*** 1741,1746 ****
--- 1695,1706 ----
  	BUFF *conn_io;
  	request_rec *r;
        
+ 	/* Prepare to receive a SIGUSR1 due to graceful restart so that
+ 	 * we can exit cleanly.  Since we're between connections right
+ 	 * now it's the right time to exit, but we might be blocked in a
+ 	 * system call when the graceful restart request is made. */
+ 	signal (SIGUSR1, (void (*)())just_die);
+ 
          /*
           * (Re)initialize this child to a pre-connection state.
           */
***************
*** 1826,1831 ****
--- 1786,1796 ----
  
          accept_mutex_off(); /* unlock after "accept" */
  
+ 	/* We've got a socket, let's at least process one request off the
+ 	 * socket before we accept a graceful restart request.
+ 	 */
+ 	signal (SIGUSR1, SIG_IGN);
+ 
  	note_cleanups_for_fd(ptrans,csd);
  
          /*
***************
*** 1867,1872 ****
--- 1832,1843 ----
  
          for (;;) {
              r = read_request(current_conn);
+ 
+ 	    /* ok we've read the request... it's a little too late
+ 	     * to do a graceful restart, so ignore them for now.
+ 	     */
+ 	    signal (SIGUSR1, SIG_IGN);
+ 
              (void)update_child_status(child_num, SERVER_BUSY_WRITE, r);
  
              if (r) process_request(r); /* else premature EOF --- ignore */
***************
*** 1885,1890 ****
--- 1856,1869 ----
                  bclose(conn_io);
                  exit(0);
              }
+ 
+ 	    /* In case we get a graceful restart while we're blocked
+ 	     * waiting for the request.
+ 	     * XXX: This isn't perfect, we might actually read the
+ 	     * request and then just die without saying anything to
+ 	     * the client.
+ 	     */
+ 	    signal (SIGUSR1, (void (*)())just_die);
          }
  
          /*
***************
*** 1921,1929 ****
--- 1900,1916 ----
  	child_main (child_num);
      }
  
+     Explain1 ("Starting new child in slot %d", child_num);
+     (void)update_child_status (child_num, SERVER_STARTING, (request_rec *)NULL);
+ 
      if ((pid = fork()) == -1) {
  	log_unixerr("fork", NULL, "Unable to fork new process", server_conf);
  
+ 	/* fork didn't succeed. Fix the scoreboard or else
+ 	 * it will say SERVER_STARTING forever and ever
+ 	 */
+ 	(void)update_child_status (child_num, SERVER_DEAD, (request_rec*)NULL);
+ 
  	/* In case system resources are maxxed out, we don't want
             Apache running away with the CPU trying to fork over and
             over and over again. */
***************
*** 1933,1942 ****
--- 1920,1944 ----
      } 
      
      if (!pid) {
+ 	/* Disable the restart signal handlers and enable the just_die stuff.
+ 	 * Note that since restart() just notes that a restart has been
+ 	 * requested there's no race condition here.
+ 	 */
  	signal (SIGHUP, (void (*)())just_die);
+ 	signal (SIGUSR1, (void (*)())just_die);
  	signal (SIGTERM, (void (*)())just_die);
  	child_main (child_num);
      }
+ 
+     /* If the parent proceeds with a restart before the child has written
+      * their pid into the scoreboard we'll end up "forgetting" about the
+      * child.  So we write the child pid into the scoreboard now.  (This
+      * is safe, because the child is going to be writing the same value
+      * to the same word.)
+      * XXX: this needs to be sync'd to disk in the non shared memory stuff
+      */
+     scoreboard_image->servers[child_num].pid = pid;
+ 
      return 0;
  }
  
***************
*** 2076,2225 ****
   * Executive routines.
   */
  
- static int num_children = 0;
- 
  void standalone_main(int argc, char **argv)
  {
      struct sockaddr_in sa_server;
      int saved_sd;
  
      standalone = 1;
      sd = listenmaxfd = -1;
-     
-     if (!one_process) detach(); 
-     
- #if defined(USE_LONGJMP)
-     setjmp(restart_buffer);
- #else
-     sigsetjmp(restart_buffer,1);
- #endif
  
      ++generation;
  
!     signal (SIGHUP, SIG_IGN);	/* Until we're done (re)reading config */
!     
!     if(!one_process && !is_graceful)
!     {
! #ifndef NO_KILLPG
!       if (killpg(pgrp,SIGHUP) < 0)    /* Kill 'em off */
! #else
!       if (kill(-pgrp,SIGHUP) < 0)
! #endif
!         log_unixerr ("killpg SIGHUP", NULL, NULL, server_conf);
!     }
!     
!     if(is_graceful)
! 	{
! 	/* USE WITH EXTREME CAUTION. Graceful restarts are known to break */
! 	/*  problems will be dealt with in a future release */
! 	log_error("SIGUSR1 received.  Doing graceful restart",server_conf);
! 	kill_cleanups_for_fd(pconf,sd);
! 	}
!     else if (sd != -1 || listenmaxfd != -1) {
! 	reclaim_child_processes(); /* Not when just starting up */
! 	log_error ("SIGHUP received.  Attempting to restart", server_conf);
!     }
!     
!     copy_listeners(pconf);
!     saved_sd=sd;
!     restart_time = time(NULL);
!     clear_pool (pconf);
!     ptrans = make_sub_pool (pconf);
!     
!     server_conf = read_config(pconf, ptrans, server_confname); 
!     open_logs(server_conf, pconf);
!     set_group_privs();
!     accept_mutex_init(pconf);
!     reinit_scoreboard(pconf);
!     
!     default_server_hostnames (server_conf);
! 
!     if (listeners == NULL) {
!         if(!is_graceful) {
! 	    memset((char *) &sa_server, 0, sizeof(sa_server));
! 	    sa_server.sin_family=AF_INET;
! 	    sa_server.sin_addr=bind_address;
! 	    sa_server.sin_port=htons(server_conf->port);
  
! 	    sd = make_sock(pconf, &sa_server);
  	}
  	else {
! 	    sd = saved_sd;
! 	    note_cleanups_for_fd(pconf, sd);
  	}
-     }
-     else {
- 	listen_rec *lr;
- 	int fd;
  
! 	listenmaxfd = -1;
! 	FD_ZERO(&listenfds);
! 	for (lr=listeners; lr != NULL; lr=lr->next)
! 	{
! 	    fd=find_listener(lr);
! 	    if(fd < 0)
! 		fd = make_sock(pconf, &lr->local_addr);
! 	    FD_SET(fd, &listenfds);
! 	    if (fd > listenmaxfd) listenmaxfd = fd;
! 	    lr->fd=fd;
  	}
- 	close_unused_listeners();
- 	sd = -1;
-     }
  
!     set_signals();
!     log_pid(pconf, pid_fname);
  
!     num_children = 0;
!     
!     if (daemons_max_free < daemons_min_free + 1) /* Don't thrash... */
! 	daemons_max_free = daemons_min_free + 1;
  
!     while (num_children < daemons_to_start && num_children < daemons_limit) {
! 	make_child(server_conf, num_children++);
!     }
  
!     log_error ("Server configured -- resuming normal operations", server_conf);
!     
!     while (1) {
! 	int status, child_slot;
! 	int pid = wait_or_timeout(&status);
! 	
! 	if (pid >= 0) {
! 	    /* Child died... note that it's gone in the scoreboard. */
  	    sync_scoreboard_image();
! 	    child_slot = find_child_by_pid (pid);
! 	    Explain2("Reaping child %d slot %d",pid,child_slot);
! 	    if (child_slot >= 0)
! 		(void)update_child_status (child_slot, SERVER_DEAD,
! 		 (request_rec*)NULL);
!         }
! 
! 	sync_scoreboard_image();
! 	if ((count_idle_servers() < daemons_min_free)
! 	 && (child_slot = find_free_child_num()) >= 0
! 	 && child_slot < daemons_limit) {
! 	    Explain1("Starting new child in slot %d",child_slot);
! 	    (void)update_child_status(child_slot,SERVER_STARTING,
! 	     (request_rec*)NULL);
! 	    if (make_child(server_conf, child_slot) < 0) {
! 		/* fork didn't succeed. Fix the scoreboard or else
! 		   it will say SERVER_STARTING forever and ever */
! 	        (void)update_child_status(child_slot,SERVER_DEAD,
! 	             (request_rec*)NULL);
  	    }
! 
  	}
  
! 	/*
! 	if(scoreboard_image->global.please_exit && !count_live_servers())
! #if defined(USE_LONGJMP)
! 	    longjmp(restart_buffer,1);
! #else
! 	    siglongjmp(restart_buffer,1);
! #endif
! 	*/
!     }
  
  } /* standalone_main */
  
--- 2078,2286 ----
   * Executive routines.
   */
  
  void standalone_main(int argc, char **argv)
  {
      struct sockaddr_in sa_server;
      int saved_sd;
+     int remaining_children_to_start;
  
      standalone = 1;
      sd = listenmaxfd = -1;
  
+     is_graceful = 0;
      ++generation;
  
!     if (!one_process) detach (); 
  
!     do {
! 	copy_listeners(pconf);
! 	saved_sd = sd;
! 	if (!is_graceful) {
! 	    restart_time = time(NULL);
! 	}
! 	clear_pool (pconf);
! 	ptrans = make_sub_pool (pconf);
! 
! 	server_conf = read_config (pconf, ptrans, server_confname); 
! 	open_logs (server_conf, pconf);
! 	set_group_privs ();
! 	accept_mutex_init (pconf);
! 	if (!is_graceful) {
! 	    reinit_scoreboard(pconf);
! 	}
! 
! 	default_server_hostnames (server_conf);
! 
! 	if (listeners == NULL) {
! 	    if (!is_graceful) {
! 		memset ((char *)&sa_server, 0, sizeof (sa_server));
! 		sa_server.sin_family = AF_INET;
! 		sa_server.sin_addr = bind_address;
! 		sa_server.sin_port = htons (server_conf->port);
! 		sd = make_sock (pconf, &sa_server);
! 	    }
! 	    else {
! 		sd = saved_sd;
! 		note_cleanups_for_fd(pconf, sd);
! 	    }
  	}
  	else {
! 	    listen_rec *lr;
! 	    int fd;
! 
! 	    listenmaxfd = -1;
! 	    FD_ZERO (&listenfds);
! 	    for (lr = listeners; lr != NULL; lr = lr->next)
! 	    {
! 		fd = find_listener (lr);
! 		if (fd < 0) {
! 		    fd = make_sock (pconf, &lr->local_addr);
! 		}
! 		FD_SET (fd, &listenfds);
! 		if (fd > listenmaxfd) listenmaxfd = fd;
! 		lr->fd = fd;
! 	    }
! 	    close_unused_listeners ();
! 	    sd = -1;
  	}
  
! 	set_signals ();
! 	log_pid (pconf, pid_fname);
! 
! 	if (daemons_max_free < daemons_min_free + 1) /* Don't thrash... */
! 	    daemons_max_free = daemons_min_free + 1;
! 
! 	/* If we're doing a graceful_restart then we're going to see a lot
! 	 * of children exiting immediately when we get into the main loop
! 	 * below (because we just sent them SIGUSR1).  This happens pretty
! 	 * rapidly... and for each one that exits we'll start a new one until
! 	 * we reach at least daemons_min_free.  But we may be permitted to
! 	 * start more than that, so we'll just keep track of how many we're
! 	 * supposed to start up without the 1 second penalty between each fork.
! 	 */
! 	remaining_children_to_start = daemons_to_start;
! 	if( remaining_children_to_start > daemons_limit ) {
! 	    remaining_children_to_start = daemons_limit;
! 	}
! 	if (!is_graceful) {
! 	    while (remaining_children_to_start) {
! 		--remaining_children_to_start;
! 		make_child (server_conf, remaining_children_to_start);
! 	    }
  	}
  
! 	log_error ("Server configured -- resuming normal operations",
! 	    server_conf);
! 	restart_pending = 0;
! 
! 	while (!restart_pending) {
! 	    int child_slot;
! 	    int pid = wait_or_timeout ();
! 
! 	    /* XXX: if it takes longer than 1 second for all our children
! 	     * to start up and get into IDLE state then we may spawn an
! 	     * extra child
! 	     */
! 	    if (pid >= 0) {
! 		/* Child died... note that it's gone in the scoreboard. */
! 		sync_scoreboard_image ();
! 		child_slot = find_child_by_pid (pid);
! 		Explain2 ("Reaping child %d slot %d", pid, child_slot);
! 		if (child_slot >= 0) {
! 		    (void)update_child_status (child_slot, SERVER_DEAD,
! 			(request_rec *)NULL);
! 		} else if (is_graceful) {
! 		    /* Great, we've probably just lost a slot in the
! 		     * scoreboard.  Somehow we don't know about this
! 		     * child.
! 		     */
! 		    log_printf (server_conf,
! 			"long lost child came home! (pid %d)", pid );
! 		}
! 	    } else if (remaining_children_to_start) {
! 		/* we hit a 1 second timeout in which none of the previous
! 	 	 * generation of children needed to be reaped... so assume
! 		 * they're all done, and pick up the slack if any is left.
! 		 */
! 		while (remaining_children_to_start > 0) {
! 		    child_slot = find_free_child_num ();
! 		    if (child_slot < 0 || child_slot >= daemons_limit) {
! 			remaining_children_to_start = 0;
! 			break;
! 		    }
! 		    if (make_child (server_conf, child_slot) < 0) {
! 			remaining_children_to_start = 0;
! 			break;
! 		    }
! 		    --remaining_children_to_start;
! 		}
! 		/* In any event we really shouldn't do the code below because
! 		 * few of the servers we just started are in the IDLE state
! 		 * yet, so we'd mistakenly create an extra server.
! 		 */
! 		continue;
! 	    }
  
! 	    sync_scoreboard_image ();
! 	    if ((remaining_children_to_start
! 		    || (count_idle_servers () < daemons_min_free))
! 		&& (child_slot = find_free_child_num ()) >= 0
! 		&& child_slot < daemons_limit) {
! 		make_child (server_conf, child_slot);
! 	    }
! 	    if (remaining_children_to_start) {
! 		--remaining_children_to_start;
! 	    }
! 	}
  
! 	/* we've been told to restart */
  
! 	if (one_process) {
! 	    /* not worth thinking about */
! 	    exit (0);
! 	}
! 
! 	if (is_graceful) {
! 	    int i;
! 
! 	    /* USE WITH CAUTION:  Graceful restarts are not known to work
! 	    * in various configurations on the architectures we support. */
! 	    scoreboard_image->global.exit_generation = generation;
! 	    update_scoreboard_global ();
! 
! 	    log_error ("SIGUSR1 received.  Doing graceful restart",server_conf);
! 	    kill_cleanups_for_fd (pconf, sd);
! 	    /* kill off the idle ones */
! 	    if (ap_killpg(pgrp, SIGUSR1) < 0) {
! 		log_unixerr ("killpg SIGUSR1", NULL, NULL, server_conf);
! 	    }
! 	    /* This is mostly for debugging... so that we know what is still
! 	     * gracefully dealing with existing request.
! 	     * XXX: clean this up a bit?
! 	     */
  	    sync_scoreboard_image();
! 	    for (i = 0; i < daemons_limit; ++i ) {
! 		if (scoreboard_image->servers[i].status != SERVER_DEAD) {
! 		    scoreboard_image->servers[i].status = SERVER_GRACEFUL;
! 		}
  	    }
! #if !defined(HAVE_MMAP) && !defined(HAVE_SHMGET)
! 	    lseek (scoreboard_fd, 0L, 0);
! 	    force_write (scoreboard_fd, (char*)scoreboard_image,
! 			sizeof(*scoreboard_image));
! #endif
! 	}
! 	else {
! 	    /* Kill 'em off */
! 	    if (ap_killpg (pgrp, SIGHUP) < 0) {
! 		log_unixerr ("killpg SIGHUP", NULL, NULL, server_conf);
! 	    }
! 	    reclaim_child_processes(); /* Not when just starting up */
! 	    log_error ("SIGHUP received.  Attempting to restart", server_conf);
  	}
+ 	++generation;
  
!     } while (restart_pending);
  
  } /* standalone_main */
  
***************
*** 2288,2294 ****
  #endif
  
      setup_prelinked_modules();
!     
      suexec_enabled = init_suexec();
      server_conf = read_config (pconf, ptrans, server_confname);
      
--- 2349,2355 ----
  #endif
  
      setup_prelinked_modules();
! 
      suexec_enabled = init_suexec();
      server_conf = read_config (pconf, ptrans, server_confname);
      
Index: http_protocol.c
===================================================================
RCS file: /export/home/cvs/apache/src/http_protocol.c,v
retrieving revision 1.115
diff -c -3 -r1.115 http_protocol.c
*** http_protocol.c	1997/04/21 20:29:08	1.115
--- http_protocol.c	1997/04/22 06:44:06
***************
*** 626,631 ****
--- 626,633 ----
              return 0;
  	}
      }
+     /* we've probably got something to do, ignore graceful restart requests */
+     signal (SIGUSR1, SIG_IGN);
      bsetflag( conn->client, B_SAFEREAD, 0 );
      if (len == (HUGE_STRING_LEN - 1)) {
          log_printf(r->server, "request failed for %s, reason: header too long",
Index: mod_browser.c
===================================================================
RCS file: /export/home/cvs/apache/src/mod_browser.c,v
retrieving revision 1.8
diff -c -3 -r1.8 mod_browser.c
*** mod_browser.c	1997/03/07 14:15:38	1.8
--- mod_browser.c	1997/04/22 06:44:06
***************
*** 117,125 ****
  
      new = push_array(sconf->browsers);
      new->name = name;
!     new->preg = pcalloc(cmd->pool, sizeof(regex_t));
!     if (regcomp(new->preg, name, REG_EXTENDED|REG_NOSUB|cflags))
  	return "Browser regex could not be compiled.";
      new->features = make_table(cmd->pool, 5);
  
      var = getword(cmd->pool, &feature, '=');
--- 117,126 ----
  
      new = push_array(sconf->browsers);
      new->name = name;
!     new->preg = pregcomp (cmd->pool, name, REG_EXTENDED|REG_NOSUB|cflags);
!     if (new->preg == NULL) {
  	return "Browser regex could not be compiled.";
+     }
      new->features = make_table(cmd->pool, 5);
  
      var = getword(cmd->pool, &feature, '=');
Index: mod_include.c
===================================================================
RCS file: /export/home/cvs/apache/src/mod_include.c,v
retrieving revision 1.29
diff -c -3 -r1.29 mod_include.c
*** mod_include.c	1997/04/22 02:35:10	1.29
--- mod_include.c	1997/04/22 06:44:06
***************
*** 861,879 ****
  
  int re_check(request_rec *r, char *string, char *rexp) 
  {
!     regex_t compiled;
!     char err_string[MAX_STRING_LEN];
      int regex_error;
  
!     regex_error = regcomp(&compiled, rexp, REG_EXTENDED|REG_NOSUB);
!     if (regex_error) {
!         regerror(regex_error, &compiled, err_string, (size_t)MAX_STRING_LEN);
!         log_printf(r->server,
!             "unable to compile pattern %s [%s]", rexp, err_string);
          return -1;
      }
!     regex_error = regexec(&compiled, string, 0, (regmatch_t *)NULL, 0);
!     regfree(&compiled);
      return(!regex_error);
  }
  
--- 861,876 ----
  
  int re_check(request_rec *r, char *string, char *rexp) 
  {
!     regex_t *compiled;
      int regex_error;
  
!     compiled = pregcomp (r->pool, rexp, REG_EXTENDED|REG_NOSUB);
!     if (compiled == NULL) {
!         log_printf(r->server, "unable to compile pattern %s", rexp);
          return -1;
      }
!     regex_error = regexec(compiled, string, 0, (regmatch_t *)NULL, 0);
!     pregfree (r->pool, compiled);
      return(!regex_error);
  }
  
Index: mod_rewrite.c
===================================================================
RCS file: /export/home/cvs/apache/src/mod_rewrite.c,v
retrieving revision 1.27
diff -c -3 -r1.27 mod_rewrite.c
*** mod_rewrite.c	1997/04/17 02:52:51	1.27
--- mod_rewrite.c	1997/04/22 06:44:07
***************
*** 2759,2765 ****
      cache *c;
  
      c = (cache *)palloc(p, sizeof(cache));
!     c->pool = make_sub_pool(NULL);
      c->lists = make_array(c->pool, 2, sizeof(cachelist));
      return c;
  }
--- 2759,2765 ----
      cache *c;
  
      c = (cache *)palloc(p, sizeof(cache));
!     c->pool = make_sub_pool(p);
      c->lists = make_array(c->pool, 2, sizeof(cachelist));
      return c;
  }
Index: mod_status.c
===================================================================
RCS file: /export/home/cvs/apache/src/mod_status.c,v
retrieving revision 1.45
diff -c -3 -r1.45 mod_status.c
*** mod_status.c	1997/04/06 07:43:42	1.45
--- mod_status.c	1997/04/22 06:44:07
***************
*** 227,232 ****
--- 227,233 ----
      status[SERVER_BUSY_KEEPALIVE]='K';
      status[SERVER_BUSY_LOG]='L';
      status[SERVER_BUSY_DNS]='D';
+     status[SERVER_GRACEFUL]='G';
  
      if (r->method_number != M_GET) return NOT_IMPLEMENTED;
      r->content_type = "text/html";
***************
*** 279,285 ****
  	    ready++;
          else if (res == SERVER_BUSY_READ || res==SERVER_BUSY_WRITE || 
  		 res == SERVER_STARTING || res==SERVER_BUSY_KEEPALIVE ||
! 		 res == SERVER_BUSY_LOG || res==SERVER_BUSY_DNS)
  	    busy++;
  #if defined(STATUS)
          lres = score_record.access_count;
--- 280,287 ----
  	    ready++;
          else if (res == SERVER_BUSY_READ || res==SERVER_BUSY_WRITE || 
  		 res == SERVER_STARTING || res==SERVER_BUSY_KEEPALIVE ||
! 		 res == SERVER_BUSY_LOG || res==SERVER_BUSY_DNS ||
! 		 res == SERVER_GRACEFUL)
  	    busy++;
  #if defined(STATUS)
          lres = score_record.access_count;
***************
*** 407,412 ****
--- 409,415 ----
  	rputs("\"<B><code>K</code></B>\" Keepalive (read), \n",r);
  	rputs("\"<B><code>D</code></B>\" DNS Lookup,<BR>\n",r);
  	rputs("\"<B><code>L</code></B>\" Logging, \n",r);
+ 	rputs("\"<B><code>G</code></B>\" Gracefully finishing, \n",r);
  	rputs("\"<B><code>.</code></B>\" Open slot with no current process<P>\n",r);
      }
  
***************
*** 468,473 ****
--- 471,482 ----
  		        case SERVER_DEAD:
  		            rputs("Dead",r);
  		            break;
+ 			case SERVER_GRACEFUL:
+ 			    rputs("Graceful",r);
+ 			    break;
+ 			default:
+ 			    rputs("?STATE?",r);
+ 			    break;
  		    }
  #ifdef __EMX__
                      /* Allow for OS/2 not having CPU stats */
***************
*** 521,526 ****
--- 530,541 ----
  		        case SERVER_DEAD:
  		            rputs("<td>.",r);
  		            break;
+ 			case SERVER_GRACEFUL:
+ 			    rputs("<td>G",r);
+ 			    break;
+ 			default:
+ 			    rputs("<td>?",r);
+ 			    break;
  		    }
  #ifdef __EMX__
  	            /* Allow for OS/2 not having CPU stats */
Index: scoreboard.h
===================================================================
RCS file: /export/home/cvs/apache/src/scoreboard.h,v
retrieving revision 1.20
diff -c -3 -r1.20 scoreboard.h
*** scoreboard.h	1997/01/01 18:10:44	1.20
--- scoreboard.h	1997/04/22 06:44:07
***************
*** 71,76 ****
--- 71,77 ----
  #define SERVER_BUSY_KEEPALIVE 5 /* Waiting for more requests via keepalive */
  #define SERVER_BUSY_LOG 6       /* Logging the request */
  #define SERVER_BUSY_DNS 7       /* Looking up a hostname */
+ #define SERVER_GRACEFUL 8	/* server is gracefully finishing request */
  
  typedef struct {
      pid_t pid;


Mime
View raw message