httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Scott Hess" <sc...@avantgo.com>
Subject Graceful shutdown patch for 1.3.4
Date Thu, 03 Jun 1999 00:28:03 GMT
We're load-balancing across multiple httpds on FreeBSD 3.1 behind a
load-balancing switch.  Periodically, we need to pull one of the servers
down for servicing.  We COULD muck with the switch to idle that machine,
wait, then take it out, but we're reluctant to muck with the switch for such
things.  Instead, we'd like to just stop accepting requests on that machine
and let the switch automagically transfer new requests to the other
machines.

This could almost be done by modifying the Port directive, and doing a
graceful restart.  Unfortunately, while the active children finish their
current request, new requests are accepted into the listen queue, which will
then be rudely yanked away when the children finish.

I've modified 1.3.4 to use shutdown( listenFd, SHUT_RDWR), which tells
FreeBSD to stop allowing new connections into the socket's listen queue, but
still allows the children to continue pulling connections off the front of
the queue.  The parent does the shutdown() and waits for the children to
exit.  The children watch for a graceful_shutdown flag on the scoreboard,
and reset their ap_max_requests_per_child to 0, i.e. no limit (in case the listen queue
contains more connections than the sum of the remaining requests the
children are willing to handle).

The result is that all incoming connection requests either run to
completion, or get ECONNREFUSED.  In my testing, no connections get cut
off in mid-stream.  [This is exactly what our load balancer needs to see to
drop the machine from the rotation.]

There remains a race condition: if there are no children ready to
service requests in the listen queue, those requests will get cut off in
mid-stream (actually, the clients will have sent their headers but received
no response yet).

The shutdown() trick doesn't work on at least Linux kernel 2.0.34.  In fact,
I only know that it DOES work on FreeBSD 3.1; I've tested nothing else
other than Linux.  The patch applies with one easy-to-fix hangup to
Apache 1.3.6, but it's going to be a week or two before I can make sure it
runs well on 1.3.6.

Later,
scott hess
scott@avantgo.com

The patch:
# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
# /usr/tmp/graceful.patch
#
echo x - /usr/tmp/graceful.patch
sed 's/^X//' >/usr/tmp/graceful.patch << 'END-of-/usr/tmp/graceful.patch'
XIndex: src/include/http_main.h
X===================================================================
XRCS file: /usr/local/cvsroot/vendor/apache/src/include/http_main.h,v
Xretrieving revision 1.2
Xretrieving revision 1.3
Xdiff -c -r1.2 -r1.3
X*** http_main.h 1999/04/02 17:33:17 1.2
X--- http_main.h 1999/06/02 17:01:17 1.3
X***************
X*** 112,118 ****
X--- 112,122 ----
X   * reset_timeout() resets the timeout in progress.
X   */
X
X+ #ifdef GRACEFUL_SHUTDOWN
X+ void ap_start_shutdown(int);
X+ #else
X  void ap_start_shutdown(void);
X+ #endif
X  void ap_start_restart(int);
X  API_EXPORT(void) ap_hard_timeout(char *, request_rec *);
X  void ap_keepalive_timeout(char *, request_rec *);
XIndex: src/include/scoreboard.h
X===================================================================
XRCS file: /usr/local/cvsroot/vendor/apache/src/include/scoreboard.h,v
Xretrieving revision 1.1
Xretrieving revision 1.2
Xdiff -c -r1.1 -r1.2
X*** scoreboard.h 1999/03/02 19:36:23 1.1
X--- scoreboard.h 1999/06/02 17:01:17 1.2
X***************
X*** 164,169 ****
X--- 164,173 ----
X  typedef struct {
X      ap_generation_t running_generation; /* the generation of children which
X                                           * should still be serving requests. */
X+ #ifdef GRACEFUL_SHUTDOWN
X+     unsigned char graceful_shutdown;    /* running_generation changed due to
X+                                          * graceful shutdown request. */
X+ #endif
X  } global_score;
X
X  /* stuff which the parent generally writes and the children rarely read */
XIndex: src/main/http_main.c
X===================================================================
XRCS file: /usr/local/cvsroot/vendor/apache/src/main/http_main.c,v
Xretrieving revision 1.4
Xdiff -c -r1.4 http_main.c
X*** http_main.c 1999/04/24 21:13:07 1.4
X--- http_main.c 1999/06/02 23:10:36
X***************
X*** 618,623 ****
X--- 618,630 ----
X      sigdelset(&accept_block_mask, SIGHUP);
X      sigdelset(&accept_block_mask, SIGTERM);
X      sigdelset(&accept_block_mask, SIGUSR1);
X+ #ifdef GRACEFUL_SHUTDOWN
X+     /* Not positive if this is necessary, because only parent sees
X+      * SIGUSR2.  But I don't pretend to understand the implications
X+      * of this type of mutex.
X+      */
X+     sigdelset(&accept_block_mask, SIGUSR2);
X+ #endif /* GRACEFUL_SHUTDOWN */
X      ap_register_cleanup(p, NULL, accept_mutex_cleanup, ap_null_cleanup);
X  }
X
X***************
X*** 2307,2312 ****
X--- 2314,2406 ----
X  #endif /* ndef MULTITHREAD */
X  }
X
X+ #ifdef GRACEFUL_SHUTDOWN
X+ static int volatile is_graceful;
X+ /* Used for SIGUSR2 graceful shutdown.  Just like reclaim_child_processes(),
X+  * but without sending signals.  Waits for all children to die a natural
X+  * death.  Exits if is_graceful goes to 0 (perhaps due to a SIGTERM).
X+  */
X+ static void graceful_reclaim_child_processes( void)
X+ {
X+ #ifndef MULTITHREAD
X+     int i, status;
X+     long int waittime = 1024 * 16; /* in usecs */
X+     struct timeval tv;
X+     int waitret, tries;
X+     int not_dead_yet;
X+ #ifndef NO_OTHER_CHILD
X+     other_child_rec *ocr, *nocr;
X+ #endif
X+
X+     ap_sync_scoreboard_image();
X+
X+     for (tries = 1; is_graceful; ++tries) {
X+  /* don't want to hold up progress any more than
X+   * necessary, but we need to allow children a few moments to exit.
X+   * Set delay with an exponential backoff.
X+   */
X+  tv.tv_sec = waittime / 1000000;
X+  tv.tv_usec = waittime % 1000000;
X+  waittime = waittime * 4;
X+  ap_select(0, NULL, NULL, NULL, &tv);
X+
X+  /* now see who is done */
X+  not_dead_yet = 0;
X+  for (i = 0; i < max_daemons_limit; ++i) {
X+      int pid = ap_scoreboard_image->parent[i].pid;
X+
X+      if (pid == my_pid || pid == 0)
X+   continue;
X+
X+      waitret = waitpid(pid, &status, WNOHANG);
X+      if (waitret == pid || waitret == -1) {
X+   ap_scoreboard_image->parent[i].pid = 0;
X+   continue;
X+      }
X+      ++not_dead_yet;
X+      switch (tries) {
X+      case 1:     /*  16ms */
X+      case 2:     /*  82ms */
X+      case 3:     /* 344ms */
X+      case 4:     /* 1.4sec */
X+   break;
X+      case 5:     /*  6 sec */
X+      default:
X+   /* Back off - but don't back out of the ballpark. */
X+   waittime=12*1024*1024;
X+   break;
X+      }
X+  }
X+ #ifndef NO_OTHER_CHILD
X+  for (ocr = other_children; ocr; ocr = nocr) {
X+      nocr = ocr->next;
X+      if (ocr->pid == -1)
X+   continue;
X+
X+      waitret = waitpid(ocr->pid, &status, WNOHANG);
X+      if (waitret == ocr->pid) {
X+   ocr->pid = -1;
X+   (*ocr->maintenance) (OC_REASON_DEATH, ocr->data, status);
X+      }
X+      else if (waitret == 0) {
X+   (*ocr->maintenance) (OC_REASON_RESTART, ocr->data, -1);
X+   ++not_dead_yet;
X+      }
X+      else if (waitret == -1) {
X+   /* uh what the heck? they didn't call unregister? */
X+   ocr->pid = -1;
X+   (*ocr->maintenance) (OC_REASON_LOST, ocr->data, -1);
X+      }
X+  }
X+ #endif
X+  if (!not_dead_yet) {
X+      /* nothing left to wait for */
X+      break;
X+  }
X+     }
X+ #endif /* ndef MULTITHREAD */
X+ }
X+ #endif /* GRACEFUL_SHUTDOWN */
X
X  #if defined(NEED_WAITPID)
X  /*
X***************
X*** 2674,2682 ****
X--- 2768,2783 ----
X   * variables are no longer used to pass on the required action to the parent.
X   */
X
X+ #ifdef GRACEFUL_SHUTDOWN
X+ void ap_start_shutdown(int graceful)
X+ #else
X  void ap_start_shutdown(void)
X+ #endif /* GRACEFUL_SHUTDOWN */
X  {
X  #ifndef WIN32
X+ #ifdef GRACEFUL_SHUTDOWN
X+     is_graceful = graceful;
X+ #endif /* GRACEFUL_SHUTDOWN */
X      if (shutdown_pending == 1) {
X   /* Um, is this _probably_ not an error, if the user has
X    * tried to do a shutdown twice quickly, so we won't
X***************
X*** 2707,2713 ****
X--- 2808,2818 ----
X
X  static void sig_term(int sig)
X  {
X+ #ifdef GRACEFUL_SHUTDOWN
X+     ap_start_shutdown(sig == SIGUSR2);
X+ #else
X      ap_start_shutdown();
X+ #endif /* GRACEFUL_SHUTDOWN */
X  }
X
X  static void restart(int sig)
X***************
X*** 2757,2762 ****
X--- 2862,2871 ----
X      sa.sa_handler = sig_term;
X      if (sigaction(SIGTERM, &sa, NULL) < 0)
X   ap_log_error(APLOG_MARK, APLOG_WARNING, server_conf, "sigaction(SIGTERM)");
X+ #ifdef GRACEFUL_SHUTDOWN
X+     if (sigaction(SIGUSR2, &sa, NULL) < 0)
X+  ap_log_error(APLOG_MARK, APLOG_WARNING, server_conf, "sigaction(SIGUSR2)");
X+ #endif
X  #ifdef SIGINT
X      if (sigaction(SIGINT, &sa, NULL) < 0)
X          ap_log_error(APLOG_MARK, APLOG_WARNING, server_conf, "sigaction(SIGINT)");
X***************
X*** 2810,2815 ****
X--- 2919,2929 ----
X  #ifdef SIGUSR1
X      signal(SIGUSR1, restart);
X  #endif /* SIGUSR1 */
X+ #ifdef GRACEFUL_SHUTDOWN
X+ #ifdef SIGUSR2
X+     signal(SIGUSR2, sig_term);
X+ #endif /* SIGUSR2 */
X+ #endif /* GRACEFUL_SHUTDOWN */
X  #endif
X  }
X
X***************
X*** 3672,3677 ****
X--- 3786,3795 ----
X   ap_sync_scoreboard_image();
X   if (ap_scoreboard_image->global.running_generation != ap_my_generation) {
X       clean_child_exit(0);
X+ #ifdef GRACEFUL_SHUTDOWN
X+  } else if( ap_scoreboard_image->global.graceful_shutdown!=0) {
X+             ap_max_requests_per_child=0;
X+ #endif /* GRACEFUL_SHUTDOWN */
X   }
X
X  #ifndef WIN32
X***************
X*** 3769,3777 ****
X--- 3887,3897 ----
X         */
X                  case EPROTO:
X  #endif
X+ #if !defined( GRACEFUL_SHUTDOWN) || !defined( __FreeBSD__)
X  #ifdef ECONNABORTED
X                  case ECONNABORTED:
X  #endif
X+ #endif /* GRACEFUL_SHUTDOWN */
X        /* Linux generates the rest of these, other tcp
X         * stacks (i.e. bsd) tend to hide them behind
X         * getsockopt() interfaces.  They occur when
X***************
X*** 3793,3798 ****
X--- 3913,3932 ----
X    case ENETUNREACH:
X  #endif
X                      break;
X+ #if defined( GRACEFUL_SHUTDOWN) && defined( __FreeBSD__)
X+ #ifdef ECONNABORTED
X+                     /* This could happen due to parent shutting down
X+                      * the listen socket.  Exit if graceful_shutdown
X+                      * is indicated by parent.
X+                      */
X+                 case ECONNABORTED:
X+                     ap_sync_scoreboard_image();
X+                     if( ap_scoreboard_image->global.graceful_shutdown!=0) {
X+                         clean_child_exit(0);
X+                     }
X+                     break;
X+ #endif
X+ #endif /* GRACEFUL_SHUTDOWN */
X
X    default:
X        ap_log_error(APLOG_MARK, APLOG_ERR, server_conf,
X***************
X*** 3919,3924 ****
X--- 4053,4063 ----
X       if (ap_scoreboard_image->global.running_generation != ap_my_generation) {
X    ap_bclose(conn_io);
X    clean_child_exit(0);
X+ #ifdef GRACEFUL_SHUTDOWN
X+             } else if( ap_scoreboard_image->global.graceful_shutdown!=0) {
X+                 /* No persistent connections in graceful_shutdown. */
X+                 break;
X+ #endif /* GRACEFUL_SHUTDOWN */
X       }
X
X       /* In case we get a graceful restart while we're blocked
X***************
X*** 4427,4439 ****
X   }
X
X   if (shutdown_pending) {
X!      /* Time to gracefully shut down:
X!       * Kill child processes, tell them to call child_exit, etc...
X!       */
X!      if (ap_killpg(pgrp, SIGTERM) < 0) {
X!   ap_log_error(APLOG_MARK, APLOG_WARNING, server_conf, "killpg SIGTERM");
X!      }
X!      reclaim_child_processes(1);  /* Start with SIGTERM */
X
X       /* cleanup pid file on normal shutdown */
X       {
X--- 4566,4624 ----
X   }
X
X   if (shutdown_pending) {
X! #ifdef GRACEFUL_SHUTDOWN
X!      if( is_graceful) {
X!   ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_NOTICE, server_conf,
X!         "SIGUSR2 received.  Doing graceful shutdown");
X!
X! #ifdef __FreeBSD__
X!                 /* Tell children about our graceful shutdown. */
X!                 ap_scoreboard_image->global.graceful_shutdown = 1;
X!   update_scoreboard_global();
X!
X!                 /* Shutdown the listen sockets.  This shutdown() will cause
X!                  * clients already in the queue to be processed, but no more
X!                  * clients can enter the queue.  The children will notice
X!                  * this and exit when the queue is empty. */
X!                 if( ap_listeners) {
X!                     listen_rec *lr=ap_listeners;
X!                     do {
X!                         shutdown( lr->fd, SHUT_RDWR);
X!                         lr=lr->next;
X!                     } while( lr && lr!=ap_listeners);
X!                 }
X! #else
X!                 /* Basically, just cribbed from the graceful restart.  Not
X!                  * quite so graceful, but at least nobody gets halfway
X!                  * serviced. */
X!                 /* kill off the idle ones */
X!                 if (ap_killpg(pgrp, SIGUSR1) < 0) {
X!                     ap_log_error(APLOG_MARK, APLOG_WARNING, server_conf, "killpg SIGUSR1");
X!                 }
X! #endif
X!   graceful_reclaim_child_processes();
X!
X!                 /* At this point, there should be no children, unless we
X!                  * received a SIGTERM since our SIGUSR2.  In that case,
X!                  * is_graceful should now be false.
X!                  */
X!      }
X! #endif /* GRACEFUL_SHUTDOWN */
X!
X! #ifdef GRACEFUL_SHUTDOWN
X!             /* If still in is_graceful, the children should already be dead. */
X!             if( !is_graceful) {
X! #endif /* GRACEFUL_SHUTDOWN */
X!                 /* Time to gracefully shut down:
X!                  * Kill child processes, tell them to call child_exit, etc...
X!                  */
X!                 if (ap_killpg(pgrp, SIGTERM) < 0) {
X!                     ap_log_error(APLOG_MARK, APLOG_WARNING, server_conf, "killpg SIGTERM");
X!                 }
X!                 reclaim_child_processes(1);  /* Start with SIGTERM */
X! #ifdef GRACEFUL_SHUTDOWN
X!             }
X! #endif /* GRACEFUL_SHUTDOWN */
X
X       /* cleanup pid file on normal shutdown */
X       {
X***************
X*** 4454,4459 ****
X--- 4639,4647 ----
X   /* we've been told to restart */
X   signal(SIGHUP, SIG_IGN);
X   signal(SIGUSR1, SIG_IGN);
X+ #ifdef GRACEFUL_SHUTDOWN
X+  signal(SIGUSR2, SIG_IGN);
X+ #endif /* GRACEFUL_SHUTDOWN */
X
X   if (one_process) {
X       /* not worth thinking about */
X***************
X*** 5866,5872 ****
X--- 6054,6064 ----
X      setup_signal_names(prefix);
X
X      if (!strcasecmp(signal, "shutdown"))
X+ #ifdef GRACEFUL_SHUTDOWN
X+  ap_start_shutdown(0);
X+ #else
X   ap_start_shutdown();
X+ #endif /* GRACEFUL_SHUTDOWN */
X      else if (!strcasecmp(signal, "restart"))
X   ap_start_restart(1);
X      else
END-of-/usr/tmp/graceful.patch
exit




Mime
View raw message