httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From <...@covalent.net>
Subject New graceful restart mechanism.
Date Mon, 04 Jun 2001 16:20:46 GMT

This patch implements the pipe-of-death mechanism that was discussed last
week.  This only modifies the prefork MPM.  If this patch is accepted,
then the other MPMs are trivial to modify.  The one thing I didn't do,
which could easily be done now, is to remove the process_status field from
the scoreboard.  That field is not used once this patch is applied, and
can safely be removed.

The basic premise is that signals are just not very good when it comes to
gracefully shutting down the server.  They don't work well for either
threaded or non-threaded MPMs.  So, we have implemented a series of
pipe-of-death (pod) functions.  The pod is no longer selected on, allowing for
SINGLE_LISTEN_UNSERIALIZED_ACCEPT again.  Instead, the first thing a
thread does after accepting a connection is to check the pod.  If there
is data there, the process dies (after serving the current request); if
there is no data, it doesn't die.  This could be improved a bit by using
OOB data to wake up the thread, and we would only check the pod if we
received OOB data.  This is an optimization, however, and I am not sure how
that would work in real life.

I have tested this, but of course it could use far more people banging on
it before I commit for good.  Assuming nobody complains, I will commit it
tomorrow or the next day.

Ryan

? build.log
? build.err
Index: include/mpm_common.h
===================================================================
RCS file: /home/cvs/httpd-2.0/include/mpm_common.h,v
retrieving revision 1.20
diff -u -d -b -w -u -r1.20 mpm_common.h
--- include/mpm_common.h	2001/03/22 12:32:21	1.20
+++ include/mpm_common.h	2001/06/04 15:59:24
@@ -164,6 +164,14 @@

 #define AP_MPM_HARD_LIMITS_FILE APACHE_MPM_DIR "/mpm_default.h"

+#ifdef AP_MPM_USES_POD
+AP_DECLARE(apr_file_t *) ap_mpm_pod_open(apr_pool_t *p);
+AP_DECLARE(apr_status_t) ap_mpm_pod_check(void);
+AP_DECLARE(void) ap_mpm_pod_close(void);
+AP_DECLARE(void) ap_mpm_pod_signal(apr_pool_t *p);
+AP_DECLARE(void) ap_mpm_pod_killpg(apr_pool_t *p, int num);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
Index: server/mpm_common.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm_common.c,v
retrieving revision 1.47
diff -u -d -b -w -u -r1.47 mpm_common.c
--- server/mpm_common.c	2001/05/16 20:51:28	1.47
+++ server/mpm_common.c	2001/06/04 16:00:37
@@ -338,4 +338,61 @@
 }
 #endif /* def NEED_INITGROUPS */

+#ifdef AP_MPM_USES_POD
+static apr_file_t *pod_in = NULL, *pod_out = NULL;
+
+AP_DECLARE(apr_status_t) ap_mpm_pod_open(apr_pool_t *p)
+{
+    apr_file_t *pod;

+    pod = apr_file_pipe_create(&pod_in, &pod_out, p);
+    apr_file_pipe_timeout_set(out, 0);
+    return APR_SUCCESS;
+}
+
+AP_DECLARE(apr_status_t) ap_mpm_pod_check(void)
+{
+    char c;
+    apr_size_t len = 1;
+    apr_status_t rv;
+
+    rv = apr_file_read(pod_in, &c, &len);
+
+    if ((rv == APR_SUCCESS) && (len == 1)) {
+        return APR_SUCCESS;
+    }
+    return 1;
+}
+
+AP_DECLARE(void) ap_mpm_pod_close(void)
+{
+    apr_file_close(pod_out);
+    apr_file_close(pod_in);
+}
+
+AP_DECLARE(void) ap_mpm_pod_signal(apr_pool_t *p)
+{
+    apr_socket_t *sock = NULL;
+    apr_sockaddr_t *sa = NULL;
+
+    if ((rv = apr_file_write(pipe_of_death_out, &char_of_death, &one))
+                                 != APR_SUCCESS) {
+        if (APR_STATUS_IS_EINTR(rv)) continue;
+            ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf,
+                         "write pipe_of_death");
+        }
+    }
+
+    apr_sockaddr_info_get(&sa, "127.0.0.1", APR_UNSPEC, ap_listeners->sd->port,
0, p);
+    apr_socket_create(&sock, sa->family, SOCK_STREAM, p);
+    apr_connect(sock, sa);
+}
+
+AP_DECLARE(void) ap_mpm_pod_killpg(apr_pool_t *p, int num)
+{
+    int i;
+    for (i = 0; i < num; i++) {
+        ap_mpm_pod_signal(p);
+    }
+}
+#endif
Index: server/mpm/prefork/mpm.h
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/prefork/mpm.h,v
retrieving revision 1.11
diff -u -d -b -w -u -r1.11 mpm.h
--- server/mpm/prefork/mpm.h	2001/04/13 19:00:38	1.11
+++ server/mpm/prefork/mpm.h	2001/06/04 16:00:49
@@ -68,6 +68,7 @@

 #define MPM_NAME "Prefork"

+#define AP_MPM_USES_POD 1
 #define AP_MPM_NEEDS_RECLAIM_CHILD_PROCESSES 1
 #define MPM_SYNC_CHILD_TABLE() (ap_sync_scoreboard_image())
 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
Index: server/mpm/prefork/prefork.c
===================================================================
RCS file: /home/cvs/httpd-2.0/server/mpm/prefork/prefork.c,v
retrieving revision 1.175
diff -u -d -b -w -u -r1.175 prefork.c
--- server/mpm/prefork/prefork.c	2001/05/15 02:38:15	1.175
+++ server/mpm/prefork/prefork.c	2001/06/04 16:00:49
@@ -182,6 +182,8 @@
 char tpf_server_name[INETD_SERVNAME_LENGTH+1];
 #endif /* TPF */

+int die_now = 0;
+
 #ifdef GPROF
 /*
  * change directory for gprof to plop the gmon.out file
@@ -232,7 +234,7 @@
     if (pchild) {
 	apr_pool_destroy(pchild);
     }
-    ap_scoreboard_image->parent[my_child_num].process_status = SB_WORKING;
+    ap_mpm_pod_close();
     chdir_for_gprof();
     exit(code);
 }
@@ -378,15 +380,7 @@
 static void please_die_gracefully(int sig)
 {
     /* clean_child_exit(0); */
-    ap_scoreboard_image->parent[my_child_num].process_status = SB_IDLE_DIE;
-    if (sig == SIGHUP) {
-        (void) ap_update_child_status(AP_CHILD_THREAD_FROM_ID(my_child_num),
-                                      SERVER_GRACEFUL, (request_rec *) NULL);
-    }
-    else {
-        (void) ap_update_child_status(AP_CHILD_THREAD_FROM_ID(my_child_num),
-                                      SERVER_IDLE_KILL, (request_rec *) NULL);
-    }
+    die_now = 1;
 }

 /* volatile just in case */
@@ -414,9 +408,11 @@
 	return;
     }
     restart_pending = 1;
+#if 0
     if ((is_graceful = (sig == SIGWINCH))) {
         apr_pool_cleanup_kill(pconf, NULL, ap_cleanup_scoreboard);
     }
+#endif
 }

 static void set_signals(void)
@@ -479,12 +475,9 @@

     /* we want to ignore HUPs and WINCH while we're busy processing one */
     sigaddset(&sa.sa_mask, SIGHUP);
-    sigaddset(&sa.sa_mask, SIGWINCH);
     sa.sa_handler = restart;
     if (sigaction(SIGHUP, &sa, NULL) < 0)
 	ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, "sigaction(SIGHUP)");
-    if (sigaction(SIGWINCH, &sa, NULL) < 0)
-	ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, "sigaction(SIGWINCH)");
 #else
     if (!one_process) {
 	apr_signal(SIGSEGV, sig_coredump);
@@ -513,7 +506,7 @@
     apr_signal(SIGHUP, restart);
 #endif /* SIGHUP */
 #ifdef SIGWINCH
-    apr_signal(SIGWINCH, restart);
+    apr_signal(SIGWINCH, SIG_IGN);
 #endif /* SIGWINCH */
 #ifdef SIGPIPE
     apr_signal(SIGPIPE, SIG_IGN);
@@ -533,9 +526,6 @@
 static int requests_this_child;
 static fd_set main_fds;

-#define I_AM_TO_SHUTDOWN()                                                   \
-(ap_scoreboard_image->parent[my_child_num].process_status != SB_WORKING)
-
 int ap_graceful_stop_signalled(void)
 {
     /* not ever called anymore... */
@@ -581,12 +571,11 @@
     apr_signal(SIGHUP, please_die_gracefully);

     ap_sync_scoreboard_image();
-    while (!I_AM_TO_SHUTDOWN()) {
+    while (!die_now) {

 	/* Prepare to receive a SIGWINCH due to graceful restart so that
 	 * we can exit cleanly.
 	 */
-	apr_signal(SIGWINCH, please_die_gracefully);
         apr_signal(SIGTERM, just_die);

 	/*
@@ -667,16 +656,22 @@
 	    /* if we accept() something we don't want to die, so we have to
 	     * defer the exit
 	     */
-            apr_signal(SIGTERM, please_die_gracefully);
 	    for (;;) {
                 ap_sync_scoreboard_image();
-		if (I_AM_TO_SHUTDOWN()) {
+		if (die_now) {
 		    /* we didn't get a socket, and we were told to die */
 		    clean_child_exit(0);
 		}
 		stat = apr_accept(&csd, sd, ptrans);
 		if (stat == APR_SUCCESS || !APR_STATUS_IS_EINTR(stat))
 		    break;
+                /* In reality, this could be done later, but to keep it
+                 * consistent with MPMs that have a thread race-condition,
+                 * we will do it here.
+                 */
+                if (ap_mpm_pod_check()) {
+                    die_now = 1;
+                }
 	    }

 	    if (stat == APR_SUCCESS)
@@ -778,19 +773,13 @@
 	    }

             ap_sync_scoreboard_image();
-	    if (I_AM_TO_SHUTDOWN()) {
+	    if (die_now) {
 		clean_child_exit(0);
 	    }
 	}

 	SAFE_ACCEPT(accept_mutex_off());	/* unlock after "accept" */

-	/* We've got a socket, let's at least process one request off the
-	 * socket before we accept a graceful restart request.  We set
-	 * the signal to ignore because we don't want to disturb any
-	 * third party code.
-	 */
-	apr_signal(SIGWINCH, SIG_IGN);
 	/*
 	 * We now have a connection, so set it up with the appropriate
 	 * socket options, file descriptors, and read/write buffers.
@@ -895,7 +884,6 @@
 	 * requested there's no race condition here.
 	 */
 	apr_signal(SIGHUP, please_die_gracefully);
-	apr_signal(SIGWINCH, please_die_gracefully);
 	apr_signal(SIGTERM, just_die);
         ap_scoreboard_image->parent[slot].process_status = SB_WORKING;
 	child_main(slot);
@@ -941,7 +929,7 @@
 #endif
 static int hold_off_on_exponential_spawning;

-static void perform_idle_server_maintenance(void)
+static void perform_idle_server_maintenance(apr_pool_t *p)
 {
     int i;
     int to_kill;
@@ -1003,7 +991,7 @@
 	 * shut down gracefully, in case it happened to pick up a request
 	 * while we were counting
 	 */
-	kill(ap_scoreboard_image->parent[to_kill].pid, SIGWINCH);
+	ap_mpm_pod_signal(p);
 	idle_spawn_rate = 1;
     }
     else if (idle_count < ap_daemons_min_free) {
@@ -1100,6 +1088,7 @@
 	/* XXX: hey, what's the right way for the mpm to indicate a fatal error? */
 	return 1;
     }
+    ap_mpm_pod_open(pconf);

     SAFE_ACCEPT(accept_mutex_init(pconf));
     if (!is_graceful) {
@@ -1210,7 +1199,7 @@
 	    continue;
 	}

-	perform_idle_server_maintenance();
+	perform_idle_server_maintenance(pconf);
 #ifdef TPF
     shutdown_pending = os_check_server(tpf_server_name);
     ap_check_signals();
@@ -1245,7 +1234,6 @@

     /* we've been told to restart */
     apr_signal(SIGHUP, SIG_IGN);
-    apr_signal(SIGWINCH, SIG_IGN);
     if (one_process) {
 	/* not worth thinking about */
 	return 1;
@@ -1265,12 +1253,11 @@

     if (is_graceful) {
 	ap_log_error(APLOG_MARK, APLOG_NOERRNO|APLOG_NOTICE, 0, ap_server_conf,
-		    "SIGWINCH received.  Doing graceful restart");
+		    "Graceful restart requested, doing restart");

 	/* kill off the idle ones */
-	if (unixd_killpg(getpgrp(), SIGWINCH) < 0) {
-	    ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, "killpg SIGWINCH");
-	}
+        ap_mpm_pod_killpg(pconf, ap_daemons_limit);
+
 #ifndef SCOREBOARD_FILE
 	/* This is mostly for debugging... so that we know what is still
 	    * gracefully dealing with existing request.  But we can't really

_______________________________________________________________________________
Ryan Bloom                        	rbb@apache.org
406 29th St.
San Francisco, CA 94131
-------------------------------------------------------------------------------



Mime
View raw message