httpd-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Yann Ylavic <ylavic....@gmail.com>
Subject Re: SO_REUSEPORT in the children processes
Date Fri, 07 Mar 2014 17:28:29 GMT
On Fri, Mar 7, 2014 at 6:24 PM, Reindl Harald <h.reindl@thelounge.net> wrote:
>
>
> Am 07.03.2014 18:07, schrieb Yann Ylavic
>
> can you please post plaintext instead HTML to lists
>
> for me such messages are unreadable after medical operations
> on both eyes because you override my MUA font settings
>

Sorry, this was posted from gmail...

Here is the plain text.

***

Hi all,

the patch about SO_REUSEPORT proposed by Yingqi Lu in [1] and
discussed in [2] uses listeners buckets to address a defect [3] in the
current linux implementation (his patch goes beyond SO_REUSEPORT
though, and suggests a new MPM even when the option is not available).

Should this defect be addressed by linux folks, the
event/worker/prefork MPMs could take full advantage of the option
(linux-3.9+) with quite simple modifications of the current code.
I'm proposing here the corresponding patch.

The idea is to re-create and re-bind/listen the parent's listeners
sockets for each child process, when starting, before dropping
priviledges.

For this, the patch introduces a new ap_reopen_listeners() function
(meant to be called by each child) to do the job on the inherited
listeners. It does nothing unless HAVE_SO_REUSEPORT is defined.

The advantage of this approach is that no accept mutex is needed
anymore (each child has its own sockets), hence the SAFE_ACCEPT macros
can do nothing when HAVE_SO_REUSEPORT is defined.
The new (incoming) connections are evenly distributed accross the
children for all the listners (as assured by Linux when SO_REUSEPORT
is used).

I'm proposing the patch here so that everyone can figure out whether
SO_REUSEPORT per se needs its own MPM or not (once/if the defect is
fixed).
The option seems to be quite easily pluggable to existing MPMs (no ABI
change), and I don't see an advantage to not using it when available
(and working).

Also, FreeBSD has an implementation of SO_REUSEPORT,
however
I couldn't find whether it has the same scheduling garantee
or not
(at least I guess the accept mutex can be avoided too).

Regarding the linux kernel defect, is someone aware of a fix/work on
that in the latest versions?

Finally, about the accept mutex, mpm event seems to work well without
any, why prefork and worker would need one (both poll() all the
listeners in a single thread, while other children can do the same)?

The patch follows and is attached.
It can already be tested with a workaround against the defect: don't
let perform_idle_server_maintenance() create/stop children after
startup (eg. StartServers, ServerLimit, Min/MaxSpareServers using the
same value).

Thoughts, feedbacks welcome.

Regards,
Yann.

[1] https://issues.apache.org/bugzilla/show_bug.cgi?id=55897#c7
[2] http://mail-archives.apache.org/mod_mbox/httpd-bugs/201312.mbox/%3Cbug-55897-7868@https.issues.apache.org/bugzilla/%3E
[3] http://lwn.net/Articles/542629/ and http://lwn.net/Articles/542738/

Index: server/mpm/event/event.c
===================================================================
--- server/mpm/event/event.c    (revision 1575322)
+++ server/mpm/event/event.c    (working copy)
@@ -2356,6 +2356,13 @@ static void child_main(int child_num_arg)
     /*stuff to do before we switch id's, so we have permissions. */
     ap_reopen_scoreboard(pchild, NULL, 0);

+    rv = ap_reopen_listeners(pchild, num_listensocks);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
         clean_child_exit(APEXIT_CHILDFATAL);
     }
Index: server/mpm/prefork/prefork.c
===================================================================
--- server/mpm/prefork/prefork.c    (revision 1575322)
+++ server/mpm/prefork/prefork.c    (working copy)
@@ -271,7 +271,9 @@ static void accept_mutex_off(void)
  * multiple Listen statements.  Define SINGLE_LISTEN_UNSERIALIZED_ACCEPT
  * when it's safe in the single Listen case.
  */
-#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
+#if HAVE_SO_REUSEPORT
+#define SAFE_ACCEPT(stmt)
+#elif defined(SINGLE_LISTEN_UNSERIALIZED_ACCEPT)
 #define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0)
 #else
 #define SAFE_ACCEPT(stmt) do {stmt;} while(0)
@@ -536,6 +538,13 @@ static void child_main(int child_num_arg)
         clean_child_exit(APEXIT_CHILDFATAL);
     }

+    status = ap_reopen_listeners(pchild, num_listensocks);
+    if (status != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, status, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
         clean_child_exit(APEXIT_CHILDFATAL);
     }
Index: server/mpm/worker/worker.c
===================================================================
--- server/mpm/worker/worker.c    (revision 1575322)
+++ server/mpm/worker/worker.c    (working copy)
@@ -220,7 +220,9 @@ static apr_os_thread_t *listener_os_thread;
 /* Locks for accept serialization */
 static apr_proc_mutex_t *accept_mutex;

-#ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
+#if HAVE_SO_REUSEPORT
+#define SAFE_ACCEPT(stmt)
+#elif defined(SINGLE_LISTEN_UNSERIALIZED_ACCEPT)
 #define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
 #else
 #define SAFE_ACCEPT(stmt) (stmt)
@@ -1228,6 +1230,13 @@ static void child_main(int child_num_arg)
     /*stuff to do before we switch id's, so we have permissions.*/
     ap_reopen_scoreboard(pchild, NULL, 0);

+    rv = ap_reopen_listeners(pchild, num_listensocks);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO()
+                     "Couldn't re-open listeners");
+        clean_child_exit(APEXIT_CHILDFATAL);
+    }
+
     rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex,

apr_proc_mutex_lockfile(accept_mutex),
                                                pchild));
Index: server/listen.c
===================================================================
--- server/listen.c    (revision 1575322)
+++ server/listen.c    (working copy)
@@ -60,6 +60,9 @@ static apr_status_t make_sock(apr_pool_t *p, ap_li
 #endif
     apr_status_t stat;

+    /* until further notice */
+    server->sd = NULL;
+
 #ifndef WIN32
     stat = apr_socket_opt_set(s, APR_SO_REUSEADDR, one);
     if (stat != APR_SUCCESS && stat != APR_ENOTIMPL) {
@@ -125,6 +128,23 @@ static apr_status_t make_sock(apr_pool_t *p, ap_li
 #endif

     if (do_bind_listen) {
+#if HAVE_SO_REUSEPORT
+        {
+            int yes = 1;
+            apr_os_sock_t sd = -1;
+            apr_os_sock_get(&sd, s);
+            if (setsockopt(sd, SOL_SOCKET, SO_REUSEPORT,
+                           (void *)&yes, sizeof(int)) == -1) {
+                stat = errno;
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "make_sock: for address %pI, setsockopt: "
+                              "(SO_REUSEPORT)", server->bind_addr);
+                apr_socket_close(s);
+                return stat;
+            }
+        }
+#endif
+
 #if APR_HAVE_IPV6
         if (server->bind_addr->family == APR_INET6) {
             stat = apr_socket_opt_set(s, APR_IPV6_V6ONLY, v6only_setting);
@@ -727,6 +747,34 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s)
     return num_listeners;
 }

+AP_DECLARE(apr_status_t) ap_reopen_listeners(apr_pool_t *p, int num)
+{
+#if HAVE_SO_REUSEPORT
+    ap_listen_rec *lr;
+
+    for (lr = ap_listeners; lr && num; lr = lr->next, --num) {
+        apr_socket_t *sd = lr->sd, *s;
+
+        rv = apr_socket_create(&s, lr->bind_addr->family, SOCK_STREAM, 0, p);
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+        lr->sd = s;
+
+        rv = make_sock(p, lr, 1);
+        if (rv != APR_SUCCESS) {
+            /* make_sock() has closed lr->sd,
+             * restore the original socket */
+            lr->sd = sd;
+            return rv;
+        }
+        /* not needed anymore */
+        apr_socket_close(sd);
+    }
+#endif
+    return APR_SUCCESS;
+}
+
 AP_DECLARE_NONSTD(void) ap_close_listeners(void)
 {
     ap_listen_rec *lr;
Index: include/ap_listen.h
===================================================================
--- include/ap_listen.h    (revision 1575322)
+++ include/ap_listen.h    (working copy)
@@ -92,6 +92,15 @@ AP_DECLARE(void) ap_listen_pre_config(void);
 AP_DECLARE(int) ap_setup_listeners(server_rec *s);

 /**
+ * Loop through the global ap_listen_rec list and reopen up to @a num sockets
+ * when applicable (eg. HAVE_SO_REUSEPORT to reuse ports).
+ * @param p The pool reopen the sockets on
+ * @param num The number of sockets to reopen (all if negative)
+ * @return APR_SUCCESS or an APR error
+ */
+AP_DECLARE(apr_status_t) ap_reopen_listeners(apr_pool_t *p, int num);
+
+/**
  * Loop through the global ap_listen_rec list and close each of the sockets.
  */
 AP_DECLARE_NONSTD(void) ap_close_listeners(void);
[EOS]

Mime
View raw message