Mailing-List: contact dev-help@httpd.apache.org; run by ezmlm
Precedence: bulk
Reply-To: dev@httpd.apache.org
Received-SPF: pass (athena.apache.org: domain of bojan@rexursive.com
 designates 150.101.121.179 as permitted sender)
Subject: Re: Crazy slowloris mitigation patch
From: Bojan Smojver <bojan@rexursive.com>
To: dev@httpd.apache.org
In-Reply-To: <1256113052.9540.17.camel@shrek.rexursive.com>
References: <1255640418.4989.42.camel@shrek.rexursive.com>
	 <1255863419.4989.70.camel@shrek.rexursive.com>
	 <1255900042.4989.80.camel@shrek.rexursive.com>
	 <200910190000.29787.sf@sfritsch.de>
	 <1255904571.4989.89.camel@shrek.rexursive.com>
	 <1256078616.9540.12.camel@shrek.rexursive.com>
	 <1256113052.9540.17.camel@shrek.rexursive.com>
Content-Type: multipart/mixed; boundary="=-fYYEQuX11cYWFdlgW8FA"
Date: Wed, 21 Oct 2009 21:11:00 +1100
Message-Id: <1256119860.9540.18.camel@shrek.rexursive.com>
Mime-Version: 1.0


--=-fYYEQuX11cYWFdlgW8FA
Content-Type: text/plain
Content-Transfer-Encoding: 7bit

On Wed, 2009-10-21 at 19:17 +1100, Bojan Smojver wrote:
> Actually, calling close() is the wrong thing to do. Calling shutdown()
> is the go.

This is what I mean.

-- 
Bojan

--=-fYYEQuX11cYWFdlgW8FA
Content-Disposition: attachment; filename="httpd-shutdown_sockets.patch"
Content-Type: text/x-patch; name="httpd-shutdown_sockets.patch";
 charset="UTF-8"
Content-Transfer-Encoding: 7bit

--- httpd-2.2.14-v/server/mpm/prefork/prefork.c	2009-02-01 07:54:55.000000000 +1100
+++ httpd-2.2.14/server/mpm/prefork/prefork.c	2009-10-21 17:45:28.697322726 +1100
@@ -48,6 +48,7 @@
 #include "ap_listen.h"
 #include "ap_mmn.h"
 #include "apr_poll.h"
+#include "apr_md5.h"
 
 #ifdef HAVE_BSTRING_H
 #include <bstring.h>            /* for IRIX, FD_SET calls bzero() */
@@ -336,6 +337,29 @@
     die_now = 1;
 }
 
+/* OS descriptor of the connection being processed, or -1 when none. */
+static int volatile client_socket = -1;
+
+/* SIGINT handler: shut down the client socket when the parent asks;
+ * with sigaction(), SIGINT from anyone else calls clean_child_exit(0). */
+#ifdef NO_USE_SIGACTION
+static void shutdown_socket(int sig)
+#else
+static void shutdown_socket(int sig, siginfo_t *info, void *context)
+#endif
+{
+#ifndef NO_USE_SIGACTION
+    if (info->si_pid != getppid()) {
+        clean_child_exit(0);
+        return;
+    }
+#endif
+    if (client_socket != -1) {
+        shutdown(client_socket, SHUT_RDWR);
+        client_socket = -1;
+    }
+}
+
 /* volatile just in case */
 static int volatile shutdown_pending;
 static int volatile restart_pending;
@@ -659,8 +683,12 @@
 
         current_conn = ap_run_create_connection(ptrans, ap_server_conf, csd, my_child_num, sbh, bucket_alloc);
         if (current_conn) {
+            apr_os_sock_get((apr_os_sock_t *)&client_socket, csd);
+
             ap_process_connection(current_conn, csd);
             ap_lingering_close(current_conn);
+
+            client_socket = -1;
         }
 
         /* Check the pod and the generation number after processing a
@@ -733,6 +761,10 @@
     }
 
     if (!pid) {
+#ifndef NO_USE_SIGACTION
+        struct sigaction act;
+#endif
+
 #ifdef HAVE_BINDPROCESSOR
         /* by default AIX binds to a single processor
          * this bit unbinds children which will then bind to another cpu
@@ -755,6 +787,19 @@
          * The pod is used for signalling the graceful restart.
          */
         apr_signal(AP_SIG_GRACEFUL, stop_listening);
+
+        /* If the parent sends SIGINT to the child, we shutdown the
+         * client socket, as we suspect that we are under a DoS attack.
+         */ 
+#ifndef NO_USE_SIGACTION
+        memset(&act, 0, sizeof(act));
+        act.sa_flags = SA_SIGINFO;
+        act.sa_sigaction = shutdown_socket;
+        sigaction(SIGINT, &act, NULL);
+#else
+        apr_signal(SIGINT, shutdown_socket);
+#endif
+
         child_main(slot);
     }
 
@@ -803,6 +848,8 @@
     int free_slots[MAX_SPAWN_RATE];
     int last_non_dead;
     int total_non_dead;
+    int status;
+    static apr_time_t maxed_out = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -813,8 +860,6 @@
     total_non_dead = 0;
 
     for (i = 0; i < ap_daemons_limit; ++i) {
-        int status;
-
         if (i >= ap_max_daemons_limit && free_length == idle_spawn_rate)
             break;
         ws = &ap_scoreboard_image->servers[i][0];
@@ -856,12 +901,17 @@
          */
         ap_mpm_pod_signal(pod);
         idle_spawn_rate = 1;
+        maxed_out = 0;
     }
     else if (idle_count < ap_daemons_min_free) {
         /* terminate the free list */
         if (free_length == 0) {
             /* only report this condition once */
             static int reported = 0;
+            static unsigned char sb_digest[APR_MD5_DIGESTSIZE];
+            apr_time_t now = apr_time_now();
+            apr_md5_ctx_t ctx;
+            pid_t pid;
 
             if (!reported) {
                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
@@ -870,6 +920,118 @@
                 reported = 1;
             }
             idle_spawn_rate = 1;
+
+            /* If after one maintenance interval we still see the same
+             * situation on the scoreboard, shutdown all client sockets
+             * in read state and at least 10% of all client sockets.
+             * Crude, but seems to clear things out.
+             */
+            if (maxed_out) {
+                apr_time_t diff = now - maxed_out;
+
+                if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) {
+                    unsigned char cur_digest[APR_MD5_DIGESTSIZE];
+
+                    /* Current digest of the scoreboard.
+                     */
+                    apr_md5_init(&ctx);
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        status = ap_scoreboard_image->servers[i][0].status;
+                        apr_md5_update(&ctx, &status, sizeof(status));
+
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        apr_md5_update(&ctx, &pid, sizeof(pid));
+                    }
+                    apr_md5_final(cur_digest, &ctx);
+
+                    /* If we haven't had a change for one maintenance
+                     * interval, we need to make room.
+                     */
+                    if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) {
+                        maxed_out = 0;
+                    }
+                    else {
+                        int rdrs = 0, cull = ap_daemons_limit / 10;
+
+                        /* Disconnect all readers (includes keep alive).
+                         */
+                        for (i = 0; i < ap_daemons_limit; ++i) {
+                            pid = ap_scoreboard_image->parent[i].pid;
+                            status = ap_scoreboard_image->servers[i][0].status;
+
+                            if (status == SERVER_BUSY_READ ||
+                                status == SERVER_BUSY_KEEPALIVE) {
+                                ap_mpm_safe_kill(pid, SIGINT);
+                                rdrs++;
+                            }
+                        }
+
+                        /* Bring the total up to 10% of all sockets, if required.
+                         */
+                        for (i = 0; i < ap_daemons_limit && cull > rdrs; ++i) {
+                            pid = ap_scoreboard_image->parent[i].pid;
+                            status = ap_scoreboard_image->servers[i][0].status;
+
+                            if (status != SERVER_BUSY_READ &&
+                                status != SERVER_BUSY_KEEPALIVE) {
+                                ap_mpm_safe_kill(pid, SIGINT);
+                                cull--;
+                            }
+                        }
+                    }
+                }
+            }
+            else {
+                int rdrs = 0;
+
+                /* Create digest of the scoreboard, see if things
+                 * change next time around.
+                 */
+                apr_md5_init(&ctx);
+                for (i = 0; i < ap_daemons_limit; ++i) {
+                    status = ap_scoreboard_image->servers[i][0].status;
+
+                    /* These are the conditions we are concerned with.
+                     */ 
+                    switch (status) { 
+                    case SERVER_BUSY_READ:
+                    case SERVER_BUSY_KEEPALIVE:
+                        rdrs++;
+                    case SERVER_BUSY_WRITE:
+                    case SERVER_DEAD:
+                    case SERVER_GRACEFUL:
+                        break;
+                    default:
+                        return;
+                    }
+
+                    apr_md5_update(&ctx, &status, sizeof(status));
+
+                    pid = ap_scoreboard_image->parent[i].pid;
+                    apr_md5_update(&ctx, &pid, sizeof(pid));
+                }
+                apr_md5_final(sb_digest, &ctx);
+
+                /* Over 95% in read state (includes keep alive), clear now.
+                 */
+                if (ap_daemons_limit - rdrs < ap_daemons_limit / 20) {
+                    /* Disconnect all readers (includes keep alive).
+                     */
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        status = ap_scoreboard_image->servers[i][0].status;
+
+                        if (status == SERVER_BUSY_READ ||
+                            status == SERVER_BUSY_KEEPALIVE) {
+                            ap_mpm_safe_kill(pid, SIGINT);
+                            rdrs++;
+                        }
+                    }
+                }
+                else {
+                    maxed_out = now;
+                }
+            }
         }
         else {
             if (idle_spawn_rate >= 8) {
@@ -902,10 +1064,13 @@
             else if (idle_spawn_rate < MAX_SPAWN_RATE) {
                 idle_spawn_rate *= 2;
             }
+
+            maxed_out = 0;
         }
     }
     else {
         idle_spawn_rate = 1;
+        maxed_out = 0;
     }
 }
 
--- httpd-2.2.14-v/server/mpm/worker/worker.c	2007-07-18 00:48:25.000000000 +1000
+++ httpd-2.2.14/server/mpm/worker/worker.c	2009-10-21 17:44:40.584322944 +1100
@@ -32,6 +32,7 @@
 #include "apr_poll.h"
 #define APR_WANT_STRFUNC
 #include "apr_want.h"
+#include "apr_md5.h"
 
 #if APR_HAVE_UNISTD_H
 #include <unistd.h>
@@ -357,6 +358,82 @@
     clean_child_exit(0);
 }
 
+/* SIGINT handler.  When the parent signals us, shut down our client
+ * sockets in read or keep-alive state, plus a share of our other sockets
+ * if readers are under a tenth of ap_daemons_limit * ap_threads_per_child.
+ * With sigaction(), SIGINT from anyone else calls clean_child_exit(0). */
+#ifndef NO_USE_SIGACTION
+static void shutdown_socket(int sig, siginfo_t *info, void *context)
+#else
+static void shutdown_socket(int sig)
+#endif
+{
+#ifndef NO_USE_SIGACTION
+    if (info->si_pid == getppid())
+#endif
+    {
+        int csd, i, j, slot = -1, status, total_rdrs = 0, rdrs = 0,
+            cull = ap_daemons_limit * ap_threads_per_child / 10;
+
+        /* Count all readers (includes keep alive), find our slot and
+         * count our own readers.  slot starts at -1 so that slot 0's
+         * readers are not mistaken for ours before our slot is known. */
+        for (i = 0; i < ap_daemons_limit; ++i) {
+            if (ap_scoreboard_image->parent[i].pid == ap_my_pid) {
+                slot = i;
+            }
+
+            for (j = 0; j < ap_threads_per_child; j++) {
+                status = ap_scoreboard_image->servers[i][j].status;
+                if (status == SERVER_BUSY_READ ||
+                    status == SERVER_BUSY_KEEPALIVE) {
+                    total_rdrs++;
+                    if (slot == i) {
+                        rdrs++;
+                    }
+                }
+            }
+        }
+
+        if (slot < 0) {
+            return; /* no slot found: cannot pair thread states with sockets */
+        }
+
+        /* Disconnect all readers (includes keep alive).
+         */
+        for (j = 0; j < ap_threads_per_child; j++) {
+            status = ap_scoreboard_image->servers[slot][j].status;
+            if (worker_sockets[j] &&
+                (status == SERVER_BUSY_READ ||
+                 status == SERVER_BUSY_KEEPALIVE)) {
+                apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]);
+                shutdown(csd, SHUT_RDWR);
+            }
+        }
+
+        /* Bring the total up to 10% of all sockets, if required.
+         */
+        if (total_rdrs < cull) {
+            cull = ((ap_threads_per_child - rdrs) * (cull - total_rdrs)) / cull;
+            for (j = 0; j < ap_threads_per_child && cull > 0; j++) {
+                status = ap_scoreboard_image->servers[slot][j].status;
+                if (worker_sockets[j] &&
+                    status != SERVER_BUSY_READ &&
+                    status != SERVER_BUSY_KEEPALIVE) {
+                    apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]);
+                    shutdown(csd, SHUT_RDWR);
+                    cull--;
+                }
+            }
+        }
+    }
+#ifndef NO_USE_SIGACTION
+    else {
+        clean_child_exit(0);
+    }
+#endif
+}
+
 /*****************************************************************
  * Connection structures and accounting...
  */
@@ -1247,12 +1324,31 @@
         join_workers(ts->listener, threads);
     }
     else { /* !one_process */
+#ifndef NO_USE_SIGACTION
+        struct sigaction act;
+#endif
+
         /* remove SIGTERM from the set of blocked signals...  if one of
          * the other threads in the process needs to take us down
          * (e.g., for MaxRequestsPerChild) it will send us SIGTERM
          */
         unblock_signal(SIGTERM);
         apr_signal(SIGTERM, dummy_signal_handler);
+
+
+       /* If the parent sends SIGINT to the child, we shutdown the
+        * client socket, as we suspect that we are under a DoS attack.
+        */
+        unblock_signal(SIGINT);
+#ifndef NO_USE_SIGACTION
+        memset(&act, 0, sizeof(act));
+        act.sa_flags = SA_SIGINFO;
+        act.sa_sigaction = shutdown_socket;
+        sigaction(SIGINT, &act, NULL);
+#else
+        apr_signal(SIGINT, shutdown_socket);
+#endif
+
         /* Watch for any messages from the parent over the POD */
         while (1) {
             rv = ap_mpm_pod_check(pod);
@@ -1404,6 +1500,8 @@
     int last_non_dead;
     int total_non_dead;
     int active_thread_count = 0;
+    int status = SERVER_DEAD;
+    static apr_time_t maxed_out = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -1415,7 +1513,6 @@
     for (i = 0; i < ap_daemons_limit; ++i) {
         /* Initialization to satisfy the compiler. It doesn't know
          * that ap_threads_per_child is always > 0 */
-        int status = SERVER_DEAD;
         int any_dying_threads = 0;
         int any_dead_threads = 0;
         int all_dead_threads = 1;
@@ -1509,12 +1606,17 @@
         /* Kill off one child */
         ap_mpm_pod_signal(pod, TRUE);
         idle_spawn_rate = 1;
+        maxed_out = 0;
     }
     else if (idle_thread_count < min_spare_threads) {
         /* terminate the free list */
         if (free_length == 0) {
             /* only report this condition once */
             static int reported = 0;
+            static unsigned char sb_digest[APR_MD5_DIGESTSIZE];
+            apr_time_t now = apr_time_now();
+            apr_md5_ctx_t ctx;
+            pid_t pid;
 
             if (!reported) {
                 ap_log_error(APLOG_MARK, APLOG_ERR, 0,
@@ -1524,6 +1626,95 @@
                 reported = 1;
             }
             idle_spawn_rate = 1;
+
+            /* If after one maintenance interval we still see the same
+             * situation on the scoreboard, shutdown all client sockets
+             * in read state and at least 10% of all client sockets.
+             * Crude, but seems to clear things out.
+             */
+            if (maxed_out) {
+                apr_time_t diff = now - maxed_out;
+
+                if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) {
+                    unsigned char cur_digest[APR_MD5_DIGESTSIZE];
+
+                    /* Current digest of the scoreboard.
+                     */
+                    apr_md5_init(&ctx);
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        for (j = 0; j < ap_threads_per_child; j++) {
+                            status = ap_scoreboard_image->servers[i][j].status;
+                            apr_md5_update(&ctx, &status, sizeof(status));
+                        }
+
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        apr_md5_update(&ctx, &pid, sizeof(pid));
+                    }
+                    apr_md5_final(cur_digest, &ctx);
+
+                    /* If we haven't had a change for one maintenance
+                     * interval, we need to make room.
+                     */
+                    if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) {
+                        maxed_out = 0;
+                    }
+                    else {
+                        /* Signal child processes to shutdown client sockets.
+                         */
+                        for (i = 0; i < ap_daemons_limit; ++i) {
+                            pid = ap_scoreboard_image->parent[i].pid;
+                            ap_mpm_safe_kill(pid, SIGINT);
+                        }
+                    }
+                }
+            }
+            else {
+                int rdrs = 0;
+
+                /* Create digest of the scoreboard, see if things
+                 * change next time around.
+                 */
+                apr_md5_init(&ctx);
+                for (i = 0; i < ap_daemons_limit; ++i) {
+                    for (j = 0; j < ap_threads_per_child; j++) {
+                        status = ap_scoreboard_image->servers[i][j].status;
+
+                        /* These are conditions we are concerned with.
+                         */
+                        switch (status) {
+                        case SERVER_BUSY_READ:
+                        case SERVER_BUSY_KEEPALIVE:
+                            rdrs++;
+                        case SERVER_BUSY_WRITE:
+                        case SERVER_DEAD:
+                        case SERVER_GRACEFUL:
+                            break;
+                        default:
+                            return;
+                        }
+
+                        apr_md5_update(&ctx, &status, sizeof(status));
+                    }
+
+                    pid = ap_scoreboard_image->parent[i].pid;
+                    apr_md5_update(&ctx, &pid, sizeof(pid));
+                }
+                apr_md5_final(sb_digest, &ctx);
+
+                /* Over 95% in read state (includes keep alive), clear now.
+                 */
+                if (ap_daemons_limit - rdrs < ap_daemons_limit / 20) {
+                    /* Signal child processes to shutdown client sockets.
+                     */
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+                        pid = ap_scoreboard_image->parent[i].pid;
+                        ap_mpm_safe_kill(pid, SIGINT);
+                    }
+                }
+                else {
+                    maxed_out = now;
+                }
+            }
         }
         else {
             if (free_length > idle_spawn_rate) {
@@ -1551,10 +1742,13 @@
             else if (idle_spawn_rate < MAX_SPAWN_RATE) {
                 idle_spawn_rate *= 2;
             }
+
+            maxed_out = 0;
         }
     }
     else {
       idle_spawn_rate = 1;
+      maxed_out = 0;
     }
 }
 

--=-fYYEQuX11cYWFdlgW8FA--