Return-Path: Delivered-To: apmail-httpd-dev-archive@www.apache.org Received: (qmail 11972 invoked from network); 21 Oct 2009 10:11:26 -0000 Received: from hermes.apache.org (HELO mail.apache.org) (140.211.11.3) by minotaur.apache.org with SMTP; 21 Oct 2009 10:11:26 -0000 Received: (qmail 67773 invoked by uid 500); 21 Oct 2009 10:11:25 -0000 Delivered-To: apmail-httpd-dev-archive@httpd.apache.org Received: (qmail 67665 invoked by uid 500); 21 Oct 2009 10:11:25 -0000 Mailing-List: contact dev-help@httpd.apache.org; run by ezmlm Precedence: bulk Reply-To: dev@httpd.apache.org list-help: list-unsubscribe: List-Post: List-Id: Delivered-To: mailing list dev@httpd.apache.org Received: (qmail 67656 invoked by uid 99); 21 Oct 2009 10:11:25 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 21 Oct 2009 10:11:25 +0000 X-ASF-Spam-Status: No, hits=-2.4 required=5.0 tests=AWL,BAYES_00 X-Spam-Check-By: apache.org Received-SPF: pass (athena.apache.org: domain of bojan@rexursive.com designates 150.101.121.179 as permitted sender) Received: from [150.101.121.179] (HELO beauty.rexursive.com) (150.101.121.179) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 21 Oct 2009 10:11:22 +0000 Received: from [10.1.120.24] (shrek.rexursive.com [10.1.120.24]) by beauty.rexursive.com (Postfix) with ESMTP id 8CB988C030 for ; Wed, 21 Oct 2009 21:11:00 +1100 (EST) Subject: Re: Crazy slowloris mitigation patch From: Bojan Smojver To: dev@httpd.apache.org In-Reply-To: <1256113052.9540.17.camel@shrek.rexursive.com> References: <1255640418.4989.42.camel@shrek.rexursive.com> <1255863419.4989.70.camel@shrek.rexursive.com> <1255900042.4989.80.camel@shrek.rexursive.com> <200910190000.29787.sf@sfritsch.de> <1255904571.4989.89.camel@shrek.rexursive.com> <1256078616.9540.12.camel@shrek.rexursive.com> <1256113052.9540.17.camel@shrek.rexursive.com> Content-Type: multipart/mixed; boundary="=-fYYEQuX11cYWFdlgW8FA" Date: Wed, 21 Oct 2009 
21:11:00 +1100 Message-Id: <1256119860.9540.18.camel@shrek.rexursive.com> Mime-Version: 1.0 X-Mailer: Evolution 2.26.3 (2.26.3-1.fc11) --=-fYYEQuX11cYWFdlgW8FA Content-Type: text/plain Content-Transfer-Encoding: 7bit On Wed, 2009-10-21 at 19:17 +1100, Bojan Smojver wrote: > Actually, calling close() is the wrong thing to do. Calling shutdown() > is the go. This is what I mean. -- Bojan --=-fYYEQuX11cYWFdlgW8FA Content-Disposition: attachment; filename="httpd-shutdown_sockets.patch" Content-Type: text/x-patch; name="httpd-shutdown_sockets.patch"; charset="UTF-8" Content-Transfer-Encoding: 7bit --- httpd-2.2.14-v/server/mpm/prefork/prefork.c 2009-02-01 07:54:55.000000000 +1100 +++ httpd-2.2.14/server/mpm/prefork/prefork.c 2009-10-21 17:45:28.697322726 +1100 @@ -48,6 +48,7 @@ #include "ap_listen.h" #include "ap_mmn.h" #include "apr_poll.h" +#include "apr_md5.h" #ifdef HAVE_BSTRING_H #include /* for IRIX, FD_SET calls bzero() */ @@ -336,6 +337,29 @@ die_now = 1; } +static int volatile client_socket = -1; + +#ifndef NO_USE_SIGACTION +static void shutdown_socket(int sig, siginfo_t *info, void *context) +#else +static void shutdown_socket(int sig) +#endif +{ +#ifndef NO_USE_SIGACTION + if (info->si_pid == getppid()) { +#endif + if (client_socket != -1) { + shutdown(client_socket, SHUT_RDWR); + client_socket = -1; + } +#ifndef NO_USE_SIGACTION + } + else { + clean_child_exit(0); + } +#endif +} + /* volatile just in case */ static int volatile shutdown_pending; static int volatile restart_pending; @@ -659,8 +683,12 @@ current_conn = ap_run_create_connection(ptrans, ap_server_conf, csd, my_child_num, sbh, bucket_alloc); if (current_conn) { + apr_os_sock_get((apr_os_sock_t *)&client_socket, csd); + ap_process_connection(current_conn, csd); ap_lingering_close(current_conn); + + client_socket = -1; } /* Check the pod and the generation number after processing a @@ -733,6 +761,10 @@ } if (!pid) { +#ifndef NO_USE_SIGACTION + struct sigaction act; +#endif + #ifdef 
HAVE_BINDPROCESSOR /* by default AIX binds to a single processor * this bit unbinds children which will then bind to another cpu @@ -755,6 +787,19 @@ * The pod is used for signalling the graceful restart. */ apr_signal(AP_SIG_GRACEFUL, stop_listening); + + /* If the parent sends SIGINT to the child, we shutdown the + * client socket, as we suspect that we are under a DoS attack. + */ +#ifndef NO_USE_SIGACTION + memset(&act, 0, sizeof(act)); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = shutdown_socket; + sigaction(SIGINT, &act, NULL); +#else + apr_signal(SIGINT, shutdown_socket); +#endif + child_main(slot); } @@ -803,6 +848,8 @@ int free_slots[MAX_SPAWN_RATE]; int last_non_dead; int total_non_dead; + int status; + static apr_time_t maxed_out = 0; /* initialize the free_list */ free_length = 0; @@ -813,8 +860,6 @@ total_non_dead = 0; for (i = 0; i < ap_daemons_limit; ++i) { - int status; - if (i >= ap_max_daemons_limit && free_length == idle_spawn_rate) break; ws = &ap_scoreboard_image->servers[i][0]; @@ -856,12 +901,17 @@ */ ap_mpm_pod_signal(pod); idle_spawn_rate = 1; + maxed_out = 0; } else if (idle_count < ap_daemons_min_free) { /* terminate the free list */ if (free_length == 0) { /* only report this condition once */ static int reported = 0; + static unsigned char sb_digest[APR_MD5_DIGESTSIZE]; + apr_time_t now = apr_time_now(); + apr_md5_ctx_t ctx; + pid_t pid; if (!reported) { ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, @@ -870,6 +920,118 @@ reported = 1; } idle_spawn_rate = 1; + + /* If after one maintenance interval we still see the same + * situation on the scoreboard, shutdown all client sockets + * in read state and at least 10% of all client sockets. + * Crude, but seems to clear things out. + */ + if (maxed_out) { + apr_time_t diff = now - maxed_out; + + if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) { + unsigned char cur_digest[APR_MD5_DIGESTSIZE]; + + /* Current digest of the scoreboard. 
+ */ + apr_md5_init(&ctx); + for (i = 0; i < ap_daemons_limit; ++i) { + status = ap_scoreboard_image->servers[i][0].status; + apr_md5_update(&ctx, &status, sizeof(status)); + + pid = ap_scoreboard_image->parent[i].pid; + apr_md5_update(&ctx, &pid, sizeof(pid)); + } + apr_md5_final(cur_digest, &ctx); + + /* If we haven't had a change for one maintenance + * interval, we need to make room. + */ + if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) { + maxed_out = 0; + } + else { + int rdrs = 0, cull = ap_daemons_limit / 10; + + /* Disconnect all readers (includes keep alive). + */ + for (i = 0; i < ap_daemons_limit; ++i) { + pid = ap_scoreboard_image->parent[i].pid; + status = ap_scoreboard_image->servers[i][0].status; + + if (status == SERVER_BUSY_READ || + status == SERVER_BUSY_KEEPALIVE) { + ap_mpm_safe_kill(pid, SIGINT); + rdrs++; + } + } + + /* Make up to 10% of all sockets, if required. + */ + for (i = 0; i < ap_daemons_limit && cull > rdrs; ++i) { + pid = ap_scoreboard_image->parent[i].pid; + status = ap_scoreboard_image->servers[i][0].status; + + if (status != SERVER_BUSY_READ && + status != SERVER_BUSY_KEEPALIVE) { + ap_mpm_safe_kill(pid, SIGINT); + cull--; + } + } + } + } + } + else { + int rdrs = 0; + + /* Create digest of the scoreboard, see if things + * change next time around. + */ + apr_md5_init(&ctx); + for (i = 0; i < ap_daemons_limit; ++i) { + status = ap_scoreboard_image->servers[i][0].status; + + /* These are the conditions we are concerned with. + */ + switch (status) { + case SERVER_BUSY_READ: + case SERVER_BUSY_KEEPALIVE: + rdrs++; + case SERVER_BUSY_WRITE: + case SERVER_DEAD: + case SERVER_GRACEFUL: + break; + default: + return; + } + + apr_md5_update(&ctx, &status, sizeof(status)); + + pid = ap_scoreboard_image->parent[i].pid; + apr_md5_update(&ctx, &pid, sizeof(pid)); + } + apr_md5_final(sb_digest, &ctx); + + /* Over 95% in read state (includes keep alive), clear now. 
+ */ + if (ap_daemons_limit - rdrs < ap_daemons_limit / 20) { + /* Disconnect all readers (includes keep alive). + */ + for (i = 0; i < ap_daemons_limit; ++i) { + pid = ap_scoreboard_image->parent[i].pid; + status = ap_scoreboard_image->servers[i][0].status; + + if (status == SERVER_BUSY_READ || + status == SERVER_BUSY_KEEPALIVE) { + ap_mpm_safe_kill(pid, SIGINT); + rdrs++; + } + } + } + else { + maxed_out = now; + } + } } else { if (idle_spawn_rate >= 8) { @@ -902,10 +1064,13 @@ else if (idle_spawn_rate < MAX_SPAWN_RATE) { idle_spawn_rate *= 2; } + + maxed_out = 0; } } else { idle_spawn_rate = 1; + maxed_out = 0; } } --- httpd-2.2.14-v/server/mpm/worker/worker.c 2007-07-18 00:48:25.000000000 +1000 +++ httpd-2.2.14/server/mpm/worker/worker.c 2009-10-21 17:44:40.584322944 +1100 @@ -32,6 +32,7 @@ #include "apr_poll.h" #define APR_WANT_STRFUNC #include "apr_want.h" +#include "apr_md5.h" #if APR_HAVE_UNISTD_H #include @@ -357,6 +358,82 @@ clean_child_exit(0); } +#ifndef NO_USE_SIGACTION +static void shutdown_socket(int sig, siginfo_t *info, void *context) +#else +static void shutdown_socket(int sig) +#endif +{ +#ifndef NO_USE_SIGACTION + if (info->si_pid == getppid()) +#endif + { + int csd, i, j, slot = 0, status, total_rdrs = 0, rdrs = 0, + cull = ap_daemons_limit * ap_threads_per_child / 10; + + /* Determine total number of readers (includes keep alive), our + * slot and the number of our own readers. + */ + for (i = 0; i < ap_daemons_limit; ++i) { + if (ap_scoreboard_image->parent[i].pid == ap_my_pid) { + slot = i; + } + + for (j = 0; j < ap_threads_per_child; j++) { + status = ap_scoreboard_image->servers[i][j].status; + + if (status == SERVER_BUSY_READ || + status == SERVER_BUSY_KEEPALIVE) { + + total_rdrs++; + + if (slot == i) { + rdrs++; + } + } + } + } + + /* Disconnect all readers (includes keep alive). 
+ */ + for (j = 0; j < ap_threads_per_child; j++) { + status = ap_scoreboard_image->servers[slot][j].status; + + if (worker_sockets[j] && + (status == SERVER_BUSY_READ || + status == SERVER_BUSY_KEEPALIVE)) { + + apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]); + shutdown(csd, SHUT_RDWR); + } + } + + /* Make up to 10% of all sockets, if required. + */ + if (total_rdrs < cull) { + cull = ((ap_threads_per_child - rdrs) * (cull - total_rdrs)) / cull; + + for (j = 0; j < ap_threads_per_child && cull > 0; j++) { + status = ap_scoreboard_image->servers[slot][j].status; + + if (worker_sockets[j] && + status != SERVER_BUSY_READ && + status != SERVER_BUSY_KEEPALIVE) { + + apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]); + shutdown(csd, SHUT_RDWR); + cull--; + } + } + } + } +#ifndef NO_USE_SIGACTION + else { + clean_child_exit(0); + } +#endif +} + /***************************************************************** * Connection structures and accounting... */ @@ -1247,12 +1324,31 @@ join_workers(ts->listener, threads); } else { /* !one_process */ +#ifndef NO_USE_SIGACTION + struct sigaction act; +#endif + /* remove SIGTERM from the set of blocked signals... if one of * the other threads in the process needs to take us down * (e.g., for MaxRequestsPerChild) it will send us SIGTERM */ unblock_signal(SIGTERM); apr_signal(SIGTERM, dummy_signal_handler); + + + /* If the parent sends SIGINT to the child, we shutdown the + * client socket, as we suspect that we are under a DoS attack. 
+ */ + unblock_signal(SIGINT); +#ifndef NO_USE_SIGACTION + memset(&act, 0, sizeof(act)); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = shutdown_socket; + sigaction(SIGINT, &act, NULL); +#else + apr_signal(SIGINT, shutdown_socket); +#endif + /* Watch for any messages from the parent over the POD */ while (1) { rv = ap_mpm_pod_check(pod); @@ -1404,6 +1500,8 @@ int last_non_dead; int total_non_dead; int active_thread_count = 0; + int status = SERVER_DEAD; + static apr_time_t maxed_out = 0; /* initialize the free_list */ free_length = 0; @@ -1415,7 +1513,6 @@ for (i = 0; i < ap_daemons_limit; ++i) { /* Initialization to satisfy the compiler. It doesn't know * that ap_threads_per_child is always > 0 */ - int status = SERVER_DEAD; int any_dying_threads = 0; int any_dead_threads = 0; int all_dead_threads = 1; @@ -1509,12 +1606,17 @@ /* Kill off one child */ ap_mpm_pod_signal(pod, TRUE); idle_spawn_rate = 1; + maxed_out = 0; } else if (idle_thread_count < min_spare_threads) { /* terminate the free list */ if (free_length == 0) { /* only report this condition once */ static int reported = 0; + static unsigned char sb_digest[APR_MD5_DIGESTSIZE]; + apr_time_t now = apr_time_now(); + apr_md5_ctx_t ctx; + pid_t pid; if (!reported) { ap_log_error(APLOG_MARK, APLOG_ERR, 0, @@ -1524,6 +1626,95 @@ reported = 1; } idle_spawn_rate = 1; + + /* If after one maintenance interval we still see the same + * situation on the scoreboard, shutdown all client sockets + * in read state and at least 10% of all client sockets. + * Crude, but seems to clear things out. + */ + if (maxed_out) { + apr_time_t diff = now - maxed_out; + + if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) { + unsigned char cur_digest[APR_MD5_DIGESTSIZE]; + + /* Current digest of the scoreboard. 
+ */ + apr_md5_init(&ctx); + for (i = 0; i < ap_daemons_limit; ++i) { + for (j = 0; j < ap_threads_per_child; j++) { + status = ap_scoreboard_image->servers[i][j].status; + apr_md5_update(&ctx, &status, sizeof(status)); + } + + pid = ap_scoreboard_image->parent[i].pid; + apr_md5_update(&ctx, &pid, sizeof(pid)); + } + apr_md5_final(cur_digest, &ctx); + + /* If we haven't had a change for one maintenance + * interval, we need to make room. + */ + if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) { + maxed_out = 0; + } + else { + /* Signal child processes to shutdown client sockets. + */ + for (i = 0; i < ap_daemons_limit; ++i) { + pid = ap_scoreboard_image->parent[i].pid; + ap_mpm_safe_kill(pid, SIGINT); + } + } + } + } + else { + int rdrs = 0; + + /* Create digest of the scoreboard, see if things + * change next time around. + */ + apr_md5_init(&ctx); + for (i = 0; i < ap_daemons_limit; ++i) { + for (j = 0; j < ap_threads_per_child; j++) { + status = ap_scoreboard_image->servers[i][j].status; + + /* These are conditions we are concerned with. + */ + switch (status) { + case SERVER_BUSY_READ: + case SERVER_BUSY_KEEPALIVE: + rdrs++; + case SERVER_BUSY_WRITE: + case SERVER_DEAD: + case SERVER_GRACEFUL: + break; + default: + return; + } + + apr_md5_update(&ctx, &status, sizeof(status)); + } + + pid = ap_scoreboard_image->parent[i].pid; + apr_md5_update(&ctx, &pid, sizeof(pid)); + } + apr_md5_final(sb_digest, &ctx); + + /* Over 95% in read state (includes keep alive), clear now. + */ + if (ap_daemons_limit - rdrs < ap_daemons_limit / 20) { + /* Signal child processes to shutdown client sockets. 
+ */ + for (i = 0; i < ap_daemons_limit; ++i) { + pid = ap_scoreboard_image->parent[i].pid; + ap_mpm_safe_kill(pid, SIGINT); + } + } + else { + maxed_out = now; + } + } } else { if (free_length > idle_spawn_rate) { @@ -1551,10 +1742,13 @@ else if (idle_spawn_rate < MAX_SPAWN_RATE) { idle_spawn_rate *= 2; } + + maxed_out = 0; } } else { idle_spawn_rate = 1; + maxed_out = 0; } } --=-fYYEQuX11cYWFdlgW8FA--