httpd-cvs mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From p..@apache.org
Subject svn commit: r888840 - in /httpd/mod_fcgid/trunk/modules/fcgid: fcgid_bridge.c fcgid_pm_main.c
Date Wed, 09 Dec 2009 15:36:46 GMT
Author: pqf
Date: Wed Dec  9 15:36:46 2009
New Revision: 888840

URL: http://svn.apache.org/viewvc?rev=888840&view=rev
Log:
Bug fix,  Bug 47873 -  unreliable coordination between daemon and request thread for BusyTimeout
processing
New protocol is:
1. Process node's diewhy init with FCGID_DIE_KILLSELF
2. Process manager set diewhy to FCGID_DIE_BUSY_TIMEOUT and gracefully kill process while
busy timeout
3. Process manager forced kill process while busy timeout and diewhy is FCGID_DIE_BUSY_TIMEOUT
4. Request handler move process node to error list if node's diewhy is FCGID_DIE_BUSY_TIMEOUT

Modified:
    httpd/mod_fcgid/trunk/modules/fcgid/fcgid_bridge.c
    httpd/mod_fcgid/trunk/modules/fcgid/fcgid_pm_main.c

Modified: httpd/mod_fcgid/trunk/modules/fcgid/fcgid_bridge.c
URL: http://svn.apache.org/viewvc/httpd/mod_fcgid/trunk/modules/fcgid/fcgid_bridge.c?rev=888840&r1=888839&r2=888840&view=diff
==============================================================================
--- httpd/mod_fcgid/trunk/modules/fcgid/fcgid_bridge.c (original)
+++ httpd/mod_fcgid/trunk/modules/fcgid/fcgid_bridge.c Wed Dec  9 15:36:46 2009
@@ -33,7 +33,7 @@
 #define FCGID_APPLY_TRY_COUNT 2
 #define FCGID_REQUEST_COUNT 32
 
-static fcgid_procnode *apply_free_procnode(request_rec *r,
+static fcgid_procnode *apply_free_procnode(request_rec * r,
                                            fcgid_command * command)
 {
     /* Scan idle list, find a node match inode, deviceid and groupid
@@ -70,7 +70,8 @@
 
             proctable_unlock(r);
             return current_node;
-        } else
+        }
+        else
             previous_node = current_node;
 
         current_node = next_node;
@@ -82,7 +83,7 @@
 }
 
 static void
-return_procnode(request_rec *r,
+return_procnode(request_rec * r,
                 fcgid_procnode * procnode, int communicate_error)
 {
     fcgid_procnode *previous_node, *current_node, *next_node;
@@ -102,7 +103,8 @@
             /* Unlink from busy list */
             previous_node->next_index = current_node->next_index;
             break;
-        } else
+        }
+        else
             previous_node = current_node;
         current_node = next_node;
     }
@@ -112,7 +114,8 @@
         /* Link to error list */
         procnode->next_index = error_list_header->next_index;
         error_list_header->next_index = procnode - proc_table;
-    } else {
+    }
+    else {
         /* Link to idle list */
         procnode->next_index = idle_list_header->next_index;
         idle_list_header->next_index = procnode - proc_table;
@@ -121,8 +124,7 @@
     proctable_unlock(r);
 }
 
-static int
-count_busy_processes(request_rec *r, fcgid_command * command)
+static int count_busy_processes(request_rec * r, fcgid_command * command)
 {
     int result = 0;
     fcgid_procnode *previous_node, *current_node, *next_node;
@@ -177,35 +179,23 @@
 
         /* FIXME See BZ #47483 */
         /* Return procnode
-           I will return this slot to idle(or error) list except:
-           I take too much time on this request( greater than BusyTimeout) ),
-           so the process manager may have put this slot from busy list to error
-           list, and the contain of this slot may have been modified
-           In this case I will do nothing and return, let the process manager 
-           do the job   
+           I will return this slot to idle(or error) list
          */
-        int dt = (int)
-            (apr_time_sec(apr_time_now()) - apr_time_sec(ctx->active_time));
-        if (dt > sconf->busy_timeout) { /* can't reference from procnode->cmdopts
-                                         * because procnode could have been reused
-                                         */
-            /* Do nothing but print log */
-            ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
-                          "mod_fcgid: process busy timeout (%d), took %d seconds for this
request",
-                          sconf->busy_timeout, dt);
-        } else if (ctx->has_error) {
+        if (ctx->procnode->diewhy == FCGID_DIE_BUSY_TIMEOUT) {
+            return_procnode(r, ctx->procnode, 1 /* busy timeout */ );
+        }
+        else if (ctx->has_error) {
             ctx->procnode->diewhy = FCGID_DIE_COMM_ERROR;
-            return_procnode(r, ctx->procnode,
-                            1 /* communication error */ );
-        } else if (ctx->procnode->cmdopts.max_requests_per_process
-                   && ctx->procnode->requests_handled >=
-                   ctx->procnode->cmdopts.max_requests_per_process) {
+            return_procnode(r, ctx->procnode, 1 /* communication error */ );
+        }
+        else if (ctx->procnode->cmdopts.max_requests_per_process
+                 && ctx->procnode->requests_handled >=
+                 ctx->procnode->cmdopts.max_requests_per_process) {
             ctx->procnode->diewhy = FCGID_DIE_LIFETIME_EXPIRED;
-            return_procnode(r, ctx->procnode,
-                            1 /* handled all requests */ );
-        } else
-            return_procnode(r, ctx->procnode,
-                            0 /* communication ok */ );
+            return_procnode(r, ctx->procnode, 1 /* handled all requests */ );
+        }
+        else
+            return_procnode(r, ctx->procnode, 0 /* communication ok */ );
 
         ctx->procnode = NULL;
     }
@@ -216,7 +206,7 @@
 static int getsfunc_fcgid_BRIGADE(char *buf, int len, void *arg)
 {
     apr_bucket_brigade *bb = (apr_bucket_brigade *) arg;
-    const char *dst_end = buf + len - 1;    /* leave room for terminating null */
+    const char *dst_end = buf + len - 1;        /* leave room for terminating null */
     char *dst = buf;
     apr_bucket *e = APR_BRIGADE_FIRST(bb);
     apr_status_t rv;
@@ -263,7 +253,8 @@
                 done = 1;
                 getColon = 0;
                 break;
-            } else if (getLF && (*src == ' ' || *src == '\t')) {
+            }
+            else if (getLF && (*src == ' ' || *src == '\t')) {
                 *dst++ = '\r';
                 *dst++ = '\n';
                 getLF = 0;
@@ -271,7 +262,8 @@
 
             if (*src == '\n') {
                 getLF = 1;
-            } else if (*src != '\r') {
+            }
+            else if (*src != '\r') {
                 *dst++ = *src;
             }
             src++;
@@ -329,20 +321,17 @@
                 fcgi_request.cmdopts.ipc_comm_timeout;
 
             /* Apply a process slot */
-            bucket_ctx->procnode =
-                apply_free_procnode(r, &fcgi_request);
+            bucket_ctx->procnode = apply_free_procnode(r, &fcgi_request);
             if (bucket_ctx->procnode)
                 break;
 
             /* Avoid sleeping the very first time through if there are no
                busy processes; the problem is just that we haven't spawned
                anything yet, so waiting is pointless */
-            if (i > 0 || j > 0
-                || count_busy_processes(r, &fcgi_request)) {
+            if (i > 0 || j > 0 || count_busy_processes(r, &fcgi_request)) {
                 apr_sleep(apr_time_from_sec(1));
 
-                bucket_ctx->procnode =
-                    apply_free_procnode(r, &fcgi_request);
+                bucket_ctx->procnode = apply_free_procnode(r, &fcgi_request);
                 if (bucket_ctx->procnode)
                     break;
             }
@@ -357,10 +346,10 @@
                                  &bucket_ctx->ipc) != APR_SUCCESS) {
                 proc_close_ipc(&bucket_ctx->ipc);
                 bucket_ctx->procnode->diewhy = FCGID_DIE_CONNECT_ERROR;
-                return_procnode(r, bucket_ctx->procnode,
-                                1 /* has error */ );
+                return_procnode(r, bucket_ctx->procnode, 1 /* has error */ );
                 bucket_ctx->procnode = NULL;
-            } else
+            }
+            else
                 break;
         }
     }
@@ -373,11 +362,11 @@
     }
     bucket_ctx->active_time = bucket_ctx->procnode->last_active_time =
         apr_time_now();
+    bucket_ctx->procnode->diewhy = FCGID_DIE_KILLSELF;
 
     /* Write output_brigade to fastcgi server */
     if ((rv =
-         proc_write_ipc(&bucket_ctx->ipc,
-                        output_brigade)) != APR_SUCCESS) {
+         proc_write_ipc(&bucket_ctx->ipc, output_brigade)) != APR_SUCCESS) {
         bucket_ctx->has_error = 1;
         return HTTP_INTERNAL_SERVER_ERROR;
     }
@@ -414,7 +403,8 @@
 
         ap_internal_redirect_handler(location, r);
         return HTTP_OK;
-    } else if (location && r->status == 200) {
+    }
+    else if (location && r->status == 200) {
         /* XX Note that if a script wants to produce its own Redirect 
          * body, it now has to explicitly *say* "Status: 302" 
          */
@@ -437,8 +427,8 @@
     return cond_status;
 }
 
-static int add_request_body(request_rec *r, apr_pool_t *request_pool,
-                            apr_bucket_brigade *output_brigade)
+static int add_request_body(request_rec * r, apr_pool_t * request_pool,
+                            apr_bucket_brigade * output_brigade)
 {
     apr_bucket *bucket_input, *bucket_header;
     apr_file_t *fd = NULL;
@@ -458,9 +448,9 @@
      */
     seen_eos = 0;
     do {
-        apr_bucket_brigade *input_brigade =
-            apr_brigade_create(request_pool,
-                               r->connection->bucket_alloc);
+        apr_bucket_brigade *input_brigade = apr_brigade_create(request_pool,
+                                                               r->connection->
+                                                               bucket_alloc);
 
         if ((rv = ap_get_brigade(r->input_filters, input_brigade,
                                  AP_MODE_READBYTES,
@@ -510,10 +500,10 @@
             request_size += len;
             if (request_size > sconf->max_request_len) {
                 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
-                              "mod_fcgid: HTTP request length %" APR_OFF_T_FMT 
-                              " (so far) exceeds MaxRequestLen (%" APR_OFF_T_FMT
-                              ")",
-                             request_size, sconf->max_request_len);
+                              "mod_fcgid: HTTP request length %" APR_OFF_T_FMT
+                              " (so far) exceeds MaxRequestLen (%"
+                              APR_OFF_T_FMT ")", request_size,
+                              sconf->max_request_len);
                 return HTTP_INTERNAL_SERVER_ERROR;
             }
 
@@ -521,10 +511,9 @@
                 apr_size_t wrote_len;
                 static const char *fd_key = "fcgid_fd";
 
-                if (fd == NULL){
+                if (fd == NULL) {
                     void *tmp;
-                    apr_pool_userdata_get(&tmp, fd_key,
-                                          r->connection->pool);
+                    apr_pool_userdata_get(&tmp, fd_key, r->connection->pool);
                     fd = tmp;
 
                     if (fd != NULL) {
@@ -578,7 +567,8 @@
                     apr_bucket_file_create(fd, cur_pos, len, r->pool,
                                            r->connection->bucket_alloc);
                 cur_pos += len;
-            } else {
+            }
+            else {
                 if (APR_BUCKET_IS_HEAP(bucket_input))
                     apr_bucket_copy(bucket_input, &bucket_stdin);
                 else {
@@ -589,8 +579,7 @@
                     bucket_stdin =
                         apr_bucket_heap_create(pcopydata, len,
                                                apr_bucket_free,
-                                               r->connection->
-                                               bucket_alloc);
+                                               r->connection->bucket_alloc);
                 }
             }
 
@@ -615,8 +604,7 @@
     bucket_header =
         apr_bucket_heap_create((const char *) stdin_request_header,
                                sizeof(*stdin_request_header),
-                               apr_bucket_free,
-                               r->connection->bucket_alloc);
+                               apr_bucket_free, r->connection->bucket_alloc);
     if (!init_header(FCGI_STDIN, 1, 0, 0, stdin_request_header)) {
         ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
                       "mod_fcgid: header overflow");

Modified: httpd/mod_fcgid/trunk/modules/fcgid/fcgid_pm_main.c
URL: http://svn.apache.org/viewvc/httpd/mod_fcgid/trunk/modules/fcgid/fcgid_pm_main.c?rev=888840&r1=888839&r2=888840&view=diff
==============================================================================
--- httpd/mod_fcgid/trunk/modules/fcgid/fcgid_pm_main.c (original)
+++ httpd/mod_fcgid/trunk/modules/fcgid/fcgid_pm_main.c Wed Dec  9 15:36:46 2009
@@ -54,8 +54,9 @@
     apr_time_t last_active_time, start_time;
     apr_time_t now = apr_time_now();
     int idle_timeout, proc_lifetime;
-    fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config,
-                                                    &fcgid_module);
+    fcgid_server_conf *sconf =
+        ap_get_module_config(main_server->module_config,
+                             &fcgid_module);
 
     /* Should I check the idle list now? */
     if (procmgr_must_exit()
@@ -77,17 +78,21 @@
         start_time = current_node->start_time;
         idle_timeout = current_node->cmdopts.idle_timeout;
         proc_lifetime = current_node->cmdopts.proc_lifetime;
-        if (((idle_timeout && 
-              (apr_time_sec(now) - apr_time_sec(last_active_time) > idle_timeout))
-             || (proc_lifetime && 
-              (apr_time_sec(now) - apr_time_sec(start_time) > proc_lifetime)))
+        if (((idle_timeout &&
+              (apr_time_sec(now) - apr_time_sec(last_active_time) >
+               idle_timeout))
+             || (proc_lifetime
+                 && (apr_time_sec(now) - apr_time_sec(start_time) >
+                     proc_lifetime)))
             && is_kill_allowed(main_server, current_node)) {
             /* Set die reason for log */
             if (idle_timeout &&
-                (apr_time_sec(now) - apr_time_sec(last_active_time) > idle_timeout))
+                (apr_time_sec(now) - apr_time_sec(last_active_time) >
+                 idle_timeout))
                 current_node->diewhy = FCGID_DIE_IDLE_TIMEOUT;
-            else if (proc_lifetime && 
-                     (apr_time_sec(now) - apr_time_sec(start_time) > proc_lifetime))
+            else if (proc_lifetime &&
+                     (apr_time_sec(now) - apr_time_sec(start_time) >
+                      proc_lifetime))
                 current_node->diewhy = FCGID_DIE_LIFETIME_EXPIRED;
 
             /* Unlink from idle list */
@@ -96,7 +101,8 @@
             /* Link to error list */
             current_node->next_index = error_list_header->next_index;
             error_list_header->next_index = current_node - proc_table;
-        } else
+        }
+        else
             previous_node = current_node;
 
         current_node = next_node;
@@ -107,17 +113,13 @@
 static apr_time_t lastbusyscan = 0;
 static void scan_busylist(server_rec * main_server)
 {
-    /*
-       Scan the busy list
-       1. move all expired node to error list
-     */
-    fcgid_procnode *previous_node, *current_node, *next_node;
-    fcgid_procnode *error_list_header;
+    fcgid_procnode *current_node;
     fcgid_procnode *proc_table;
     apr_time_t last_active_time;
     apr_time_t now = apr_time_now();
-    fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config,
-                                                    &fcgid_module);
+    fcgid_server_conf *sconf =
+        ap_get_module_config(main_server->module_config,
+                             &fcgid_module);
 
     /* Should I check the busy list? */
     if (procmgr_must_exit()
@@ -126,33 +128,28 @@
         return;
     lastbusyscan = now;
 
-    /* Check the list */
+    /* Check busy list */
     proc_table = proctable_get_table_array();
-    previous_node = proctable_get_busy_list();
-    error_list_header = proctable_get_error_list();
 
     proctable_pm_lock(main_server);
-    current_node = &proc_table[previous_node->next_index];
+    current_node = &proc_table[proctable_get_busy_list()->next_index];
     while (current_node != proc_table) {
-        next_node = &proc_table[current_node->next_index];
-
         last_active_time = current_node->last_active_time;
-        /* FIXME See BZ #47483 */
         if (apr_time_sec(now) - apr_time_sec(last_active_time) >
-            (current_node->cmdopts.busy_timeout + 10)) {
-            /* Set dir reason for log */
-            current_node->diewhy = FCGID_DIE_BUSY_TIMEOUT;
-
-            /* Unlink from busy list */
-            previous_node->next_index = current_node->next_index;
-
-            /* Link to error list */
-            current_node->next_index = error_list_header->next_index;
-            error_list_header->next_index = current_node - proc_table;
-        } else
-            previous_node = current_node;
-
-        current_node = next_node;
+            (current_node->cmdopts.busy_timeout)) {
+            /* Protocol: 
+               1. diewhy init with FCGID_DIE_KILLSELF
+               2. Process manager set diewhy to FCGID_DIE_BUSY_TIMEOUT and gracefully kill
process while busy timeout
+               3. Process manager forced kill process while busy timeout and diewhy is FCGID_DIE_BUSY_TIMEOUT
+             */
+            if (current_node->diewhy == FCGID_DIE_BUSY_TIMEOUT)
+                proc_kill_force(current_node, main_server);
+            else {
+                current_node->diewhy = FCGID_DIE_BUSY_TIMEOUT;
+                proc_kill_gracefully(current_node, main_server);
+            }
+        }
+        current_node = &proc_table[current_node->next_index];
     }
     proctable_pm_unlock(main_server);
 }
@@ -174,8 +171,9 @@
     apr_time_t last_active_time;
     apr_time_t now = apr_time_now();
     fcgid_procnode temp_header;
-    fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config,
-                                                    &fcgid_module);
+    fcgid_server_conf *sconf =
+        ap_get_module_config(main_server->module_config,
+                             &fcgid_module);
 
     memset(&temp_header, 0, sizeof(temp_header));
 
@@ -208,7 +206,8 @@
             /* Link to check list */
             current_node->next_index = check_list_header->next_index;
             check_list_header->next_index = current_node - proc_table;
-        } else
+        }
+        else
             previous_node = current_node;
 
         current_node = next_node;
@@ -238,7 +237,8 @@
             /* Link to free list */
             link_node_to_list(main_server, proctable_get_free_list(),
                               current_node, proc_table);
-        } else
+        }
+        else
             previous_node = current_node;
 
         current_node = next_node;
@@ -278,8 +278,9 @@
     fcgid_procnode *free_list_header = proctable_get_free_list();
     fcgid_procnode *proc_table = proctable_get_table_array();
     fcgid_procnode temp_error_header;
-    fcgid_server_conf *sconf = ap_get_module_config(main_server->module_config,
-                                                    &fcgid_module);
+    fcgid_server_conf *sconf =
+        ap_get_module_config(main_server->module_config,
+                             &fcgid_module);
 
     /* Should I check the busy list? */
     if (procmgr_must_exit()
@@ -300,15 +301,15 @@
     while (current_node != proc_table) {
         next_node = &proc_table[current_node->next_index];
 
-        if (proc_wait_process(main_server, current_node) !=
-            APR_CHILD_NOTDONE) {
+        if (proc_wait_process(main_server, current_node) != APR_CHILD_NOTDONE) {
             /* Unlink from error list */
             previous_node->next_index = current_node->next_index;
 
             /* Link to free list */
             current_node->next_index = free_list_header->next_index;
             free_list_header->next_index = current_node - proc_table;
-        } else
+        }
+        else
             previous_node = current_node;
 
         current_node = next_node;
@@ -327,7 +328,8 @@
             apr_pool_userdata_set("set", HAS_GRACEFUL_KILL,
                                   apr_pool_cleanup_null,
                                   current_node->proc_pool);
-        } else {
+        }
+        else {
             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, main_server,
                          "mod_fcgid: process %" APR_PID_T_FMT
                          " graceful kill fail, sending SIGKILL",
@@ -373,7 +375,8 @@
                                      main_server);
                 apr_pool_destroy(proc_table[i].proc_pool);
                 proc_table[i].proc_pool = NULL;
-            } else
+            }
+            else
                 proc_kill_force(&proc_table[i], main_server);
         }
     }
@@ -397,7 +400,7 @@
  * either in the arch/ platform-specific modules or util_script.c from whence
  * it came.
  */
-static void default_proc_env(apr_table_t *e)
+static void default_proc_env(apr_table_t * e)
 {
     const char *env_temp;
 
@@ -456,6 +459,7 @@
     }
 #endif
 }
+
 /* End of common to util_script.c */
 
 static void
@@ -504,7 +508,8 @@
     procinfo.uid = command->uid;
     procinfo.gid = command->gid;
     procinfo.userdir = command->userdir;
-    if ((rv = apr_pool_create(&procnode->proc_pool, configpool)) != APR_SUCCESS
+    if ((rv =
+         apr_pool_create(&procnode->proc_pool, configpool)) != APR_SUCCESS
         || (procinfo.proc_environ =
             apr_table_make(procnode->proc_pool, INITENV_CNT)) == NULL) {
         /* Link the node back to free list in this case */
@@ -521,7 +526,7 @@
      * request-by-request variables, so if any are overriden, they preempt
      * any system default assumptions
      */
-    default_proc_env(procinfo.proc_environ);        
+    default_proc_env(procinfo.proc_environ);
     for (i = 0; i < INITENV_CNT; i++) {
         if (command->cmdenv.initenv_key[i][0] == '\0')
             break;
@@ -532,23 +537,24 @@
     /* Spawn the process now */
     /* XXX Spawn uses wrapper_cmdline, but log uses cgipath ? */
     if ((rv =
-        proc_spawn_process(command->wrapper_cmdline, &procinfo,
-                           procnode)) != APR_SUCCESS) {
+         proc_spawn_process(command->wrapper_cmdline, &procinfo,
+                            procnode)) != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_WARNING, rv, main_server,
-                     "mod_fcgid: spawn process %s error",
-                     command->cgipath);
+                     "mod_fcgid: spawn process %s error", command->cgipath);
 
         apr_pool_destroy(procnode->proc_pool);
         link_node_to_list(main_server, free_list_header,
                           procnode, proctable_array);
         return;
-    } else {
+    }
+    else {
         /* The job done */
         link_node_to_list(main_server, idle_list_header,
                           procnode, proctable_array);
         ap_log_error(APLOG_MARK, APLOG_INFO, 0, main_server,
                      "mod_fcgid: server %s:%s(%" APR_PID_T_FMT ") started",
-                     command->virtualhost, command->cgipath, procnode->proc_id.pid);
+                     command->virtualhost, command->cgipath,
+                     procnode->proc_id.pid);
         register_spawn(main_server, procnode);
     }
 }



Mime
View raw message