tomcat-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Mathias.Herbe...@iroise.net (Mathias Herberts)
Subject Load balancing patch
Date Tue, 07 May 2002 09:53:27 GMT

Here is  my patch to  the load balancing  code. I have tested  the jk1
part but not the jk2. The behavior should be identical though.

The concept of local workers is introduced. Local workers form a subset
of the balanced workers; they handle requests that carry no session
information, as well as requests that carry session information but
whose original worker has failed.

The list of local workers is specified in the workers.properties file
as follows:

worker.lb.balanced_workers=w1,w2,w3,w4,w5,w6
worker.lb.local_workers=w4,w6

Internally the  module will  rearrange the workers'  list so  that the
local workers appear first in the list. For the example above the list
will be rearranged as:

     w4,w6,w3,w1,w5,w2

When a request comes in it is  either part of a session or not. If the
request  is part  of  a session  then  the worker  having created  the
session is selected  (according to the session route  appearing in the
session  id). If  this worker  has failed,  the request  is considered
sessionless.   For requests without  a session,  the workers'  list is
scanned from the beginning, selecting the worker not in error with the
lowest  lb_value.   When  the   last  local  worker  is  reached,  two
alternatives  exist  depending  on   the  value  of  the  fault_action
property.  If fault_action is set to reject and a worker was selected,
it is returned as the most suitable worker for the request; if no
worker was selected so far, meaning all local workers are in error, a
null value is returned as the most suitable worker, meaning the request
is in error. If fault_action is set to balance, the selection process
continues.

For jk1, if fault_action is set to balance, non-local workers in error
can be selected to handle the request. In jk2, workers in error are
never selected.

The fault_action is specified as follows:

worker.lb.fault_action={reject|balance}

With my patch, neither lb_value nor lb_factor needs to have a special
value to handle local workers. Thus lb_value is modified on each
request handled by the worker.


Feedback is welcome, testing of jk2 needs to be done.

Mathias.

---8<------8<------8<------8<------8<------8<------8<------8<------8<---
Index: jk/native/common/jk_lb_worker.c
===================================================================
RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_lb_worker.c,v
retrieving revision 1.9
diff -r1.9 jk_lb_worker.c
97a98,110
>     /*
>      * This is the index of the last local worker in the workers' array.
>      * The workers' array is assumed sorted with the local workers first.
>      */
>     int last_local_worker_index;
> 
>     /*
>      * What to do if the local worker is unavailable,
>      *   JK_LOCAL_WORKER_FAULT_REJECT to reject the reaques thus generating an error,
>      *   JK_LOCAL_WORKER_FAULT_BALANCE to load balance the request on another worker.
>      */
>     int     local_worker_fault_action;
> 
253c266
<     unsigned i;
---
>     int i;
257c270
<         for(i = 0 ; i < p->num_of_workers ; i++) {
---
>         for(i = 0 ; (unsigned) i < p->num_of_workers ; i++) {
272c285
<     for(i = 0 ; i < p->num_of_workers ; i++) {
---
>     for(i = 0 ; (unsigned) i < p->num_of_workers ; i++) {
280,281d292
<                     p->lb_workers[i].error_time     = now;
<                     rc = &(p->lb_workers[i]);
283c294,305
<                     break;
---
> 		    /*
> 		     * A recovering worker is only eligible AFTER all
> 		     * local workers have been tried and fault_action
> 		     * is BALANCE.
> 		     */
> 
> 		    if(i > p->last_local_worker_index) {
> 		        p->lb_workers[i].error_time     = now;
>                         rc = &(p->lb_workers[i]);
> 
>                         break;
> 		    }
291,292c313
<         }            
<     }
---
>         }
294,295c315,340
<     if(rc && rc->lb_value != 0 ) {
<         rc->lb_value += rc->lb_factor;                
---
> 	/*
> 	 * If we reached the end of local workers
> 	 * use the local worker with the least lb_value if available,
> 	 * otherwise act according to local_worker_fault_action.
> 	 *
> 	 * If no local workers we specified, continue worker selection.
> 	 */
> 	
> 	if(i == p->last_local_worker_index) {
> 	    /*
> 	     * If we have selected a worker then use it as it is the local worker
> 	     * with the lowest lb_value.
> 	     *
> 	     * If rc is NULL, no worker was selected so far, all local workers are
> 	     * in error. If fault_action is to reject the request do so.
> 	     *
> 	     * If rc is NULL but fault_action is to balance the request continue.
> 	     * worker selection.
> 	     *
> 	     */
> 	    if(rc) {
> 	        break;
> 	    } else if(p->local_worker_fault_action == JK_LOCAL_WORKER_FAULT_REJECT) {
> 	        return rc;
> 	    }
> 	}            
297a343,344
>     rc->lb_value += rc->lb_factor;                
> 
332c379
<                         if(rec->in_recovering && rec->lb_value != 0) {
---
>                         if(rec->in_recovering) {
416a464
> 	char **local_worker_names;	
417a466
> 	unsigned num_local_workers;
459a509,549
> 	    /*
> 	     * Check if a list of local workers was provided.
> 	     * If so reorder the workers list so as to put the
> 	     * local workers at the beginning.
> 	     */
> 
> 	    p->last_local_worker_index = -1;
> 
> 	    if(jk_get_local_worker_list(props,
> 					p->name,
> 					&local_worker_names, 
> 					&num_local_workers) && num_local_workers) {
> 	        unsigned j = 0;
> 		unsigned k = 0;
> 		unsigned fault_action = JK_LOCAL_WORKER_FAULT_REJECT;
> 
> 	        for(j = 0; j < num_local_workers; j++) {
> 		    for(k = p->last_local_worker_index + 1; k < num_of_workers; k++) {
> 		        if(strcmp(p->lb_workers[k].name, local_worker_names[j]) == 0) {
> 			    worker_record_t tmpwkr;
> 
> 			    p->last_local_worker_index++;
> 			    
> 			    tmpwkr = p->lb_workers[p->last_local_worker_index];
> 			    p->lb_workers[p->last_local_worker_index] = p->lb_workers[k];
> 			    p->lb_workers[k] = tmpwkr;
> 		        }
> 		    }
> 	        }
> 
> 		if(jk_get_local_worker_fault_action(props,
> 						    p->name,
> 						    &fault_action) == JK_TRUE) {
> 		    p->local_worker_fault_action = fault_action;
> 		} else {
> 		    jk_log(l, JK_LOG_INFO,
> 			   "In jk_worker_t::validate: Local worker fault action defaults to REJECT for worker %s\n",
> 			   p->name);
> 		}
> 	    }
> 
470c560
<         }        
---
> 	}        
562c652
<                 private_data->num_of_workers = 0;
---
>                 private_data->num_of_workers = 0;		
567a658,660
> 
> 		private_data->last_local_worker_index = -1;
> 		private_data->local_worker_fault_action = JK_LOCAL_WORKER_FAULT_REJECT;
Index: jk/native/common/jk_util.c
===================================================================
RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_util.c,v
retrieving revision 1.13
diff -r1.13 jk_util.c
87a88,91
> #define LOCAL_WORKERS               ("local_workers")
> #define LOCAL_WORKER_FAULT_ACTION   ("fault_action")
> #define FAULT_ACTION_REJECT         ("reject")
> #define FAULT_ACTION_BALANCE        ("balance")
468a473,521
>     return JK_FALSE;
> }
> 
> int jk_get_local_worker_list(jk_map_t *m,
> 			     const char *lb_wname,
> 			     char ***list,
> 			     unsigned *num_local_workers)
> {
>     char buf[1024];
> 
>     if(m && list && num_local_workers && lb_wname) {
>         char **ar = NULL;
> 
>         sprintf(buf, "%s.%s.%s", PREFIX_OF_WORKER, lb_wname, LOCAL_WORKERS);
>         ar = map_get_string_list(m, buf, num_local_workers, NULL);
>         if(ar)  {
>             *list = ar;     
>             return JK_TRUE;
>         }
>         *list = NULL;   
>         *num_local_workers = 0;
>     }
> 
>     return JK_FALSE;
> }
> 
> int jk_get_local_worker_fault_action(jk_map_t *m,
> 				     const char *lb_wname,
> 				     int *action)
> {
>     char buf[1024];
> 
>     if(m && action && lb_wname) {
>         char *fault_action = NULL;
> 
>         sprintf(buf, "%s.%s.%s", PREFIX_OF_WORKER, lb_wname, LOCAL_WORKER_FAULT_ACTION);
>         fault_action = map_get_string(m, buf, NULL);
> 
>         if(fault_action)  {
> 	    if(strcmp(fault_action,FAULT_ACTION_REJECT) == 0) {
> 	        *action = JK_LOCAL_WORKER_FAULT_REJECT;
> 	    } else if(strcmp(fault_action,FAULT_ACTION_BALANCE) == 0) {
> 	        *action = JK_LOCAL_WORKER_FAULT_BALANCE;
> 	    } else {
> 	        return JK_FALSE;
> 	    }
>             return JK_TRUE;
>         }
>     }
Index: jk/native/common/jk_util.h
===================================================================
RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native/common/jk_util.h,v
retrieving revision 1.5
diff -r1.5 jk_util.h
72a73,79
> /*
>  * Default actions for load balancing worker.
>  */
> 
> #define JK_LOCAL_WORKER_FAULT_REJECT  1
> #define JK_LOCAL_WORKER_FAULT_BALANCE 2
> 
128a136,144
> 
> int jk_get_local_worker_list(jk_map_t *m, 
>                              const char *lb_wname,
>                              char ***list, 
>                              unsigned *num_of_wokers);
> 
> int jk_get_local_worker_fault_action(jk_map_t *m,
> 				     const char *lb_wname,
> 				     int *action);
Index: jk/native2/common/jk_worker_lb.c
===================================================================
RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native2/common/jk_worker_lb.c,v
retrieving revision 1.9
diff -r1.9 jk_worker_lb.c
156a157
> 
158,167d158
<             if(lb->lb_workers[i]->lb_value == 0 ) {
<                 /* That's the 'default' worker, it'll take all requests.
<                  * All other workers are not used unless this is in error state.
<                  *
<                  * The 'break' will disable checking for recovery on other
<                  * workers - but that doesn't matter as long as the default is alive.
<                  */
<                 rc=lb->lb_workers[i];
<                 break;
<             }
171c162
<             rc = lb->lb_workers[i];
---
> 		rc = lb->lb_workers[i];
173a165,191
> 
> 	/*
> 	 * If  we reached the end of local workers
> 	 * use the local worker with the least lb_value if available,
> 	 * otherwise act according to local_worker_fault_action.
> 	 *
> 	 * If no local workers we specified, continue worker selection.
> 	 */
> 
> 	if(i == lb->last_local_worker_index) {
> 	    /*
> 	     * If we have selected a worker then use it as it is the local worker
> 	     * with the lowest lb_value.
> 	     *
> 	     * If rc is NULL, no worker was selected so far, all local workers are
> 	     * in error. If fault_action is to reject the request do so.
> 	     *
> 	     * If rc is NULL but fault_action is to balance the request continue.
> 	     * worker selection.
> 	     *
> 	     */
> 	    if(rc) {
> 	        break;
> 	    } else if(lb->local_worker_fault_action == JK_LOCAL_WORKER_FAULT_REJECT) {
> 	        return rc;
> 	    }
> 	}
221,226c239
<         if( rc->lb_value != 0 ) {
<             /* It it's the default, it'll remain the default - we don't
<                increase the factor
<             */
<             rc->lb_value += rc->lb_factor;
<         }
---
>       rc->lb_value += rc->lb_factor;
415a429
>     unsigned j;
433a448,452
>     /*
>      * Init the index of the local_worker to -1.
>      */
>     lb->last_local_worker_index=-1;
> 
435a455
> 
444,458c464,465
<         if( w->lb_factor != 0 ) {
<             w->lb_factor = 1/ w->lb_factor;
<             lb->lb_workers[currentWorker]=w;
<         } else {
<             /* If == 0, then this is the default worker. Switch it with the first
<                worker to avoid looking too much for it.
<              */
<             jk_worker_t *first=lb->lb_workers[0];
<             lb->lb_workers[0]=w;
<             /* Only do the exchange if the worker is not the first */
<             if( currentWorker > 0 ) {
<                 lb->lb_workers[currentWorker]=first;
<             }
<         }
<         
---
> 	w->lb_factor = 1/ w->lb_factor;
> 	lb->lb_workers[currentWorker]=w;
460,465d466
<         /* 
<          * Allow using lb in fault-tolerant mode.
<          * Just set lbfactor in worker.properties to 0 to have 
<          * a worker used only when principal is down or session route
<          * point to it. Provided by Paul Frieden <pfrieden@dchain.com>
<          */
470a472,487
> 	/*
> 	 * Move the local workers at the beginning of the workers array.
> 	 */
> 	if (lb->local_workers_names) {
> 	  for(j = 0; j < lb->num_local_workers; j++) {	    
> 	    if (strcmp(lb->local_workers_names[j], name) == 0) {
> 	      jk_worker_t * worker;
> 	      lb->last_local_worker_index++;
> 	      worker=lb->lb_workers[lb->last_local_worker_index];
> 	      lb->lb_workers[lb->last_local_worker_index]=lb->lb_workers[currentWorker];
> 	      lb->lb_workers[currentWorker]=worker;
> 	      break;
> 	    }
> 	  }
> 	}
> 
514a532,552
>     } else if(strcmp(name, "local_workers") == 0) {
>         lb->local_workers_names=jk2_config_split(env, lb->mbean->pool,
> 						 value, NULL, &lb->num_local_workers);
> 	if(lb->local_workers_names==NULL || lb->num_local_workers == 0) {
>             env->l->jkLog(env, env->l, JK_LOG_ERROR,
>                           "lb_worker.validate(): no local workers specified\n");
>             return JK_ERR;
> 	}
>         jk2_lb_refresh( env, lb );
> 	return JK_OK;
>     } else if(strcmp(name, "fault_action") == 0) {
>         if(strcmp(value, "balance") == 0) {
> 	    lb->local_worker_fault_action=JK_LOCAL_WORKER_FAULT_BALANCE;	  
> 	} else if(strcmp(value, "reject") == 0) {
> 	    lb->local_worker_fault_action=JK_LOCAL_WORKER_FAULT_REJECT;
> 	} else {
> 	    env->l->jkLog(env, env->l, JK_LOG_ERROR,
> 			"lb_worker.validate(): invalid fault_action\n");
> 	    return JK_ERR;
> 	}
> 	return JK_OK;
515a554
> 
587a627,634
>     /*
>      * Init local workers parameters.
>      */
>     w->local_workers_names = NULL;
>     w->num_local_workers = 0;
>     w->last_local_worker_index = -1;
>     w->local_worker_fault_action = JK_LOCAL_WORKER_FAULT_REJECT;
> 
Index: jk/native2/include/jk_worker.h
===================================================================
RCS file: /home/cvspublic/jakarta-tomcat-connectors/jk/native2/include/jk_worker.h,v
retrieving revision 1.17
diff -r1.17 jk_worker.h
90a91,97
>  * Possible actions when local worker fails.
>  */
> 
> #define JK_LOCAL_WORKER_FAULT_REJECT 1
> #define JK_LOCAL_WORKER_FAULT_BALANCE 2
> 
> /*
208a216,238
> 
>     /*
>      * These are the names of the local workers.
>      */
>     char ** local_workers_names;
> 
>     /*
>      * This is the number of local workers.
>      */
>     unsigned num_local_workers;
> 
>     /*
>      * This is the index of the last local worker in the workers' array.
>      * The workers' array is assumed sorted with the local workers first.
>      */
>     int last_local_worker_index;
> 
>     /*
>      * What to do if the local worker is unavailable,
>      *   JK_LOCAL_WORKER_FAULT_REJECT to reject the reaques thus generating an error,
>      *   JK_LOCAL_WORKER_FAULT_BALANCE to load balance the request on another worker.
>      */
>     int     local_worker_fault_action;


--
To unsubscribe, e-mail:   <mailto:tomcat-dev-unsubscribe@jakarta.apache.org>
For additional commands, e-mail: <mailto:tomcat-dev-help@jakarta.apache.org>


Mime
View raw message