diff --git a/modules/load_balancer/doc/load_balancer_admin.xml b/modules/load_balancer/doc/load_balancer_admin.xml index 90883d22b83..d35dbed75b2 100644 --- a/modules/load_balancer/doc/load_balancer_admin.xml +++ b/modules/load_balancer/doc/load_balancer_admin.xml @@ -91,23 +91,26 @@ - Dialog - Dialog module + dialog - Dialog module + + - freeswitch. - only if - "fetch_freeswitch_stats" is enabled. + freeswitch - only if + "fetch_freeswitch_stats" is enabled + (required for integrated estimation mode) - dialog - TM module (only if probing is + tm - TM module (only if probing is enabled) clusterer - only if "cluster_id" - option is enabled. + option is enabled @@ -332,12 +335,7 @@ modparam("load_balancer", "lb_define_blacklist", "blist2= 2,10,6") using statistics pushed by the FreeSWITCH box. - The max value of a resource is updated every event_heartbeat_interval - seconds (see the "freeswitch" OpenSIPS module for more details - regarding this setting), as the stats arrive from FreeSWITCH. - - - Given the following format for FreeSWITCH heartbeat messages: + FreeSWITCH heartbeat messages provide the following statistics: { ... @@ -349,9 +347,28 @@ modparam("load_balancer", "lb_define_blacklist", "blist2= 2,10,6") ... } - , the load balancer uses the following formula in order to periodically - update its "max_load" values for each FreeSWITCH box (FreeSWITCH data - is highlighted in bold): + + + The current/maximum sessions and CPU idle data for each instance + are updated as the stats arrive from FreeSWITCH every + event_heartbeat_interval seconds (see the "freeswitch" OpenSIPS + module for more details regarding this setting). + + + These are used according to the operational mode used in the + load balancing function calls. + + + + Relative mode + + + + The max load score for each instance is updated every + fetch_freeswitch_stats seconds. 
In relative mode, the load balancer + uses the following formula in order to periodically update its + "max_load" values for each FreeSWITCH box (FreeSWITCH data is + highlighted in bold): max_load = (Idle-CPU / 100) @@ -359,6 +376,54 @@ modparam("load_balancer", "lb_define_blacklist", "blist2= 2,10,6") (Session-Count - current_load)) + + + + Integrated estimation mode + + + + This mode is intended to be used in high throughput environments where + not all inbound and outbound sessions are tracked on the local + OpenSIPS instance. The heartbeat data is used as the primary source of + truth for server load. + + + In addition to the data collected in the most recent heartbeat the module + will count sessions allocated to each instance and use this data in + each subsequent calculation to track sessions and distribute the load. + Each fetch_freeswitch_stats interval the sessions since last heartbeat counters + are reset as up-to-date load data has been provided. It is advisable to set + event_heartbeat_interval and fetch_freeswitch_stats low to improve session + data synchronisation. + + + In integrated estimation mode, the load balancer collects the session + data for each FreeSWITCH box every fetch_freeswitch_stats seconds. Rather than + maintaining a max load score this mode performs the following calculation at + the time a call is selecting a destination (FreeSWITCH data is highlighted in bold): + + + load_score = (100 - (100 * (Session-Count + + sessions_since_last_heartbeat) / Max-Sessions)) + * (Idle-CPU/100) + + + +Warning - heartbeat processing is asynchronous to this module + +Heartbeat data is collected in the freeswitch module upon arrival +from each FreeSWITCH instance as controlled by both the minimum interval setting +on the instance and the event_heartbeat_interval module setting. This module +will refresh its internal calculations at intervals defined by +fetch_freeswitch_stats. 
+ +When using integrated estimation mode the sessions +since last heartbeat counter will be reset every fetch_freeswitch_stats +seconds. Keeping event_heartbeat_interval and fetch_freeswitch_stats low and equal to each other is advised for more accurate +load estimations according to your throughput requirements. + + Default value is 0 (disabled). @@ -405,7 +470,7 @@ modparam("load_balancer", "initial_freeswitch_load", 200) of the destinations and for controlling the pinging to destinations. - If clustering enbled, the module will automatically share changes + If clustering is enabled, the module will automatically share changes over the status of the destinations with the other OpenSIPS instances that are part of a cluster. Whenever such a status changes (following an MI command, a probing result, a script command), @@ -474,6 +539,35 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") +
+ <varname>use_cpu_factor</varname> (integer) + + This is only relevant for "integrated estimation" mode. + + + If enabled, the CPU factor collected in the most recent heartbeat + will be used to reduce the capacity of each FreeSWITCH instance. + + + When disabled, no CPU factor will be applied in the calculation. + + + + + + + Default value is 0 (disabled). + + + Set <varname>use_cpu_factor</varname> parameter + +... +modparam("load_balancer", "use_cpu_factor", 1) +... + + +
+ @@ -510,7 +604,7 @@ modparam("load_balancer", "cluster_sharing_tag", "vip")
- n - Negative availability - use + n - Negative availability - use destinations with negative availability (exceeded capacity); do not ignore resources with negative availability, and thus able to select for load balancing destinations with exceeded @@ -519,13 +613,30 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") important/high-priority calls. + + i - Integrated estimation - + intended for use in deployments + where many separate SIP proxies are feeding calls into + a pool of FreeSWITCH servers. Load calculations are + performed using the most recent heartbeat data and a + counter of all sessions allocated since the last heartbeat. + Profile counting is unused in the calculation. The reported + CPU idle value is optionally used (see the "use_cpu_factor" parameter) to reduce session load on systems + with high CPU utilisation. Mutually exclusive with flag "r". + + This is well suited to high performance systems where many calls + may arrive within the heartbeat period (which should be set to the + minimum value 1s when used with this algorithm). + + r - Relative value - the relative available load (how many percentages are free) is used in computing the load of each pear/resource; Without this flag, the Absolute value is assumed - the effective available load ( maximum_load - current_load) is used in - computing the load of each pear/resource. + computing the load of each peer/resource. Mutually exclusive + with flag "i". @@ -574,6 +685,11 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") (requested resources do not exist) + + -5 (false) - mutually exclusive flags + "i" and "r" were both set + + This function can be used from REQUEST_ROUTE, BRANCH_ROUTE and @@ -583,7 +699,7 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") <function>lb_start</function> usage ... 
-if (lb_start(1,"trascoding;conference")) { +if (lb_start(1,"transcoding;conference")) { # dst URI points to the new destination xlog("sending call to $du\n"); t_relay(); @@ -630,8 +746,8 @@ if (lb_start(1,"trascoding;conference")) {
-2 (false) - no capacity available - (detinations are up and available, but they do not have any - availabe channels) + (destinations are up and available, but they do not have any + available channels) -3 (false) - no more destinations @@ -695,7 +811,7 @@ if (t_check_status("(408)|(5[0-9][0-9])")) { Function to stop and flush a current LB session. To be used in failure route, if you want to stop the current LB session (not to try - any other destinations from this session) and to start a completly new + any other destinations from this session) and to start a completely new one. @@ -882,7 +998,7 @@ if (lb_is_destination($si,$sp) ) { <function>lb_count_call</function> usage ... -# count as load also the calls orgininated by lb destinations +# count as load also the calls originated by lb destinations if (lb_is_destination($si,$sp) ) { # inbound call from destination lb_count_call($si,$sp,-1,"conference"); @@ -911,7 +1027,7 @@ if (lb_is_destination($si,$sp) ) {
<function moreinfo="none">lb_reload</function> - Trigers the reload of the load balancing data from the DB. + Triggers the reload of the load balancing data from the DB. MI FIFO Command Format: diff --git a/modules/load_balancer/lb_data.c b/modules/load_balancer/lb_data.c index 1648779683b..ed2a9178a0a 100644 --- a/modules/load_balancer/lb_data.c +++ b/modules/load_balancer/lb_data.c @@ -38,6 +38,7 @@ /* dialog stuff */ extern struct dlg_binds lb_dlg_binds; +extern int use_cpu_factor; extern int fetch_freeswitch_stats; extern int initial_fs_load; extern struct fs_binds fs_api; @@ -308,6 +309,10 @@ int add_lb_dsturi( struct lb_data *data, int id, int group, char *uri, fs_url = r->fs_url; dst->rmap[i].max_load = initial_fs_load; dst->rmap[i].fs_enabled = 1; + + dst->rmap[i].current_sessions = 0; + dst->rmap[i].max_sessions = 0; + dst->rmap[i].cpu_idle = 100; } else { dst->rmap[i].max_load = r->val; } @@ -424,6 +429,18 @@ static int get_dst_load(struct lb_resource **res, unsigned int res_no, if( flags & LB_FLAGS_RELATIVE ) { if( dst->rmap[l].max_load ) av = 100 - (100 * lb_dlg_binds.get_profile_size(res[k]->profile, &dst->profile_id) / dst->rmap[l].max_load); + } else if( flags & LB_FLAGS_PERCENT_WITH_CPU ) { + if( dst->rmap[l].max_sessions ) { + if(use_cpu_factor) { + /* generate score based on the percentage of channels occupied, reduced by CPU idle factor */ + av = ( 100 - ( 100 * ( dst->rmap[l].current_sessions + dst->rmap[l].sessions_since_last_heartbeat ) / dst->rmap[l].max_sessions ) ) * dst->rmap[l].cpu_idle; + LM_DBG("destination %d <%.*s> availability score %d (sessions=%d since_last_hb=%d max_sess=%d cpu_idle=%.2f)", dst->id, dst->uri.len, dst->uri.s, av, dst->rmap[l].current_sessions, dst->rmap[l].sessions_since_last_heartbeat, dst->rmap[l].max_sessions, dst->rmap[l].cpu_idle); + } else { + /* generate score based on the percentage of channels occupied */ + av = 100 - ( 100 * ( dst->rmap[l].current_sessions + dst->rmap[l].sessions_since_last_heartbeat ) / 
dst->rmap[l].max_sessions ); + LM_DBG("destination %d <%.*s> availability score %d (sessions=%d since_last_hb=%d max_sess=%d)", dst->id, dst->uri.len, dst->uri.s, av, dst->rmap[l].current_sessions, dst->rmap[l].sessions_since_last_heartbeat, dst->rmap[l].max_sessions); + } + } } else { av = dst->rmap[l].max_load - lb_dlg_binds.get_profile_size(res[k]->profile, &dst->profile_id); } @@ -490,7 +507,7 @@ int lb_route(struct sip_msg *req, int group, struct lb_res_str_list *rl, struct lb_resource *it_r; int load, it_l; int i, j, cond, cnt_aval_dst; - + unsigned int k, l; /* init control vars state */ res_cur = NULL; @@ -756,8 +773,7 @@ int lb_route(struct sip_msg *req, int group, struct lb_res_str_list *rl, cnt_aval_dst = 0; for( it_d=data->dsts,i=0,j=0 ; it_d ; it_d=it_d->next ) { if( it_d->group == group ) { - if( (dst_bitmap_cur[i] & (1 << j)) && - ((it_d->flags & LB_DST_STAT_DSBL_FLAG) == 0) ) { + if( (dst_bitmap_cur[i] & (1 << j)) && ((it_d->flags & LB_DST_STAT_DSBL_FLAG) == 0) ) { /* valid destination (group & resources & status) */ cnt_aval_dst++; if( get_dst_load(res_cur, res_cur_n, it_d, flags, &it_l) ) { @@ -818,11 +834,29 @@ int lb_route(struct sip_msg *req, int group, struct lb_res_str_list *rl, if( dst != NULL ) { + LM_DBG("%s call of LB - winning destination %d <%.*s> selected " "for LB set with free=%d\n", (reuse ? 
"sequential" : "initial"), dst->id, dst->uri.len, dst->uri.s, load ); + if ( flags & LB_FLAGS_PERCENT_WITH_CPU ) { + + // find all resources used by this call, increment on each + for( k=0 ; krmap_no ; l++ ) { + if( res_cur[k] == dst->rmap[l].resource ) { + dst->rmap[l].sessions_since_last_heartbeat++; + + LM_DBG("incrementing sess since last HB for winning destination %d <%.*s> (sessions_since_last_heartbeat=%d)\n", + dst->id, dst->uri.len, dst->uri.s, dst->rmap[l].sessions_since_last_heartbeat ); + + break; // exit the loop + } + } + } + } + /* add to the profiles */ for( i=0 ; iprofile_id, diff --git a/modules/load_balancer/lb_data.h b/modules/load_balancer/lb_data.h index 71d822d39e2..5e6aa06ba3c 100644 --- a/modules/load_balancer/lb_data.h +++ b/modules/load_balancer/lb_data.h @@ -33,10 +33,11 @@ #include "../freeswitch/fs_api.h" #include "lb_parser.h" -#define LB_FLAGS_RELATIVE (1<<0) /* do relative versus absolute estimation. default is absolute */ -#define LB_FLAGS_NEGATIVE (1<<1) /* do not skip negative loads. default to skip */ -#define LB_FLAGS_RANDOM (1<<2) /* pick a random destination among all selected dsts with equal load */ -#define LB_FLAGS_DEFAULT 0 +#define LB_FLAGS_RELATIVE (1<<0) /* do relative versus absolute estimation. default is absolute */ +#define LB_FLAGS_NEGATIVE (1<<1) /* do not skip negative loads. 
default to skip */ +#define LB_FLAGS_RANDOM (1<<2) /* pick a random destination among all selected dsts with equal load */ +#define LB_FLAGS_PERCENT_WITH_CPU (1<<3) /* score as percentage of max sessions used + CPU util factor */ +#define LB_FLAGS_DEFAULT 0 #define LB_DST_PING_DSBL_FLAG (1<<0) #define LB_DST_PING_PERM_FLAG (1<<1) @@ -62,6 +63,14 @@ struct lb_resource_map { struct lb_resource *resource; unsigned int max_load; + /* data received in last heartbeat */ + unsigned int max_sessions; + unsigned int current_sessions; + float cpu_idle; + + /* count of sessions allocated since last FS heartbeat */ + unsigned int sessions_since_last_heartbeat; + int fs_enabled; }; diff --git a/modules/load_balancer/load_balancer.c b/modules/load_balancer/load_balancer.c index c545c2aa673..9db380a13d6 100644 --- a/modules/load_balancer/load_balancer.c +++ b/modules/load_balancer/load_balancer.c @@ -62,6 +62,8 @@ str lb_probe_from = str_init("sip:prober@localhost"); static int* probing_reply_codes = NULL; static int probing_codes_no = 0; +int use_cpu_factor; + int fetch_freeswitch_stats; int initial_fs_load = 1000; @@ -173,6 +175,7 @@ static const param_export_t mod_params[]={ { "cluster_id", INT_PARAM, &lb_cluster_id }, { "cluster_sharing_tag", STR_PARAM, &lb_cluster_shtag }, { "fetch_freeswitch_stats", INT_PARAM, &fetch_freeswitch_stats }, + { "use_cpu_factor", INT_PARAM, &use_cpu_factor }, { "initial_freeswitch_load", INT_PARAM, &initial_fs_load }, { 0,0,0 } }; @@ -299,7 +302,7 @@ static void lb_inherit_state(struct lb_data *old_data,struct lb_data *new_data) strncasecmp(new_dst->uri.s, old_dst->uri.s, old_dst->uri.len)==0) { LM_DBG("DST %d/<%.*s> found in old set, copying state\n", new_dst->group, new_dst->uri.len,new_dst->uri.s); - /* first reset the existing flags (only the flags related + /* first reset the existing flags (only the flags related * to state!!!) 
*/ new_dst->flags &= ~(LB_DST_STAT_DSBL_FLAG|LB_DST_STAT_NOEN_FLAG); @@ -556,9 +559,21 @@ static int w_lb_start(struct sip_msg *req, int *grp_no, for( f=flstr->s ; fs+flstr->len ; f++ ) { switch( *f ) { case 'r': + if( flags & LB_FLAGS_PERCENT_WITH_CPU ) { + LM_ERR("flags i & r are mutually exclusive (r)\n"); + return -5; + } flags |= LB_FLAGS_RELATIVE; LM_DBG("using relative versus absolute estimation\n"); break; + case 'i': + if( flags & LB_FLAGS_RELATIVE ) { + LM_ERR("flags i & r are mutually exclusive (i)\n"); + return -5; + } + flags |= LB_FLAGS_PERCENT_WITH_CPU; + LM_DBG("using integrated estimation (percentage of max sessions used, tracing real time allocations) \n"); + break; case 'n': flags |= LB_FLAGS_NEGATIVE; LM_DBG("do not skip negative loads\n"); @@ -795,12 +810,28 @@ static void lb_update_max_loads(unsigned int ticks, void *param) dst->rmap[ri].resource->profile, &dst->profile_id); old = dst->rmap[ri].max_load; + // if ( flags & LB_FLAGS_PERCENT_WITH_CPU ) { todo flags not avavilable here /* - * The normal case. OpenSIPS sees, at _most_, the same number - * of sessions as FreeSWITCH does. Any differences must be - * subtracted from the remote "max sessions" value + * In LB_FLAGS_PERCENT_WITH_CPU mode we capture the raw values and use these in each LB calculation. This + * means we do not use profile counting in the load calculation. This is suitable for + * architectures where many unreplicated OpenSIPs instances feed calls into the same pool + * of FreeSWITCH instances. */ + dst->rmap[ri].max_sessions = dst->fs_sock->stats.max_sess; + dst->rmap[ri].current_sessions = dst->fs_sock->stats.sess; + dst->rmap[ri].cpu_idle = dst->fs_sock->stats.id_cpu / (float)100; + /* + * reset sessions since last heartbeat counter + * todo ideally this happens when the heartbeat arrives, this fires according to fetch_freeswitch_stats timer + */ + dst->rmap[ri].sessions_since_last_heartbeat = 0; + if (psz < dst->fs_sock->stats.max_sess) { + /* + * The normal case. 
OpenSIPS sees, at _most_, the same number + * of sessions as FreeSWITCH does. Any differences must be + * subtracted from the remote "max sessions" value + */ dst->rmap[ri].max_load = (dst->fs_sock->stats.id_cpu / (float)100) * (dst->fs_sock->stats.max_sess -