Commit 6518978f authored by Jonathan Jenkins's avatar Jonathan Jenkins
Browse files

handle remainders when LPs not divisible by ranks

parent a1e0df0a
......@@ -21,7 +21,6 @@ LPs to the PEs (PEs is an abstraction for MPI rank in ROSS) by placing 1 server
LP a total of 16 times. This configuration is useful if there is some form of communication involved
between the server and example_net LP types, in which case ROSS will place them on the same PE and
communication between server and example_net LPs will not involve remote messages.
The number of server and example_net LPs can be more than 1. Let's assume we have two example_net
LPs for each server; then the config file will have the following format:
......@@ -76,3 +75,13 @@ For example, to query the number of repetitions in example-test2.conf file, call
num_repetitions = codes_mapping_get_group_reps("MODELNET_GRP");
will return 16.
=== LP to PE mapping for parallel simulations ===
In the case of parallel simulations using MPI, the LP mapping explained in Step
1 still holds. However, these LPs must also be mapped to PEs, which can be an
arbitrary mapping in ROSS. We simply assign the first N LPs to the first PE, the
second N to the second PE, and so forth, where N is the floor of the LP count
divided by the PE count. If the number of LPs is not divisible by the number of
PEs, then each of the first R PEs (where R is the remainder of that division) is
assigned N+1 LPs instead, and every remaining PE is assigned N LPs.
......@@ -11,9 +11,16 @@
/* number of LPs assigned to the current PE (abstraction of MPI rank) */
static int lps_for_this_pe = 0;
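/* base number of LPs assigned to each PE (abstraction of MPI rank): the
 * floor of the total LP count divided by the number of ranks. for lp counts
 * which are not divisible by the number of ranks, the modulus is kept in
 * lps_leftover, and PEs whose rank is below lps_leftover hold one extra LP */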
static int lps_per_pe_floor = 0;
static int lps_leftover = 0;
static int mem_factor = 1024;
/* Return the smaller of the two integers a and b. */
static inline int mini(int a, int b)
{
    if (a < b) {
        return a;
    }
    return b;
}
/* char arrays for holding lp type name and group name*/
char local_grp_name[MAX_NAME_LENGTH], local_lp_name[MAX_NAME_LENGTH];
......@@ -21,13 +28,25 @@ config_lpgroups_t lpconf;
int codes_mapping_get_lps_for_pe()
return lps_for_this_pe;
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
printf("%d lps for rank %d\n", lps_per_pe_floor+(g_tw_mynode < lps_leftover), rank);
return lps_per_pe_floor + (g_tw_mynode < lps_leftover);
/* Takes the global LP ID and returns the rank (PE id) on which the LP is mapped */
tw_peid codes_mapping( tw_lpid gid)
return gid / lps_for_this_pe;
int lps_on_pes_with_leftover = lps_leftover * (lps_per_pe_floor+1);
if (gid < lps_on_pes_with_leftover){
return gid / (lps_per_pe_floor+1);
return (gid-lps_on_pes_with_leftover)/lps_per_pe_floor + lps_leftover;
/*return gid / lps_per_pe_floor;*/
int codes_mapping_get_group_reps(char* grp_name)
......@@ -187,20 +206,23 @@ static void codes_mapping_init(void)
for(kpid = 0; kpid < nkp_per_pe; kpid++)
tw_kp_onpe(kpid, g_tw_pe[0]);
int lp_init_range = g_tw_mynode * lps_for_this_pe;
codes_mapping_get_lp_info(lp_init_range, grp_name, &grp_id, &lpt_id, lp_type_name, &rep_id, &offset);
int lp_start =
g_tw_mynode * lps_per_pe_floor + mini(g_tw_mynode,lps_leftover);
int lp_end =
(g_tw_mynode+1) * lps_per_pe_floor + mini(g_tw_mynode+1,lps_leftover);
codes_mapping_get_lp_info(lp_start, grp_name, &grp_id, &lpt_id, lp_type_name, &rep_id, &offset);
for (lpid = lp_init_range; lpid < lp_init_range + lps_for_this_pe; lpid++)
for (lpid = lp_start; lpid < lp_end; lpid++)
ross_gid = lpid;
ross_lid = lpid - lp_init_range;
ross_lid = lpid - lp_start;
kpid = ross_lid % g_tw_nkp;
pe = tw_getpe(kpid % g_tw_npe);
codes_mapping_get_lp_info(ross_gid, grp_name, &grp_id, &lpt_id, lp_type_name, &rep_id, &offset);
printf("lp:%lu --> kp:%lu, pe:%llu\n", ross_gid, kpid, pe->id);
tw_lp_onpe(ross_lid , pe, ross_gid);
tw_lp_onpe(ross_lid, pe, ross_gid);
tw_lp_onkp(g_tw_lp[ross_lid], g_tw_kp[kpid]);
tw_lp_settype(ross_lid, lp_type_lookup(lp_type_name));
......@@ -212,7 +234,8 @@ static void codes_mapping_init(void)
* global LP IDs are unique across all PEs, local LP IDs are unique within a PE */
static tw_lp * codes_mapping_to_lp( tw_lpid lpid)
int index = lpid - (g_tw_mynode * lps_for_this_pe);
int index = lpid - (g_tw_mynode * lps_per_pe_floor) -
mini(g_tw_mynode, lps_leftover);
// printf("\n global id %d index %d lps_before %d lps_offset %d local index %d ", lpid, index, lps_before, g_tw_mynode, local_index);
return g_tw_lp[index];
......@@ -228,10 +251,11 @@ void codes_mapping_setup()
for (grp = 0; grp < lpconf.lpgroups_count; grp++)
for (lpt = 0; lpt < lpconf.lpgroups[grp].lptypes_count; lpt++)
lps_for_this_pe += (lpconf.lpgroups[grp].lptypes[lpt].count * lpconf.lpgroups[grp].repetitions);
lps_per_pe_floor += (lpconf.lpgroups[grp].lptypes[lpt].count * lpconf.lpgroups[grp].repetitions);
lps_for_this_pe /= pes;
//printf("\n LPs for this PE are %d reps %d ", lps_for_this_pe, lpconf.lpgroups[grp].repetitions);
lps_leftover = lps_per_pe_floor % pes;
lps_per_pe_floor /= pes;
//printf("\n LPs for this PE are %d reps %d ", lps_per_pe_floor, lpconf.lpgroups[grp].repetitions);
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment