Commit d603bf86 authored by Jonathan Jenkins's avatar Jonathan Jenkins

Fleshing out example program, and other cleanups

parent 7b22be99
......@@ -16,15 +16,5 @@ LDLIBS = $(shell $(ROSS)/bin/ross-config --libs) -lcodes-net -lcodes-base
example: example.c
#$(CC) $(ROSS_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(ROSS_LDFLAGS) $^ -o $@ -L$(ROSS)/lib -lROSS -lm -L$(CODESBASE)/lib -lcodes-base -L$(CODESNET)/lib -lcodes-net
check-env:
ifndef CODESBASE
$(error CODESBASE is undefined, see README.txt)
endif
ifndef CODESNET
$(error CODESNET is undefined, see README.txt)
endif
ifndef ROSS
$(error ROSS is undefined, see README.txt)
endif
clean:
rm -f example
......@@ -24,18 +24,22 @@
#include "codes/model-net.h"
#include "codes/lp-type-lookup.h"
#define NUM_REQS 500 /* number of requests sent by each server */
#define NUM_REQS 500 /* number of requests sent by each server */
#define PAYLOAD_SZ 2048 /* size of simulated data payload, bytes */
/* model-net ID, can be either simple-net, dragonfly or torus */
/* model-net ID, can be either simple-net, dragonfly or torus (more may be
* added) */
static int net_id = 0;
static int num_servers = 0;
static int offset = 2;
/* expected group name in configure files for this program */
static char *group_name = "SERVERS";
typedef struct svr_msg svr_msg;
typedef struct svr_state svr_state;
/* types of events that will constitute triton requests */
/* types of events that will constitute server activities */
enum svr_event
{
KICKOFF, /* initial event */
......@@ -44,6 +48,9 @@ enum svr_event
LOCAL /* local event */
};
/* this struct serves as the ***persistent*** state of the LP representing the
* server in question. This struct is set up when the LP initialization
* function pointer is called */
struct svr_state
{
int msg_sent_count; /* requests sent */
......@@ -52,6 +59,8 @@ struct svr_state
tw_stime start_ts; /* time that we started sending requests */
};
/* this struct serves as the ***temporary*** event data, which can be thought
* of as a message between two LPs. */
struct svr_msg
{
enum svr_event svr_event_type;
......@@ -60,6 +69,12 @@ struct svr_msg
int incremented_flag; /* helper for reverse computation */
};
/* ROSS expects four functions per LP:
* - an LP initialization function, called for each LP
* - an event processing function
* - a *reverse* event processing function (rollback), and
* - a finalization/cleanup function when the simulation ends
*/
static void svr_init(
svr_state * ns,
tw_lp * lp);
......@@ -77,6 +92,8 @@ static void svr_finalize(
svr_state * ns,
tw_lp * lp);
/* set up the function pointers for ROSS, as well as the size of the LP state
* structure (NOTE: ROSS is in charge of event and state (de-)allocation) */
tw_lptype svr_lp = {
(init_f) svr_init,
(event_f) svr_event,
......@@ -90,6 +107,9 @@ extern const tw_lptype* svr_get_lp_type();
static void svr_add_lp_type();
static tw_stime ns_to_s(tw_stime ns);
static tw_stime s_to_ns(tw_stime ns);
/* as we only have a single event processing entry point and multiple event
* types, for clarity we define "handlers" for each (reverse) event type */
static void handle_kickoff_event(
svr_state * ns,
tw_bf * b,
......@@ -131,9 +151,22 @@ static void handle_req_rev_event(
svr_msg * m,
tw_lp * lp);
/* for this simulation, each server contacts its neighboring server, i.e. the
* next-highest server (looping around if necessary).
* this function shows how to use the codes_mapping API to calculate IDs when
* having to contend with multiple LP types and counts. Note that in this simple
* example codes_mapping is overkill. */
static tw_lpid get_next_server(tw_lpid sender_id);
/* arguments to be handled by ROSS - strings passed in are expected to be
* pre-allocated */
static char conf_file_name[256] = {0};
/* this struct contains default parameters used by ROSS, as well as
* user-specific arguments to be handled by the ROSS config sys. Pass it in
* prior to calling tw_init */
/* option table registered with tw_opt_add() in main before tw_init() parses
 * the command line */
const tw_optdef app_opt [] =
{
TWOPT_GROUP("Model net test case" ),
/* "codes-config": the value is stored in conf_file_name; main exits with an
 * error when conf_file_name is still empty after tw_init() */
TWOPT_CHAR("codes-config", conf_file_name, "name of codes configuration file"),
TWOPT_END()
};
......@@ -143,35 +176,40 @@ int main(
{
int nprocs;
int rank;
/* TODO: explain why we need this (ROSS has cutoff??) */
g_tw_ts_end = s_to_ns(60*60*24*365); /* one year, in nsecs */
/* ROSS initialization function calls */
tw_opt_add(app_opt);
tw_init(&argc, &argv);
tw_opt_add(app_opt); /* add user-defined args */
/* initialize ROSS and parse args. NOTE: tw_init calls MPI_Init */
tw_init(&argc, &argv);
if(argc < 2)
if (!conf_file_name[0])
{
printf("\n Usage: mpirun <args> --sync=2/3 mapping_file_name.conf (optional --nkp) ");
MPI_Finalize();
return 0;
fprintf(stderr, "Expected \"codes-config\" option, please see --help.\n");
MPI_Finalize();
return 1;
}
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
/* loading the config file of codes-mapping */
configuration_load(argv[2], MPI_COMM_WORLD, &config);
/* Setup the model-net parameters specified in the config file */
net_id=model_net_set_params();
/* loading the config file into the codes-mapping utility, giving us the
* parsed config object in return.
* "config" is a global var defined by codes-mapping */
if (configuration_load(conf_file_name, MPI_COMM_WORLD, &config)){
fprintf(stderr, "Error loading config file %s.\n", conf_file_name);
MPI_Finalize();
return 1;
}
/* register the server LP type (model-net LP type is registered internally in model_net_set_params() */
svr_add_lp_type();
/*Now setup codes mapping */
codes_mapping_setup();
/*query codes mapping API*/
num_servers = codes_mapping_get_group_reps("MODELNET_GRP") * codes_mapping_get_lp_count("MODELNET_GRP", "server");
/* Setup the model-net parameters specified in the global config object,
* returned is the identifier for the network type */
net_id = model_net_set_params();
/* in this example, we are using simplenet, which simulates point to point
* communication between any two entities (other networks are trickier to
* setup). Hence: */
if(net_id != SIMPLENET)
{
printf("\n The test works with simple-net configuration only! ");
......@@ -179,7 +217,23 @@ int main(
return 0;
}
/* register the server LP type with codes-base
* (model-net LP type is registered internally in model_net_set_params()) */
svr_add_lp_type();
/* Setup takes the global config object, the registered LPs, and
* generates/places the LPs as specified in the configuration file.
* This should only be called after ALL LP types have been registered in
* codes */
codes_mapping_setup();
/* calculate the number of servers in this simulation */
num_servers = codes_mapping_get_group_reps(group_name) * codes_mapping_get_lp_count(group_name, "server");
/* begin simulation */
tw_run();
/* model-net has the capability of outputting network transmission stats */
model_net_report_stats(net_id);
tw_end();
......@@ -193,7 +247,9 @@ const tw_lptype* svr_get_lp_type()
static void svr_add_lp_type()
{
lp_type_register("server", svr_get_lp_type());
/* lp_type_register should be called exactly once per process per
* LP type */
lp_type_register("server", svr_get_lp_type());
}
static void svr_init(
......@@ -213,14 +269,20 @@ static void svr_init(
/* skew each kickoff event slightly to help avoid event ties later on */
kickoff_time = g_tw_lookahead + tw_rand_unif(lp->rng);
/* first create the event (time arg is an offset, not absolute time) */
e = codes_event_new(lp->gid, kickoff_time, lp);
/* after event is created, grab the allocated message and set msg-specific
* data */
m = tw_event_data(e);
m->svr_event_type = KICKOFF;
/* event is ready to be processed, send it off */
tw_event_send(e);
return;
}
/* event processing entry point
* - simply forward the message to the appropriate handler */
static void svr_event(
svr_state * ns,
tw_bf * b,
......@@ -248,6 +310,8 @@ static void svr_event(
}
}
/* reverse event processing entry point
* - simply forward the message to the appropriate handler */
static void svr_rev_event(
svr_state * ns,
tw_bf * b,
......@@ -276,11 +340,12 @@ static void svr_rev_event(
return;
}
/* once the simulation is over, do some output */
static void svr_finalize(
svr_state * ns,
tw_lp * lp)
{
printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d \n", (unsigned long long)lp->gid, PAYLOAD_SZ*ns->msg_recvd_count, ns_to_s((tw_now(lp)-ns->start_ts)),
printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d \n", (unsigned long long)(lp->gid/2), PAYLOAD_SZ*ns->msg_recvd_count, ns_to_s((tw_now(lp)-ns->start_ts)),
((double)(PAYLOAD_SZ*NUM_REQS)/(double)(1024*1024)/ns_to_s(tw_now(lp)-ns->start_ts)), ns->msg_sent_count, ns->msg_recvd_count, ns->local_recvd_count);
return;
}
......@@ -297,6 +362,28 @@ static tw_stime s_to_ns(tw_stime ns)
return(ns * (1000.0 * 1000.0 * 1000.0));
}
/* see declaration for more general info */
/* Return the global LP ID of the server that follows sender_id.
 *
 * sender_id: global LP ID of the calling server.
 * returns:   LP ID of the LP with the same group and LP type name as the
 *            sender, located in the next repetition of that group (wrapping
 *            to repetition 0 after the last one).
 */
tw_lpid get_next_server(tw_lpid sender_id)
{
    tw_lpid rtn_id;

    /* first, get callers LP and group info from codes-mapping. Caching this
     * info in the LP struct isn't a bad idea for preventing a huge number of
     * lookups */
    char grp_name[MAX_NAME_LENGTH], lp_type_name[MAX_NAME_LENGTH];
    /* rep_offset receives the sender's offset within its repetition; it is
     * deliberately NOT called `offset`, which would shadow the file-scope
     * `offset` used by the brute-force ID calculation in the handlers */
    int lp_type_id, grp_id, grp_rep_id, rep_offset, num_reps;
    int dest_rep_id;

    codes_mapping_get_lp_info(sender_id, grp_name, &grp_id, &lp_type_id,
            lp_type_name, &grp_rep_id, &rep_offset);

    /* in this example, we assume that, for our group of servers, each
     * "repetition" consists of a single server/NIC pair. Hence, we grab the
     * server ID for the next repetition, looping around if necessary */
    num_reps = codes_mapping_get_group_reps(grp_name);
    dest_rep_id = (grp_rep_id + 1) % num_reps;

    /* finally, get the server (exactly 1 server per rep -> offset w/in
     * rep = 0) */
    codes_mapping_get_lp_id(grp_name, lp_type_name, dest_rep_id, 0, &rtn_id);
    return rtn_id;
}
/* handle initial event */
static void handle_kickoff_event(
svr_state * ns,
......@@ -304,28 +391,46 @@ static void handle_kickoff_event(
svr_msg * m,
tw_lp * lp)
{
svr_msg * m_local = malloc(sizeof(svr_msg));
svr_msg * m_remote = malloc(sizeof(svr_msg));
/* we allocate a local message and a remote message both */
m_local->svr_event_type = LOCAL;
m_local->src = lp->gid;
memcpy(m_remote, m_local, sizeof(svr_msg));
m_remote->svr_event_type = REQ;
int dest_id;
int use_brute_force_map = 0;
/* normally, when using ROSS, events are allocated as a result of the event
* creation process. However, since we are now asking model-net to
* communicate with an entity on our behalf, we need to generate both the
* message to the recipient and an optional callback message
* - thankfully, memory need not persist past the model_net_event call - it
* copies the messages */
svr_msg m_local;
svr_msg m_remote;
m_local.svr_event_type = LOCAL;
m_local.src = lp->gid;
m_remote.svr_event_type = REQ;
m_remote.src = lp->gid;
/* record when transfers started on this server */
ns->start_ts = tw_now(lp);
/* each server sends a request to the next highest server */
int dest_id = (lp->gid + offset)%(num_servers*2 + num_routers);
/* each server sends a request to the next highest server
* In this simulation, LP determination is simple: LPs are assigned
* round robin as in serv_1, net_1, serv_2, net_2, etc.
* However, that may not always be the case, so we also show a more
* complicated way to map through codes_mapping */
if (use_brute_force_map)
dest_id = (lp->gid + offset)%(num_servers*2);
else
{
dest_id = get_next_server(lp->gid);
}
/*model-net needs to know about (1) higher-level destination LP which is a neighboring server in this case
/* model-net needs to know about (1) higher-level destination LP which is a neighboring server in this case
* (2) struct and size of remote message and (3) struct and size of local message (a local message can be null) */
model_net_event(net_id, "test", dest_id, PAYLOAD_SZ, sizeof(svr_msg), (const void*)m_remote, sizeof(svr_msg), (const void*)m_local, lp);
model_net_event(net_id, "test", dest_id, PAYLOAD_SZ, sizeof(svr_msg),
(const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
ns->msg_sent_count++;
}
/* at the moment, no need for local callbacks from model-net, so we maintain a
* count for debugging purposes */
static void handle_local_event(
svr_state * ns,
tw_bf * b,
......@@ -335,6 +440,88 @@ static void handle_local_event(
ns->local_recvd_count++;
}
/* handle recving ack
* for this simulation, we repeatedly ping the destination server until NUM_REQS
* of size PAYLOAD_SZ have been satisfied - we begin the next req when we
* receive an ACK from the destination server */
/* ns: persistent state of this server LP (msg_sent_count is updated here)
 * b:  ROSS bitfield, not used by this handler
 * m:  the received ACK; m->src is the server that acknowledged, and
 *     m->incremented_flag records whether msg_sent_count was bumped
 * lp: the LP processing the event */
static void handle_ack_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* the ACK actually doesn't come from the NIC on the other server -
     * model-net "hides" the NIC LP from us so we only see the original
     * destination server */

    /* safety check that this request got to the right server, both with our
     * brute-force lp calculation and our more generic codes-mapping
     * calculation */
    assert(m->src == (lp->gid + offset)%(num_servers*2) &&
           m->src == get_next_server(lp->gid));

    if(ns->msg_sent_count < NUM_REQS)
    {
        /* again, allocate our own msgs so model-net can transmit on our
         * behalf. Designated initializers zero every field we do not set
         * (e.g. incremented_flag), so no indeterminate bytes are handed to
         * model_net_event */
        svr_msg m_local  = { .svr_event_type = LOCAL, .src = lp->gid };
        svr_msg m_remote = { .svr_event_type = REQ,   .src = lp->gid };

        /* send another request */
        model_net_event(net_id, "test", m->src, PAYLOAD_SZ, sizeof(svr_msg),
                (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
        ns->msg_sent_count++;
        /* tell the reverse handler that the counter was incremented */
        m->incremented_flag = 1;
    }
    else
    {
        /* threshold count reached, stop sending messages */
        m->incremented_flag = 0;
    }
    return;
}
/* handle receiving request */
/* ns: persistent state of this server LP (msg_recvd_count is updated here)
 * b:  ROSS bitfield, not used by this handler
 * m:  the received request; m->src is the requesting server
 * lp: the LP processing the event */
static void handle_req_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* messages handed to model-net; designated initializers zero every field
     * we do not set (e.g. incremented_flag), so no indeterminate bytes are
     * handed to model_net_event */
    svr_msg m_local  = { .svr_event_type = LOCAL, .src = lp->gid };
    svr_msg m_remote = { .svr_event_type = ACK,   .src = lp->gid };

    /* safety check that this request got to the right server */
    assert(lp->gid == (m->src + offset)%(num_servers*2) &&
           lp->gid == get_next_server(m->src));

    ns->msg_recvd_count++;

    /* send ack back */
    /* simulated payload of PAYLOAD_SZ bytes */
    /* also trigger a local event for completion of payload msg */
    /* remote host will get an ack event */
    model_net_event(net_id, "test", m->src, PAYLOAD_SZ, sizeof(svr_msg),
            (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
    return;
}
/* for us, reverse events are very easy, the only LP state that needs to be
* rolled back are the counts.
* for more complex simulations, this will not be the case (e.g., state
* containing queues) */
static void handle_local_rev_event(
svr_state * ns,
tw_bf * b,
......@@ -351,6 +538,7 @@ static void handle_req_rev_event(
tw_lp * lp)
{
ns->msg_recvd_count--;
/* model-net has its own reverse computation support */
model_net_event_rc(net_id, lp, PAYLOAD_SZ);
return;
......@@ -385,71 +573,6 @@ static void handle_ack_rev_event(
return;
}
/* handle recving ack */
/* ns: persistent state of this server LP (msg_sent_count is updated here)
 * b:  ROSS bitfield, not used by this handler
 * m:  the received ACK; m->src is the server that acknowledged, and
 *     m->incremented_flag records whether msg_sent_count was bumped
 * lp: the LP processing the event */
static void handle_ack_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* safety check that this request got to the right server */
    assert(m->src == (lp->gid + offset)%(num_servers*2));

    if(ns->msg_sent_count < NUM_REQS)
    {
        /* the messages live on the stack: model_net_event copies them, so
         * heap allocation here would only leak (it was never freed, and was
         * performed even when no request was sent). Fields not named in the
         * initializers are zeroed. */
        svr_msg m_local  = { .svr_event_type = LOCAL, .src = lp->gid };
        svr_msg m_remote = { .svr_event_type = REQ,   .src = lp->gid };

        /* send another request */
        model_net_event(net_id, "test", m->src, PAYLOAD_SZ, sizeof(svr_msg),
                (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
        ns->msg_sent_count++;
        /* tell the reverse handler that the counter was incremented */
        m->incremented_flag = 1;
    }
    else
    {
        /* threshold count reached, stop sending messages */
        m->incremented_flag = 0;
    }
    return;
}
/* handle receiving request */
/* ns: persistent state of this server LP (msg_recvd_count is updated here)
 * b:  ROSS bitfield, not used by this handler
 * m:  the received request; m->src is the requesting server
 * lp: the LP processing the event */
static void handle_req_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* the messages live on the stack: model_net_event copies them, so heap
     * allocation here would only leak (it was never freed or NULL-checked).
     * Fields not named in the initializers are zeroed. */
    svr_msg m_local  = { .svr_event_type = LOCAL, .src = lp->gid };
    svr_msg m_remote = { .svr_event_type = ACK,   .src = lp->gid };

    /* safety check that this request got to the right server */
    assert(lp->gid == (m->src + offset)%(num_servers*2));

    ns->msg_recvd_count++;

    /* send ack back */
    /* simulated payload of PAYLOAD_SZ bytes */
    /* also trigger a local event for completion of payload msg */
    /* remote host will get an ack event */
    model_net_event(net_id, "test", m->src, PAYLOAD_SZ, sizeof(svr_msg),
            (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
    return;
}
/*
* Local variables:
* c-indent-level: 4
......
# the LPGROUPS set is required by all simulations using codes. Multiple groups
# can be entered (only one is here for our example), each consisting of a set
# of application- and codes-specific key-value pairs.
LPGROUPS
{
# in our simulation, we simply have a set of servers, each with
# point-to-point access to each other
SERVERS
{
# required: number of times to repeat the following key-value pairs
repetitions="16";
# application-specific: parsed in main
server="1";
# model-net-specific field defining the network backend. In this example,
# each server has one NIC, and all servers are point-to-point connected
modelnet_simplenet="1";
}
}
# required by CODES: miscellaneous parameters used in the simulation that
# don't fit in group definition.
PARAMS
{
# ROSS-specific parameters:
# - message_size: ROSS expects you to upper bound your event message size.
# Going over this size will crash or otherwise destroy your
# simulation.
message_size="256";
# model-net-specific parameters:
# - individual packet sizes for network operations
# (each "packet" is represented by an event)
# - independent of underlying network being used
packet_size="512";
# - type of model to use (must match with corresponding LPGROUPS entry)
modelnet="simplenet";
# - model-specific parameters
net_startup_ns="1.5";
net_bw_mbps="20000";
}
# custom parameter sets can also be added - this one isn't used in the
# simulation, but is included for illustrative purposes
blah
{
param1="hello";
param2="goodbye";
param3="0.0001";
}
LPGROUPS
{
MODELNET_GRP
{
repetitions="16";
server="1";
modelnet_simplenet="1";
}
}
PARAMS
{
packet_size="512";
message_size="256";
modelnet="simplenet";
net_startup_ns="1.5";
net_bw_mbps="20000";
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment