Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Xin Wang
codes-dev
Commits
d603bf86
Commit
d603bf86
authored
Dec 13, 2013
by
Jonathan Jenkins
Browse files
Fleshing out example program, and other cleanups
parent
7b22be99
Changes
4
Hide whitespace changes
Inline
Side-by-side
doc/example/Makefile
View file @
d603bf86
...
...
@@ -16,15 +16,5 @@ LDLIBS = $(shell $(ROSS)/bin/ross-config --libs) -lcodes-net -lcodes-base
example
:
example.c
#$(CC) $(ROSS_CPPFLAGS) $(CPPFLAGS) $(CFLAGS) $(ROSS_LDFLAGS) $^ -o $@ -L$(ROSS)/lib -lROSS -lm -L$(CODESBASE)/lib -lcodes-base -L$(CODESNET)/lib -lcodes-net
check-env
:
ifndef
CODESBASE
$(
error
CODESBASE is undefined, see README.txt
)
endif
ifndef
CODESNET
$(error
CODESNET
is
undefined,
see
README.txt)
endif
ifndef
ROSS
$(error
ROSS
is
undefined,
see
README.txt)
endif
clean
:
rm
-f
example
doc/example/example.c
View file @
d603bf86
...
...
@@ -24,18 +24,22 @@
#include "codes/model-net.h"
#include "codes/lp-type-lookup.h"
#define NUM_REQS 500
/* number of requests sent by each server */
#define NUM_REQS 500
/* number of requests sent by each server */
#define PAYLOAD_SZ 2048
/* size of simulated data payload, bytes */
/* model-net ID, can be either simple-net, dragonfly or torus */
/* model-net ID, can be either simple-net, dragonfly or torus (more may be
* added) */
static
int
net_id
=
0
;
static
int
num_servers
=
0
;
static
int
offset
=
2
;
/* expected group name in configure files for this program */
static
char
*
group_name
=
"SERVERS"
;
typedef
struct
svr_msg
svr_msg
;
typedef
struct
svr_state
svr_state
;
/* types of events that will constitute
triton request
s */
/* types of events that will constitute
server activitie
s */
enum
svr_event
{
KICKOFF
,
/* initial event */
...
...
@@ -44,6 +48,9 @@ enum svr_event
LOCAL
/* local event */
};
/* this struct serves as the ***persistent*** state of the LP representing the
* server in question. This struct is setup when the LP initialization function
* ptr is called */
struct
svr_state
{
int
msg_sent_count
;
/* requests sent */
...
...
@@ -52,6 +59,8 @@ struct svr_state
tw_stime
start_ts
;
/* time that we started sending requests */
};
/* this struct serves as the ***temporary*** event data, which can be thought
* of as a message between two LPs. */
struct
svr_msg
{
enum
svr_event
svr_event_type
;
...
...
@@ -60,6 +69,12 @@ struct svr_msg
int
incremented_flag
;
/* helper for reverse computation */
};
/* ROSS expects four functions per LP:
* - an LP initialization function, called for each LP
* - an event processing function
* - a *reverse* event processing function (rollback), and
* - a finalization/cleanup function when the simulation ends
*/
static
void
svr_init
(
svr_state
*
ns
,
tw_lp
*
lp
);
...
...
@@ -77,6 +92,8 @@ static void svr_finalize(
svr_state
*
ns
,
tw_lp
*
lp
);
/* set up the function pointers for ROSS, as well as the size of the LP state
* structure (NOTE: ROSS is in charge of event and state (de-)allocation) */
tw_lptype
svr_lp
=
{
(
init_f
)
svr_init
,
(
event_f
)
svr_event
,
...
...
@@ -90,6 +107,9 @@ extern const tw_lptype* svr_get_lp_type();
static
void
svr_add_lp_type
();
static
tw_stime
ns_to_s
(
tw_stime
ns
);
static
tw_stime
s_to_ns
(
tw_stime
ns
);
/* as we only have a single event processing entry point and multiple event
* types, for clarity we define "handlers" for each (reverse) event type */
static
void
handle_kickoff_event
(
svr_state
*
ns
,
tw_bf
*
b
,
...
...
@@ -131,9 +151,22 @@ static void handle_req_rev_event(
svr_msg
*
m
,
tw_lp
*
lp
);
/* for this simulation, each server contacts its neighboring server (by id).
* this function shows how to use the codes_mapping API to calculate IDs when
* having to contend with multiple LP types and counts. Note that in this simple
* example codes_mapping is overkill. */
static
tw_lpid
get_next_server
(
tw_lpid
sender_id
);
/* arguments to be handled by ROSS - strings passed in are expected to be
* pre-allocated */
static
char
conf_file_name
[
256
]
=
{
0
};
/* this struct contains default parameters used by ROSS, as well as
* user-specific arguments to be handled by the ROSS config sys. Pass it in
* prior to calling tw_init */
const
tw_optdef
app_opt
[]
=
{
TWOPT_GROUP
(
"Model net test case"
),
TWOPT_CHAR
(
"codes-config"
,
conf_file_name
,
"name of codes configuration file"
),
TWOPT_END
()
};
...
...
@@ -143,35 +176,40 @@ int main(
{
int
nprocs
;
int
rank
;
/* TODO: explain why we need this (ROSS has cutoff??) */
g_tw_ts_end
=
s_to_ns
(
60
*
60
*
24
*
365
);
/* one year, in nsecs */
/* ROSS initialization function calls */
tw_opt_add
(
app_opt
);
tw_init
(
&
argc
,
&
argv
);
tw_opt_add
(
app_opt
);
/* add user-defined args */
/* initialize ROSS and parse args. NOTE: tw_init calls MPI_Init */
tw_init
(
&
argc
,
&
argv
);
if
(
argc
<
2
)
if
(
!
conf_file_name
[
0
])
{
printf
(
"
\n
Usage: mpirun <args> --sync=2/3 mapping_file_name.conf (optional --nkp)
"
);
MPI_Finalize
();
return
0
;
f
printf
(
stderr
,
"Expected
\"
codes-config
\"
option, please see --help.
\n
"
);
MPI_Finalize
();
return
1
;
}
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
rank
);
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
nprocs
);
/* loading the config file of codes-mapping */
configuration_load
(
argv
[
2
],
MPI_COMM_WORLD
,
&
config
);
/* Setup the model-net parameters specified in the config file */
net_id
=
model_net_set_params
();
/* loading the config file into the codes-mapping utility, giving us the
* parsed config object in return.
* "config" is a global var defined by codes-mapping */
if
(
configuration_load
(
conf_file_name
,
MPI_COMM_WORLD
,
&
config
)){
fprintf
(
stderr
,
"Error loading config file %s.
\n
"
,
conf_file_name
);
MPI_Finalize
();
return
1
;
}
/* register the server LP type (model-net LP type is registered internally in model_net_set_params() */
svr_add_lp_type
();
/*Now setup codes mapping */
codes_mapping_setup
();
/*query codes mapping API*/
num_servers
=
codes_mapping_get_group_reps
(
"MODELNET_GRP"
)
*
codes_mapping_get_lp_count
(
"MODELNET_GRP"
,
"server"
);
/* Setup the model-net parameters specified in the global config object,
* returned is the identifier for the network type */
net_id
=
model_net_set_params
();
/* in this example, we are using simplenet, which simulates point to point
* communication between any two entities (other networks are trickier to
* setup). Hence: */
if
(
net_id
!=
SIMPLENET
)
{
printf
(
"
\n
The test works with simple-net configuration only! "
);
...
...
@@ -179,7 +217,23 @@ int main(
return
0
;
}
/* register the server LP type with codes-base
* (model-net LP type is registered internally in model_net_set_params()) */
svr_add_lp_type
();
/* Setup takes the global config object, the registered LPs, and
* generates/places the LPs as specified in the configuration file.
* This should only be called after ALL LP types have been registered in
* codes */
codes_mapping_setup
();
/* calculate the number of servers in this simulation */
num_servers
=
codes_mapping_get_group_reps
(
group_name
)
*
codes_mapping_get_lp_count
(
group_name
,
"server"
);
/* begin simulation */
tw_run
();
/* model-net has the capability of outputting network transmission stats */
model_net_report_stats
(
net_id
);
tw_end
();
...
...
@@ -193,7 +247,9 @@ const tw_lptype* svr_get_lp_type()
/* Register the "server" LP type, pairing the name with the function-pointer
 * table returned by svr_get_lp_type().
 *
 * lp_type_register should be called exactly once per process per LP type,
 * so the registration is issued a single time here. */
static void svr_add_lp_type()
{
    lp_type_register("server", svr_get_lp_type());
}
static
void
svr_init
(
...
...
@@ -213,14 +269,20 @@ static void svr_init(
/* skew each kickoff event slightly to help avoid event ties later on */
kickoff_time
=
g_tw_lookahead
+
tw_rand_unif
(
lp
->
rng
);
/* first create the event (time arg is an offset, not absolute time) */
e
=
codes_event_new
(
lp
->
gid
,
kickoff_time
,
lp
);
/* after event is created, grab the allocated message and set msg-specific
* data */
m
=
tw_event_data
(
e
);
m
->
svr_event_type
=
KICKOFF
;
/* event is ready to be processed, send it off */
tw_event_send
(
e
);
return
;
}
/* event processing entry point
* - simply forward the message to the appropriate handler */
static
void
svr_event
(
svr_state
*
ns
,
tw_bf
*
b
,
...
...
@@ -248,6 +310,8 @@ static void svr_event(
}
}
/* reverse event processing entry point
* - simply forward the message to the appropriate handler */
static
void
svr_rev_event
(
svr_state
*
ns
,
tw_bf
*
b
,
...
...
@@ -276,11 +340,12 @@ static void svr_rev_event(
return
;
}
/* Once the simulation is over, print a one-line summary for this server.
 *
 * ns: state of the server LP (message counters and transfer start time)
 * lp: the LP being finalized
 *
 * Reported values: a server index (lp->gid / 2), bytes received
 * (PAYLOAD_SZ per received message), elapsed time in seconds since
 * ns->start_ts, a MiB/s figure based on PAYLOAD_SZ * NUM_REQS, and the raw
 * sent / received / local-callback counters. */
static void svr_finalize(
    svr_state * ns,
    tw_lp * lp)
{
    /* NOTE(review): gid/2 presumably maps the global LP id onto a server
     * index under the alternating server/NIC LP layout - confirm against
     * the configuration in use. */
    printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d\n",
        (unsigned long long)(lp->gid/2),
        PAYLOAD_SZ*ns->msg_recvd_count,
        ns_to_s((tw_now(lp)-ns->start_ts)),
        ((double)(PAYLOAD_SZ*NUM_REQS)/(double)(1024*1024)/ns_to_s(tw_now(lp)-ns->start_ts)),
        ns->msg_sent_count,
        ns->msg_recvd_count,
        ns->local_recvd_count);
    return;
}
...
...
@@ -297,6 +362,28 @@ static tw_stime s_to_ns(tw_stime ns)
return
(
ns
*
(
1000
.
0
*
1000
.
0
*
1000
.
0
));
}
/* Return the LP id of the server in the repetition following the sender's,
 * wrapping around to the first repetition after the last one
 * (see the declaration for more general info). */
tw_lpid get_next_server(tw_lpid sender_id)
{
    /* Filled in by codes-mapping for the calling LP. Caching this info in
     * the LP struct isn't a bad idea if the number of lookups gets large. */
    char group[MAX_NAME_LENGTH];
    char type_name[MAX_NAME_LENGTH];
    int type_id;
    int group_id;
    int rep_id;
    int rep_offset;
    int rep_count;
    int next_rep;
    tw_lpid next_id;

    codes_mapping_get_lp_info(sender_id, group, &group_id, &type_id,
            type_name, &rep_id, &rep_offset);

    /* in this example, we assume that, for our group of servers, each
     * "repetition" consists of a single server/NIC pair. Hence, step to the
     * next repetition, looping around if necessary */
    rep_count = codes_mapping_get_group_reps(group);
    next_rep = (rep_id + 1) % rep_count;

    /* exactly 1 server per repetition -> offset within the repetition is 0 */
    codes_mapping_get_lp_id(group, type_name, next_rep, 0, &next_id);

    return next_id;
}
/* handle initial event */
static
void
handle_kickoff_event
(
svr_state
*
ns
,
...
...
@@ -304,28 +391,46 @@ static void handle_kickoff_event(
svr_msg
*
m
,
tw_lp
*
lp
)
{
svr_msg
*
m_local
=
malloc
(
sizeof
(
svr_msg
));
svr_msg
*
m_remote
=
malloc
(
sizeof
(
svr_msg
));
/* we allocate a local message and a remote message both */
m_local
->
svr_event_type
=
LOCAL
;
m_local
->
src
=
lp
->
gid
;
memcpy
(
m_remote
,
m_local
,
sizeof
(
svr_msg
));
m_remote
->
svr_event_type
=
REQ
;
int
dest_id
;
int
use_brute_force_map
=
0
;
/* normally, when using ROSS, events are allocated as a result of the event
* creation process. However, since we are now asking model-net to
* communicate with an entity on our behalf, we need to generate both the
* message to the recipient and an optional callback message
* - thankfully, memory need not persist past the model_net_event call - it
* copies the messages */
svr_msg
m_local
;
svr_msg
m_remote
;
m_local
.
svr_event_type
=
LOCAL
;
m_local
.
src
=
lp
->
gid
;
m_remote
.
svr_event_type
=
REQ
;
m_remote
.
src
=
lp
->
gid
;
/* record when transfers started on this server */
ns
->
start_ts
=
tw_now
(
lp
);
/* each server sends a request to the next highest server */
int
dest_id
=
(
lp
->
gid
+
offset
)
%
(
num_servers
*
2
+
num_routers
);
/* each server sends a request to the next highest server
* In this simulation, LP determination is simple: LPs are assigned
* round robin as in serv_1, net_1, serv_2, net_2, etc.
* However, that may not always be the case, so we also show a more
* complicated way to map through codes_mapping */
if
(
use_brute_force_map
)
dest_id
=
(
lp
->
gid
+
offset
)
%
(
num_servers
*
2
);
else
{
dest_id
=
get_next_server
(
lp
->
gid
);
}
/*model-net needs to know about (1) higher-level destination LP which is a neighboring server in this case
/*
model-net needs to know about (1) higher-level destination LP which is a neighboring server in this case
* (2) struct and size of remote message and (3) struct and size of local message (a local message can be null) */
model_net_event
(
net_id
,
"test"
,
dest_id
,
PAYLOAD_SZ
,
sizeof
(
svr_msg
),
(
const
void
*
)
m_remote
,
sizeof
(
svr_msg
),
(
const
void
*
)
m_local
,
lp
);
model_net_event
(
net_id
,
"test"
,
dest_id
,
PAYLOAD_SZ
,
sizeof
(
svr_msg
),
(
const
void
*
)
&
m_remote
,
sizeof
(
svr_msg
),
(
const
void
*
)
&
m_local
,
lp
);
ns
->
msg_sent_count
++
;
}
/* at the moment, no need for local callbacks from model-net, so we maintain a
* count for debugging purposes */
static
void
handle_local_event
(
svr_state
*
ns
,
tw_bf
*
b
,
...
...
@@ -335,6 +440,88 @@ static void handle_local_event(
ns
->
local_recvd_count
++
;
}
/* ACK handler.
 * In this simulation a server keeps pinging its destination server until
 * NUM_REQS requests of PAYLOAD_SZ bytes have been issued; each ACK received
 * from the destination triggers the next request.
 *
 * m->incremented_flag records whether this event bumped msg_sent_count
 * (helper for reverse computation). */
static void handle_ack_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    svr_msg callback_msg;   /* delivered back to this LP by model-net */
    svr_msg request_msg;    /* delivered to the destination server */

    /* The ACK doesn't actually come from the NIC on the other server:
     * model-net "hides" the NIC LP, so the source we see is the original
     * destination server. Check it against both the brute-force LP
     * calculation and the generic codes-mapping calculation. */
    assert(m->src == (lp->gid + offset) % (num_servers*2) &&
           m->src == get_next_server(lp->gid));

    /* threshold count reached: stop sending messages */
    if (ns->msg_sent_count >= NUM_REQS)
    {
        m->incremented_flag = 0;
        return;
    }

    /* again, build our own messages so model-net can transmit on our behalf */
    callback_msg.svr_event_type = LOCAL;
    callback_msg.src = lp->gid;
    request_msg.svr_event_type = REQ;
    request_msg.src = lp->gid;

    /* send another request */
    model_net_event(net_id, "test", m->src, PAYLOAD_SZ,
            sizeof(svr_msg), (const void*)&request_msg,
            sizeof(svr_msg), (const void*)&callback_msg, lp);
    ns->msg_sent_count++;
    m->incremented_flag = 1;
    return;
}
/* Request handler: count the incoming request and answer it with an ACK
 * sent through model-net to the requesting server. */
static void handle_req_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    svr_msg callback_msg;   /* local completion event for the payload msg */
    svr_msg ack_msg;        /* the remote host will get an ACK event */

    callback_msg.svr_event_type = LOCAL;
    callback_msg.src = lp->gid;
    ack_msg.svr_event_type = ACK;
    ack_msg.src = lp->gid;

    /* safety check that this request got to the right server, using both
     * the brute-force id calculation and the codes-mapping lookup */
    assert(lp->gid == (m->src + offset) % (num_servers*2) &&
           lp->gid == get_next_server(m->src));

    ns->msg_recvd_count++;

    /* send the ack back with a simulated payload of PAYLOAD_SZ bytes */
    model_net_event(net_id, "test", m->src, PAYLOAD_SZ,
            sizeof(svr_msg), (const void*)&ack_msg,
            sizeof(svr_msg), (const void*)&callback_msg, lp);
    return;
}
/* for us, reverse events are very easy, the only LP state that needs to be
* rolled back are the counts.
* for more complex simulations, this will not be the case (e.g., state
* containing queues) */
static
void
handle_local_rev_event
(
svr_state
*
ns
,
tw_bf
*
b
,
...
...
@@ -351,6 +538,7 @@ static void handle_req_rev_event(
tw_lp
*
lp
)
{
ns
->
msg_recvd_count
--
;
/* model-net has its own reverse computation support */
model_net_event_rc
(
net_id
,
lp
,
PAYLOAD_SZ
);
return
;
...
...
@@ -385,71 +573,6 @@ static void handle_ack_rev_event(
return
;
}
/* ACK handler: when fewer than NUM_REQS requests have been sent, issue the
 * next request to the server that acknowledged the previous one.
 *
 * The messages handed to model_net_event live on the stack instead of the
 * heap: the previous malloc'd buffers were never freed (and never checked
 * for NULL), leaking two svr_msg objects per ACK, even on the path that
 * sends nothing. Comments elsewhere in this file state that model-net
 * copies the messages, so they need not outlive the call.
 *
 * m->incremented_flag records whether this event bumped msg_sent_count
 * (helper for reverse computation). */
static void handle_ack_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* safety check that this request got to the right server */
    assert(m->src == (lp->gid + offset) % (num_servers*2));

    if (ns->msg_sent_count < NUM_REQS)
    {
        /* zero-initialized so the bytes copied by model-net are deterministic */
        svr_msg m_local = {0};
        svr_msg m_remote = {0};

        m_local.svr_event_type = LOCAL;
        m_local.src = lp->gid;
        m_remote.svr_event_type = REQ;
        m_remote.src = lp->gid;

        /* send another request */
        model_net_event(net_id, "test", m->src, PAYLOAD_SZ,
                sizeof(svr_msg), (const void*)&m_remote,
                sizeof(svr_msg), (const void*)&m_local, lp);
        ns->msg_sent_count++;
        m->incremented_flag = 1;
    }
    else
    {
        /* threshold count reached, stop sending messages */
        m->incremented_flag = 0;
    }
    return;
}
/* Request handler: count the incoming request and send an ACK back to the
 * requesting server through model-net.
 *
 * The messages handed to model_net_event live on the stack instead of the
 * heap: the previous malloc'd buffers were never freed (and never checked
 * for NULL), leaking two svr_msg objects per request. Comments elsewhere in
 * this file state that model-net copies the messages, so they need not
 * outlive the call. */
static void handle_req_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* zero-initialized so the bytes copied by model-net are deterministic */
    svr_msg m_local = {0};
    svr_msg m_remote = {0};

    m_local.svr_event_type = LOCAL;
    m_local.src = lp->gid;
    m_remote.svr_event_type = ACK;
    m_remote.src = lp->gid;

    /* safety check that this request got to the right server */
    assert(lp->gid == (m->src + offset) % (num_servers*2));

    ns->msg_recvd_count++;

    /* send ack back with a simulated payload of PAYLOAD_SZ bytes;
     * also trigger a local event for completion of the payload msg;
     * the remote host will get an ack event */
    model_net_event(net_id, "test", m->src, PAYLOAD_SZ,
            sizeof(svr_msg), (const void*)&m_remote,
            sizeof(svr_msg), (const void*)&m_local, lp);
    return;
}
/*
* Local variables:
* c-indent-level: 4
...
...
doc/example/example.conf
0 → 100644
View file @
d603bf86
# the LPGROUPS set is required by all simulations using codes. Multiple groups
# can be entered (only one is here for our example), each consisting of a set
# of application- and codes-specific key-value pairs.
LPGROUPS
{
# in our simulation, we simply have a set of servers, each with
# point-to-point access to each other
SERVERS
{
# required: number of times to repeat the following key-value pairs
repetitions
=
"16"
;
# application-specific: parsed in main
server
=
"1"
;
# model-net-specific field defining the network backend. In this example,
# each server has one NIC, and all servers are point-to-point connected
modelnet_simplenet
=
"1"
;
}
}
# required by CODES: miscellaneous parameters used in the simulation that
# don't fit in group definition.
PARAMS
{
# ROSS-specific parameters:
# - message_size: ROSS expects you to upper bound your event message size.
# Going over this size will crash or otherwise destroy your
# simulation.
message_size
=
"256"
;
# model-net-specific parameters:
# - individual packet sizes for network operations
# (each "packet" is represented by an event)
# - independent of underlying network being used
packet_size
=
"512"
;
# - type of model to use (must match with corresponding LPGROUPS entry)
modelnet
=
"simplenet"
;
# - model-specific parameters
net_startup_ns
=
"1.5"
;
net_bw_mbps
=
"20000"
;
}
# custom parameter sets can also be added - this one isn't used in the
# simulation, but is included for illustrative purposes
blah
{
param1
=
"hello"
;
param2
=
"goodbye"
;
param3
=
"0.0001"
;
}
doc/example/modelnet-test.conf
deleted
100644 → 0
View file @
7b22be99
LPGROUPS
{
MODELNET_GRP
{
repetitions
=
"16"
;
server
=
"1"
;
modelnet_simplenet
=
"1"
;
}
}
PARAMS
{
packet_size
=
"512"
;
message_size
=
"256"
;
modelnet
=
"simplenet"
;
net_startup_ns
=
"1.5"
;
net_bw_mbps
=
"20000"
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment