Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
ssg
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
19
Issues
19
List
Boards
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
sds
ssg
Commits
d76bef34
Commit
d76bef34
authored
Oct 08, 2018
by
Shane Snyder
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dynamic leaves working + more test infrastructure
parent
3c2ab846
Changes
12
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing 12 changed files with 666 additions and 206 deletions
+666
-206
include/ssg.h
include/ssg.h
+2
-1
src/ssg-internal.h
src/ssg-internal.h
+7
-2
src/ssg-rpc.c
src/ssg-rpc.c
+133
-58
src/ssg.c
src/ssg.c
+311
-99
src/swim-fd/swim-fd.c
src/swim-fd/swim-fd.c
+4
-2
src/swim-fd/swim-fd.h
src/swim-fd/swim-fd.h
+5
-1
tests/Makefile.subdir
tests/Makefile.subdir
+3
-3
tests/join-leave-group.sh
tests/join-leave-group.sh
+5
-8
tests/simple-group.sh
tests/simple-group.sh
+0
-1
tests/ssg-join-leave-group.c
tests/ssg-join-leave-group.c
+176
-0
tests/ssg-launch-group.c
tests/ssg-launch-group.c
+19
-11
tests/test-util.sh
tests/test-util.sh
+1
-20
No files found.
include/ssg.h
View file @
d76bef34
...
...
@@ -141,7 +141,8 @@ int ssg_group_destroy(
* @param[in] update_cb_dat User data pointer passed to membership update callback
* @returns SSG group identifier for joined group on success, SSG_GROUP_ID_NULL otherwise
*
* NOTE: XXX in and out group ids
* NOTE: Use the returned group ID to refer to the group, as the input group ID
* becomes stale after the join is completed.
*/
ssg_group_id_t
ssg_group_join
(
ssg_group_id_t
in_group_id
,
...
...
src/ssg-internal.h
View file @
d76bef34
...
...
@@ -48,7 +48,6 @@ extern "C" {
} while(0)
/* debug printing macro for SSG */
/* TODO: direct debug output to file? */
/* TODO: how do we debug attachers? */
#ifdef DEBUG
#define SSG_DEBUG(__g, __fmt, ...) do { \
...
...
@@ -103,6 +102,7 @@ typedef struct ssg_group
ssg_member_id_t
self_id
;
ssg_group_view_t
view
;
ssg_group_target_list_t
target_list
;
ssg_member_state_t
*
dead_members
;
ssg_group_descriptor_t
*
descriptor
;
swim_context_t
*
swim_ctx
;
ABT_rwlock
lock
;
...
...
@@ -128,6 +128,7 @@ typedef struct ssg_instance
margo_instance_id
mid
;
ssg_group_t
*
group_table
;
ssg_attached_group_t
*
attached_group_table
;
ABT_rwlock
lock
;
}
ssg_instance_t
;
enum
ssg_group_descriptor_owner_status
...
...
@@ -154,9 +155,14 @@ void ssg_register_rpcs(
void
);
int
ssg_group_join_send
(
ssg_group_descriptor_t
*
group_descriptor
,
hg_addr_t
group_target_addr
,
char
**
group_name
,
int
*
group_size
,
void
**
view_buf
);
int
ssg_group_leave_send
(
ssg_group_descriptor_t
*
group_descriptor
,
ssg_member_id_t
self_id
,
hg_addr_t
group_target_addr
);
int
ssg_group_attach_send
(
ssg_group_descriptor_t
*
group_descriptor
,
char
**
group_name
,
...
...
@@ -167,7 +173,6 @@ void ssg_apply_swim_user_updates(
swim_user_update_t
*
updates
,
hg_size_t
update_count
);
/* XXX: is this right? can this be a global? */
extern
ssg_instance_t
*
ssg_inst
;
#ifdef __cplusplus
...
...
src/ssg-rpc.c
View file @
d76bef34
This diff is collapsed.
Click to expand it.
src/ssg.c
View file @
d76bef34
This diff is collapsed.
Click to expand it.
src/swim-fd/swim-fd.c
View file @
d76bef34
...
...
@@ -193,6 +193,7 @@ static void swim_tick_ult(
/* sleep for an RTT and wait for an ack for this dping req */
margo_thread_sleep
(
swim_ctx
->
mid
,
swim_ctx
->
dping_timeout
);
#if 0
/* if we don't hear back from the target after an RTT, kick off
* a set of indirect pings to a subgroup of group members
*/
...
...
@@ -222,6 +223,7 @@ static void swim_tick_ult(
}
}
}
#endif
return
;
}
...
...
@@ -231,6 +233,8 @@ void swim_finalize(swim_context_t *swim_ctx)
/* set shutdown flag so ULTs know to start wrapping up */
swim_ctx
->
shutdown_flag
=
1
;
SWIM_DEBUG
(
swim_ctx
,
"GOT SHUTDOWN
\n
"
);
/* XXX free lists, etc. */
if
(
swim_ctx
->
prot_thread
)
...
...
@@ -398,8 +402,6 @@ void swim_register_user_update(
/* add to recent update list */
LL_APPEND
(
*
user_update_list
,
update_link
);
SWIM_DEBUG
(
swim_ctx
,
"REGISTERED UPDATE *******************
\n
"
);
return
;
}
...
...
src/swim-fd/swim-fd.h
View file @
d76bef34
...
...
@@ -47,10 +47,14 @@ typedef struct swim_user_update
void
*
data
;
}
swim_user_update_t
;
#define SWIM_MEMBER_STATE_INIT(__ms) do { \
#define SWIM_MEMBER_SET_ALIVE(__ms) do { \
__ms.inc_nr = 0; \
__ms.status = SWIM_MEMBER_ALIVE; \
} while(0)
#define SWIM_MEMBER_SET_DEAD(__ms) do { \
__ms.status = SWIM_MEMBER_DEAD; \
} while(0)
#define SWIM_MEMBER_IS_DEAD(__ms) (__ms.status == SWIM_MEMBER_DEAD)
/* SWIM callbacks for integrating with an overlying group management layer */
typedef
struct
swim_group_mgmt_callbacks
...
...
tests/Makefile.subdir
View file @
d76bef34
...
...
@@ -6,15 +6,15 @@ TESTS_ENVIRONMENT += \
check_PROGRAMS
+=
\
tests/ssg-launch-group
\
tests/ssg-join-group
tests/ssg-join-leave-group
TESTS
+=
\
tests/simple-group.sh
\
tests/join-group.sh
tests/join-leave-group.sh
EXTRA_DIST
+=
\
tests/simple-group.sh
\
tests/join-group.sh
tests/join-leave-group.sh
check_PROGRAMS
+=
tests/perf-regression/margo-p2p-latency
tests_perf_regression_margo_p2p_latency_LDADD
=
src/libssg.la
...
...
tests/join-group.sh → tests/join-leave-group.sh
View file @
d76bef34
...
...
@@ -8,11 +8,10 @@ source $srcdir/tests/test-util.sh
TMPOUT
=
$(
$MKTEMP
-d
--tmpdir
test-XXXXXX
)
#
export SSG_DEBUG_LOGDIR=$TMPOUT
export
SSG_DEBUG_LOGDIR
=
$TMPOUT
# launch initial group, storing GID
export
SSG_GROUP_LAUNCH_NAME
=
simplest-group
export
SSG_GROUP_LAUNCH_DURATION
=
10
export
SSG_GROUP_LAUNCH_DURATION
=
30
export
SSG_GROUP_LAUNCH_GIDFILE
=
gid.out
launch_ssg_group_mpi 4 na+sm &
if
[
$?
-ne
0
]
;
then
...
...
@@ -21,11 +20,9 @@ if [ $? -ne 0 ]; then
exit
1
fi
sleep
2
sleep
5
# try to join running group
export
SSG_GROUP_LAUNCH_DURATION
=
8
join_ssg_group na+sm
$SSG_GROUP_LAUNCH_GIDFILE
&
tests/ssg-join-leave-group
-s
25
-l
10 na+sm
$SSG_GROUP_LAUNCH_GIDFILE
&
if
[
$?
-ne
0
]
;
then
wait
rm
-rf
$TMPOUT
...
...
@@ -38,5 +35,5 @@ if [ $? -ne 0 ]; then
exit
1
fi
#
rm -rf $TMPOUT
rm
-rf
$TMPOUT
exit
0
tests/simple-group.sh
View file @
d76bef34
...
...
@@ -7,7 +7,6 @@ fi
source
$srcdir
/tests/test-util.sh
# launch a group and wait for termination
export
SSG_GROUP_LAUNCH_NAME
=
simplest-group
export
SSG_GROUP_LAUNCH_DURATION
=
10
launch_ssg_group_mpi 4 na+sm &
if
[
$?
-ne
0
]
;
then
...
...
tests/ssg-join-group.c → tests/ssg-join-leave-group.c
View file @
d76bef34
...
...
@@ -22,10 +22,12 @@
} \
} while(0)
struct
group_join_opts
struct
group_join_
leave_
opts
{
int
join_time
;
int
leave_time
;
int
shutdown_time
;
char
*
addr_str
;
int
duration
;
char
*
gid_file
;
};
...
...
@@ -33,26 +35,45 @@ static void usage()
{
fprintf
(
stderr
,
"Usage: "
"ssg-join-group [OPTIONS] <ADDR> <GID>
\n
"
"Join an existing group given by GID using Mercury address ADDR.
\n
"
"ssg-join-
leave-
group [OPTIONS] <ADDR> <GID>
\n
"
"Join
, and potentially leave,
an existing group given by GID using Mercury address ADDR.
\n
"
"
\n
"
"OPTIONS:
\n
"
"
\t
-d DUR
\t\t
Specify a time duration (in seconds) to run the group for
\n
"
);
"
\t
-j TIME
\t\t
Specify a time (relative to program start, in seconds) to join the group [default=0]
\n
"
"
\t
-l TIME
\t\t
Specify a time (relative to program start, in seconds) to leave the group [default=never]
\n
"
"
\t
-s TIME
\t\t
Specify a time (relative to program start, in seconds) to shutdown [default=10]
\n
"
"NOTE: leave time must be after join time, and shutdown time must be after both join/leave times
\n
"
);
}
static
void
parse_args
(
int
argc
,
char
*
argv
[],
struct
group_join_opts
*
opts
)
static
void
parse_args
(
int
argc
,
char
*
argv
[],
struct
group_join_
leave_
opts
*
opts
)
{
int
c
;
const
char
*
options
=
"
d
:"
;
const
char
*
options
=
"
j:l:s
:"
;
char
*
check
=
NULL
;
while
((
c
=
getopt
(
argc
,
argv
,
options
))
!=
-
1
)
{
switch
(
c
)
{
case
'd'
:
opts
->
duration
=
(
int
)
strtol
(
optarg
,
&
check
,
0
);
if
(
opts
->
duration
<
0
||
(
check
&&
*
check
!=
'\0'
))
case
'j'
:
opts
->
join_time
=
(
int
)
strtol
(
optarg
,
&
check
,
0
);
if
(
opts
->
join_time
<
0
||
(
check
&&
*
check
!=
'\0'
))
{
usage
();
exit
(
EXIT_FAILURE
);
}
break
;
case
'l'
:
opts
->
leave_time
=
(
int
)
strtol
(
optarg
,
&
check
,
0
);
if
(
opts
->
leave_time
<
0
||
(
check
&&
*
check
!=
'\0'
))
{
usage
();
exit
(
EXIT_FAILURE
);
}
break
;
case
's'
:
opts
->
shutdown_time
=
(
int
)
strtol
(
optarg
,
&
check
,
0
);
if
(
opts
->
shutdown_time
<
0
||
(
check
&&
*
check
!=
'\0'
))
{
usage
();
exit
(
EXIT_FAILURE
);
...
...
@@ -70,6 +91,17 @@ static void parse_args(int argc, char *argv[], struct group_join_opts *opts)
exit
(
EXIT_FAILURE
);
}
if
((
opts
->
leave_time
>=
0
)
&&
(
opts
->
leave_time
<=
opts
->
join_time
))
{
usage
();
exit
(
EXIT_FAILURE
);
}
if
((
opts
->
shutdown_time
<=
opts
->
join_time
)
||
(
opts
->
shutdown_time
<=
opts
->
leave_time
))
{
usage
();
exit
(
EXIT_FAILURE
);
}
opts
->
addr_str
=
argv
[
optind
++
];
opts
->
gid_file
=
argv
[
optind
++
];
...
...
@@ -78,16 +110,16 @@ static void parse_args(int argc, char *argv[], struct group_join_opts *opts)
int
main
(
int
argc
,
char
*
argv
[])
{
struct
group_join_opts
opts
;
struct
group_join_
leave_
opts
opts
;
margo_instance_id
mid
=
MARGO_INSTANCE_NULL
;
ssg_group_id_t
in_g_id
=
SSG_GROUP_ID_NULL
;
ssg_group_id_t
out_g_id
=
SSG_GROUP_ID_NULL
;
ssg_member_id_t
my_id
;
int
group_size
;
int
sret
;
/* set any default options (that may be overwritten by cmd args) */
opts
.
duration
=
10
;
/* default to running for 10 seconds */
opts
.
join_time
=
0
;
/* join the group immediately */
opts
.
leave_time
=
-
1
;
/* default to never leaving group */
opts
.
shutdown_time
=
10
;
/* default to shutting down after 10 seconds */
/* parse cmdline arguments */
parse_args
(
argc
,
argv
,
&
opts
);
...
...
@@ -104,26 +136,38 @@ int main(int argc, char *argv[])
/* load GID from file */
ssg_group_id_load
(
opts
.
gid_file
,
&
in_g_id
);
/* sleep until time to join */
if
(
opts
.
join_time
>
0
)
margo_thread_sleep
(
mid
,
opts
.
join_time
*
1000
.
0
);
/* XXX do we want to use callback for testing anything about group??? */
out_g_id
=
ssg_group_join
(
in_g_id
,
NULL
,
NULL
);
DIE_IF
(
out_g_id
==
SSG_GROUP_ID_NULL
,
"ssg_group_join"
);
ssg_group_id_free
(
in_g_id
);
/* sleep for given duration to allow group time to run */
if
(
opts
.
duration
>
0
)
margo_thread_sleep
(
mid
,
opts
.
duration
*
1000
.
0
);
if
(
opts
.
leave_time
>
0
)
{
margo_thread_sleep
(
mid
,
(
opts
.
leave_time
-
opts
.
join_time
)
*
1000
.
0
);
/* dump group to see view prior to leaving */
ssg_group_dump
(
out_g_id
);
sret
=
ssg_group_leave
(
out_g_id
);
DIE_IF
(
sret
!=
SSG_SUCCESS
,
"ssg_group_leave"
);
goto
cleanup
;
}
/* get my group id and the size of the group */
my_id
=
ssg_get_group_self_id
(
out_g_id
);
DIE_IF
(
my_id
==
SSG_MEMBER_ID_INVALID
,
"ssg_get_group_self_id"
);
group_size
=
ssg_get_group_size
(
out_g_id
);
DIE_IF
(
group_size
==
0
,
"ssg_get_group_size"
);
printf
(
"group member %lu successfully created group (size == %d)
\n
"
,
my_id
,
group_size
);
if
(
opts
.
leave_time
>
0
)
margo_thread_sleep
(
mid
,
(
opts
.
shutdown_time
-
opts
.
leave_time
)
*
1000
.
0
);
else
margo_thread_sleep
(
mid
,
(
opts
.
shutdown_time
-
opts
.
join_time
)
*
1000
.
0
);
/* print group at each member */
ssg_group_dump
(
out_g_id
);
/** cleanup **/
cleanup:
ssg_group_destroy
(
out_g_id
);
ssg_finalize
();
margo_finalize
(
mid
);
...
...
tests/ssg-launch-group.c
View file @
d76bef34
...
...
@@ -33,7 +33,7 @@ struct group_launch_opts
char
*
addr_str
;
char
*
group_mode
;
char
*
group_addr_conf_file
;
int
duration
;
int
shutdown_time
;
char
*
gid_file
;
char
*
group_name
;
};
...
...
@@ -47,24 +47,24 @@ static void usage()
"NOTE: A path to an address CONFFILE is required when using
\"
conf
\"
mode.
\n
"
"
\n
"
"OPTIONS:
\n
"
"
\t
-
d DUR
\t\t
Specify a time duration (in seconds) to run the group for
\n
"
"
\t
-f
FILE
\t\t
Store group GID at a given file path
\n
"
"
\t
-n
NAME
\t\t
Specify the name of the launched group
\n
"
);
"
\t
-
s <TIME>
\t\t
Time duration (in seconds) to run the group before shutting down
\n
"
"
\t
-f
<FILE>
\t\t
File path to store group ID in
\n
"
"
\t
-n
<NAME>
\t\t
Name of the group to launch
\n
"
);
}
static
void
parse_args
(
int
argc
,
char
*
argv
[],
struct
group_launch_opts
*
opts
)
{
int
c
;
const
char
*
options
=
"
d
:f:n:"
;
const
char
*
options
=
"
s
:f:n:"
;
char
*
check
=
NULL
;
while
((
c
=
getopt
(
argc
,
argv
,
options
))
!=
-
1
)
{
switch
(
c
)
{
case
'
d
'
:
opts
->
duration
=
(
int
)
strtol
(
optarg
,
&
check
,
0
);
if
(
opts
->
duration
<
0
||
(
check
&&
*
check
!=
'\0'
))
case
'
s
'
:
opts
->
shutdown_time
=
(
int
)
strtol
(
optarg
,
&
check
,
0
);
if
(
opts
->
shutdown_time
<
0
||
(
check
&&
*
check
!=
'\0'
))
{
usage
();
exit
(
EXIT_FAILURE
);
...
...
@@ -92,6 +92,8 @@ static void parse_args(int argc, char *argv[], struct group_launch_opts *opts)
opts
->
group_mode
=
argv
[
optind
++
];
if
(
strcmp
(
opts
->
group_mode
,
"conf"
)
==
0
)
{
fprintf
(
stderr
,
"Error: configuration file mode not supported currently!
\n
"
);
exit
(
EXIT_FAILURE
);
if
((
argc
-
optind
)
!=
1
)
{
usage
();
...
...
@@ -131,7 +133,7 @@ int main(int argc, char *argv[])
int
sret
;
/* set any default options (that may be overwritten by cmd args) */
opts
.
duration
=
10
;
/* default to running group for 10 seconds */
opts
.
shutdown_time
=
10
;
/* default to running group for 10 seconds */
opts
.
group_name
=
"simple_group"
;
opts
.
gid_file
=
NULL
;
...
...
@@ -139,8 +141,13 @@ int main(int argc, char *argv[])
parse_args
(
argc
,
argv
,
&
opts
);
#ifdef SSG_HAVE_MPI
int
mpi_rank
,
mpi_size
;
if
(
strcmp
(
opts
.
group_mode
,
"mpi"
)
==
0
)
{
MPI_Init
(
&
argc
,
&
argv
);
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
mpi_rank
);
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
mpi_size
);
}
#endif
/* init margo */
...
...
@@ -167,7 +174,8 @@ int main(int argc, char *argv[])
ssg_group_id_store
(
opts
.
gid_file
,
g_id
);
/* sleep for given duration to allow group time to run */
if
(
opts
.
duration
>
0
)
margo_thread_sleep
(
mid
,
opts
.
duration
*
1000
.
0
);
if
(
opts
.
shutdown_time
>
0
)
margo_thread_sleep
(
mid
,
opts
.
shutdown_time
*
1000
.
0
);
/* get my group id and the size of the group */
my_id
=
ssg_get_group_self_id
(
g_id
);
...
...
@@ -179,9 +187,9 @@ int main(int argc, char *argv[])
/* print group at each member */
ssg_group_dump
(
g_id
);
ssg_group_destroy
(
g_id
);
/** cleanup **/
ssg_group_destroy
(
g_id
);
ssg_finalize
();
margo_finalize
(
mid
);
#ifdef SSG_HAVE_MPI
...
...
tests/test-util.sh
View file @
d76bef34
...
...
@@ -18,7 +18,7 @@ function launch_ssg_group_mpi ()
options
=
"
$options
-n
$SSG_GROUP_LAUNCH_NAME
"
fi
if
[
!
-z
$SSG_GROUP_LAUNCH_DURATION
]
;
then
options
=
"
$options
-
d
$SSG_GROUP_LAUNCH_DURATION
"
options
=
"
$options
-
s
$SSG_GROUP_LAUNCH_DURATION
"
fi
if
[
!
-z
$SSG_GROUP_LAUNCH_GIDFILE
]
;
then
options
=
"
$options
-f
$SSG_GROUP_LAUNCH_GIDFILE
"
...
...
@@ -27,22 +27,3 @@ function launch_ssg_group_mpi ()
# launch SSG group given options
mpirun
-np
$nmembers
tests/ssg-launch-group
$options
$hg_addr
mpi
}
function
join_ssg_group
()
{
hg_addr
=
${
1
:-
"na+sm"
}
gid_file
=
${
2
}
options
=
""
if
[
-z
"
$gid_file
"
]
;
then
echo
"Error: join_ssg_group requires a valid GID file argument"
exit
1
fi
# parse known cmdline options out of env
if
[
!
-z
$SSG_GROUP_LAUNCH_DURATION
]
;
then
options
=
"
$options
-d
$SSG_GROUP_LAUNCH_DURATION
"
fi
tests/ssg-join-group
$options
$hg_addr
$gid_file
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment