Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Xin Wang
codes-dev
Commits
8a2a0fa3
Commit
8a2a0fa3
authored
Sep 28, 2015
by
Shane Snyder
Browse files
update checkpoint workload to run for whole iters
parent
1e7328ef
Changes
3
Hide whitespace changes
Inline
Side-by-side
codes/codes-workload.h
View file @
8a2a0fa3
...
...
@@ -72,7 +72,7 @@ struct checkpoint_wrkld_params
int
nprocs
;
/* number of workload processes */
double
checkpoint_sz
;
/* size of checkpoint, in TiB */
double
checkpoint_wr_bw
;
/* checkpoint write b/w, in GiB/s */
double
app_runtime
;
/* app runtime, in hour
s */
int
total_checkpoints
;
/* total number of checkpoint phase
s */
double
mtti
;
/* mean time to interrupt, in hours */
};
...
...
src/workload/codes-workload-dump.c
View file @
8a2a0fa3
...
...
@@ -34,7 +34,7 @@ static struct option long_opts[] =
{
"dumpi-log"
,
required_argument
,
NULL
,
'w'
},
{
"chkpoint-size"
,
required_argument
,
NULL
,
'S'
},
{
"chkpoint-bw"
,
required_argument
,
NULL
,
'B'
},
{
"chkpoint-
runtime
"
,
required_argument
,
NULL
,
'
R
'
},
{
"chkpoint-
iters
"
,
required_argument
,
NULL
,
'
i
'
},
{
"chkpoint-mtti"
,
required_argument
,
NULL
,
'M'
},
{
"iomock-request-type"
,
required_argument
,
NULL
,
'Q'
},
{
"iomock-num-requests"
,
required_argument
,
NULL
,
'N'
},
...
...
@@ -64,7 +64,7 @@ void usage(){
"CHECKPOINT OPTIONS (checkpoint_io_workload)
\n
"
"--chkpoint-size: size of aggregate checkpoint to write
\n
"
"--chkpoint-bw: checkpointing bandwidth
\n
"
"--chkpoint-
runtime: desired applic
ation
r
unt
ime
\n
"
"--chkpoint-
iters: iter
ation
co
unt
for checkpoint workload
\n
"
"--chkpoint-mtti: mean time to interrupt
\n
"
"MOCK IO OPTIONS (iomock_workload)
\n
"
"--iomock-request-type: whether to write or read
\n
"
...
...
@@ -162,8 +162,8 @@ int main(int argc, char *argv[])
case
'B'
:
c_params
.
checkpoint_wr_bw
=
atof
(
optarg
);
break
;
case
'
R
'
:
c_params
.
app_runtime
=
ato
f
(
optarg
);
case
'
i
'
:
c_params
.
total_checkpoints
=
ato
i
(
optarg
);
break
;
case
'M'
:
c_params
.
mtti
=
atof
(
optarg
);
...
...
@@ -272,7 +272,7 @@ int main(int argc, char *argv[])
else
if
(
strcmp
(
type
,
"checkpoint_io_workload"
)
==
0
)
{
if
(
c_params
.
checkpoint_sz
==
0
||
c_params
.
checkpoint_wr_bw
==
0
||
c_params
.
app_runtime
==
0
||
c_params
.
mtti
==
0
)
c_params
.
total_checkpoints
==
0
||
c_params
.
mtti
==
0
)
{
fprintf
(
stderr
,
"All checkpoint workload arguments are required
\n
"
);
usage
();
...
...
src/workload/methods/codes-checkpoint-wrkld.c
View file @
8a2a0fa3
...
...
@@ -49,11 +49,11 @@ struct checkpoint_state
/* how much this rank contributes to checkpoint (bytes) */
long
long
io_per_checkpoint
;
/* which checkpointing iteration are we on */
int
checkpoint
_number
;
int
cur_
checkpoint
;
/* how much we have checkpointed to file in current iteration (bytes) */
long
long
cur_checkpoint_sz
;
/*
how many rema
ining iterations
of
compute
/
checkpoint
phases are there
*/
int
remaining_iteration
s
;
/*
the total number of checkpo
in
t
ing iterations
(
compute
+
checkpoint
) to run
*/
int
total_checkpoint
s
;
struct
qhash_head
hash_link
;
};
...
...
@@ -94,8 +94,8 @@ static void * checkpoint_workload_read_config(
"checkpoint_wr_bw"
,
annotation
,
&
p
->
checkpoint_wr_bw
);
assert
(
!
rc
);
rc
=
configuration_get_value_
double
(
&
config
,
section_name
,
"
app_run_time
"
,
annotation
,
&
p
->
app_runtime
);
rc
=
configuration_get_value_
int
(
&
config
,
section_name
,
"
total_checkpoints
"
,
annotation
,
&
p
->
total_checkpoints
);
assert
(
!
rc
);
rc
=
configuration_get_value_double
(
&
config
,
section_name
,
"mtti"
,
...
...
@@ -112,7 +112,6 @@ static int checkpoint_workload_load(const char* params, int app_id, int rank)
checkpoint_wrkld_params
*
c_params
=
(
checkpoint_wrkld_params
*
)
params
;
struct
checkpoint_state
*
new_state
;
double
checkpoint_wr_time
;
double
checkpoint_phase_time
;
struct
checkpoint_id
this_chkpoint_id
;
if
(
!
c_params
)
...
...
@@ -136,7 +135,8 @@ static int checkpoint_workload_load(const char* params, int app_id, int rank)
new_state
->
rank
=
rank
;
new_state
->
app_id
=
app_id
;
new_state
->
status
=
CHECKPOINT_COMPUTE
;
new_state
->
checkpoint_number
=
0
;
new_state
->
cur_checkpoint
=
1
;
new_state
->
total_checkpoints
=
c_params
->
total_checkpoints
;
/* calculate the time (in seconds) taken to write the checkpoint to file */
checkpoint_wr_time
=
(
c_params
->
checkpoint_sz
*
1024
)
/* checkpoint size (GiB) */
...
...
@@ -154,13 +154,6 @@ static int checkpoint_workload_load(const char* params, int app_id, int rank)
new_state
->
io_per_checkpoint
=
(
c_params
->
checkpoint_sz
*
pow
(
1024
,
4
))
/
c_params
->
nprocs
;
/* calculate how many iterations based on how long the app should run for
* and how long it takes to compute + checkpoint the file
*/
checkpoint_phase_time
=
checkpoint_wr_time
+
new_state
->
checkpoint_interval
;
new_state
->
remaining_iterations
=
round
(
c_params
->
app_runtime
/
(
checkpoint_phase_time
/
60
/
60
));
/* add state for this checkpoint to hash table */
this_chkpoint_id
.
rank
=
rank
;
this_chkpoint_id
.
app_id
=
app_id
;
...
...
@@ -215,7 +208,7 @@ static void checkpoint_workload_get_next(int app_id, int rank, struct codes_work
/* set open parameters */
op
->
op_type
=
CODES_WK_OPEN
;
op
->
u
.
open
.
file_id
=
this_state
->
checkpoint
_number
;
op
->
u
.
open
.
file_id
=
this_state
->
cur_
checkpoint
;
op
->
u
.
open
.
create_flag
=
1
;
/* set the next status */
...
...
@@ -231,7 +224,7 @@ static void checkpoint_workload_get_next(int app_id, int rank, struct codes_work
/* set write parameters */
op
->
op_type
=
CODES_WK_WRITE
;
op
->
u
.
write
.
file_id
=
this_state
->
checkpoint
_number
;
op
->
u
.
write
.
file_id
=
this_state
->
cur_
checkpoint
;
op
->
u
.
write
.
offset
=
this_state
->
cur_checkpoint_sz
;
if
(
remaining
>=
DEFAULT_WR_BUF_SIZE
)
op
->
u
.
write
.
size
=
DEFAULT_WR_BUF_SIZE
;
...
...
@@ -253,15 +246,14 @@ static void checkpoint_workload_get_next(int app_id, int rank, struct codes_work
/* set close parameters */
op
->
op_type
=
CODES_WK_CLOSE
;
op
->
u
.
close
.
file_id
=
this_state
->
checkpoint
_number
;
op
->
u
.
close
.
file_id
=
this_state
->
cur_
checkpoint
;
/* set the next status -- if there are more iterations to
* be completed, start the next compute/checkpoint phase;
* otherwise, end the workload
*/
this_state
->
remaining_iterations
--
;
this_state
->
checkpoint_number
++
;
if
(
this_state
->
remaining_iterations
==
0
)
this_state
->
cur_checkpoint
++
;
if
(
this_state
->
cur_checkpoint
>
this_state
->
total_checkpoints
)
{
this_state
->
status
=
CHECKPOINT_INACTIVE
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment