Commit 30bb3be1 authored by Misbah Mubarak's avatar Misbah Mubarak
Browse files

Merge branch 'darshan-3x-support' into 'master'

Darshan 3x support

See merge request !42
parents 96306239 24603a7f
......@@ -47,7 +47,7 @@ endif
if USE_DARSHAN
AM_CPPFLAGS += ${DARSHAN_CFLAGS} -DUSE_DARSHAN=1
src_libcodes_la_SOURCES += src/workload/methods/codes-darshan-io-wrkld.c
src_libcodes_la_SOURCES += src/workload/methods/codes-darshan3-io-wrkld.c
LDADD += ${DARSHAN_LIBS}
TESTS += tests/workload/darshan-dump.sh
endif
......
......@@ -57,7 +57,7 @@ struct iolang_params
struct darshan_params
{
char log_file_path[MAX_NAME_LENGTH_WKLD];
int64_t aggregator_cnt;
int app_cnt;
};
struct recorder_params
......@@ -146,7 +146,24 @@ enum codes_workload_op_type
/* for workloads that have events not yet handled
* (eg the workload language) */
CODES_WK_IGNORE
CODES_WK_IGNORE,
/* extended IO workload operations: MPI */
/* open */
CODES_WK_MPI_OPEN,
/* close */
CODES_WK_MPI_CLOSE,
/* write */
CODES_WK_MPI_WRITE,
/* read */
CODES_WK_MPI_READ,
/* collective open */
CODES_WK_MPI_COLL_OPEN,
/* collective_write */
CODES_WK_MPI_COLL_WRITE,
/* collective_read */
CODES_WK_MPI_COLL_READ,
};
/* I/O operation parameters */
......@@ -157,7 +174,7 @@ struct codes_workload_op
*/
/* what type of operation this is */
int op_type;
enum codes_workload_op_type op_type;
/* currently only used by network workloads */
double start_time;
double end_time;
......@@ -313,6 +330,11 @@ void codes_workload_print_op(
int app_id,
int rank);
/* Query the workload method registered under `type` for the read/write
 * time and byte totals of one (app_id, rank) pair.
 *
 * type          - method name, matched against each method's method_name
 * params        - method-specific parameter blob, forwarded unchanged
 * app_id, rank  - identify the workload instance being queried
 * read_time, write_time, read_bytes, written_bytes
 *               - output pointers handed straight to the method's hook
 *                 (units are method-defined; presumably seconds and bytes
 *                 - TODO confirm against the darshan3 implementation)
 *
 * Returns the hook's own return value, or -1 when the matching method does
 * not provide a codes_workload_get_time hook. */
int codes_workload_get_time(const char *type,
const char * params,
int app_id,
int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes);
/* implementation structure */
struct codes_workload_method
{
......@@ -324,6 +346,8 @@ struct codes_workload_method
void (*codes_workload_get_next)(int app_id, int rank, struct codes_workload_op *op);
void (*codes_workload_get_next_rc2)(int app_id, int rank);
int (*codes_workload_get_rank_cnt)(const char* params, int app_id);
/* added for get all read or write time */
int (*codes_workload_get_time)(const char * params, int app_id, int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes);
};
......
......@@ -27,7 +27,6 @@ static struct option long_opts[] =
{"num-ranks", required_argument, NULL, 'n'},
{"start-rank", required_argument, NULL, 'r'},
{"d-log", required_argument, NULL, 'l'},
{"d-aggregator-cnt", required_argument, NULL, 'a'},
{"i-meta", required_argument, NULL, 'm'},
{"i-use-relpath", no_argument, NULL, 'p'},
{"r-trace-dir", required_argument, NULL, 'd'},
......@@ -53,7 +52,6 @@ void usage(){
"-s: print final workload stats\n"
"DARSHAN OPTIONS (darshan_io_workload)\n"
"--d-log: darshan log file\n"
"--d-aggregator-cnt: number of aggregators for collective I/O in darshan\n"
"IOLANG OPTIONS (iolang_workload)\n"
"--i-meta: i/o language kernel meta file path\n"
"--i-use-relpath: use i/o kernel path relative meta file path\n"
......@@ -133,9 +131,6 @@ int main(int argc, char *argv[])
case 'l':
strcpy(d_params.log_file_path, optarg);
break;
case 'a':
d_params.aggregator_cnt = atol(optarg);
break;
case 'm':
strcpy(i_params.io_kernel_meta_path, optarg);
break;
......@@ -206,11 +201,6 @@ int main(int argc, char *argv[])
usage();
return 1;
}
else if (d_params.aggregator_cnt == 0){
fprintf(stderr, "Expected \"--d-aggregator-cnt\" argument for darshan workload\n");
usage();
return 1;
}
else{
wparams = (char*)&d_params;
}
......@@ -294,6 +284,7 @@ int main(int argc, char *argv[])
/* if num_ranks not set, pull it from the workload */
if (n == -1){
//printf("Getting rank count\n");
n = codes_workload_get_rank_cnt(type, wparams, 0);
if (n == -1) {
fprintf(stderr,
......@@ -301,12 +292,17 @@ int main(int argc, char *argv[])
"Specify option --num-ranks\n");
return 1;
}
printf("rank count = %d\n", n);
}
for (i = start_rank ; i < start_rank+n; i++){
struct codes_workload_op op;
printf("loading %s, %d\n", type, i);
//printf("loading %s, %d\n", type, i);
int id = codes_workload_load(type, wparams, 0, i);
double total_read_time = 0.0, total_write_time = 0.0;
int64_t total_read_bytes = 0, total_written_bytes = 0;
codes_workload_get_time(type, wparams, 0, i, &total_read_time, &total_write_time, &total_read_bytes, &total_written_bytes);
printf("total_read_time = %f, total_write_time = %f\n", total_read_time, total_write_time);
assert(id != -1);
do {
codes_workload_get_next(id, 0, i, &op);
......@@ -321,16 +317,23 @@ int main(int argc, char *argv[])
num_barriers++;
break;
case CODES_WK_OPEN:
case CODES_WK_MPI_OPEN:
case CODES_WK_MPI_COLL_OPEN:
num_opens++;
break;
case CODES_WK_CLOSE:
case CODES_WK_MPI_CLOSE:
num_closes++;
break;
case CODES_WK_WRITE:
case CODES_WK_MPI_WRITE:
case CODES_WK_MPI_COLL_WRITE:
num_writes++;
write_size += op.u.write.size;
break;
case CODES_WK_READ:
case CODES_WK_MPI_READ:
case CODES_WK_MPI_COLL_READ:
    /* POSIX, MPI independent and MPI collective reads are all tallied
     * together; the byte count must come from the read member of the
     * op union, not the write member */
    num_reads++;
    read_size += op.u.read.size;
    break;
......@@ -392,11 +395,11 @@ int main(int argc, char *argv[])
{
if(i == 0)
{
int j;
printf("\n rank %d wait_all: ", i);
for(j = 0; j < op.u.waits.count; j++)
printf(" %d ", op.u.waits.req_ids[j]);
num_waitalls++;
int j;
printf("\n rank %d wait_all: ", i);
for(j = 0; j < op.u.waits.count; j++)
printf(" %d ", op.u.waits.req_ids[j]);
num_waitalls++;
}
}
break;
......
......@@ -13,14 +13,24 @@
/* list of available methods. These are statically compiled for now, but we
* could make generators optional via autoconf tests etc. if needed
*/
/* Compile-time selection between the POSIX and MPI-IO flavours of the
 * darshan 3.00 workload method (added by pj).  The #if/#elif chain below
 * tests DARSHAN_POSIX_IO first, so the MPI-IO method is only declared when
 * DARSHAN_POSIX_IO is 0 and DARSHAN_MPI_IO is non-zero. */
#define DARSHAN_POSIX_IO 1
#define DARSHAN_MPI_IO 0
/* methods that are always compiled in */
extern struct codes_workload_method test_workload_method;
extern struct codes_workload_method iolang_workload_method;
/* methods that depend on optional build-time features */
#ifdef USE_DUMPI
extern struct codes_workload_method dumpi_trace_workload_method;
#endif
#ifdef USE_DARSHAN
extern struct codes_workload_method darshan_io_workload_method;
#if DARSHAN_POSIX_IO
extern struct codes_workload_method darshan_posix_io_workload_method;
#elif DARSHAN_MPI_IO
extern struct codes_workload_method darshan_mpi_io_workload_method;
#endif
#endif
#ifdef USE_RECORDER
extern struct codes_workload_method recorder_io_workload_method;
#endif
......@@ -34,8 +44,16 @@ static struct codes_workload_method const * method_array_default[] =
#ifdef USE_DUMPI
&dumpi_trace_workload_method,
#endif
#ifdef USE_DARSHAN
&darshan_io_workload_method,
/* added by pj: register exactly one darshan 3.x method, POSIX taking
 * precedence over MPI-IO.  The macro tested here must be spelled the same
 * as the one guarding the matching extern declaration, otherwise the
 * MPI-IO entry is silently dropped from the table. */
#if DARSHAN_POSIX_IO
&darshan_posix_io_workload_method,
#elif DARSHAN_MPI_IO
/* TODO: MPI_IO */
&darshan_mpi_io_workload_method,
#endif
#endif
#ifdef USE_RECORDER
&recorder_io_workload_method,
......@@ -264,15 +282,42 @@ void codes_workload_get_next_rc2(
method_array[wkld_id]->codes_workload_get_next_rc2(app_id, rank);
}
/*
 * Ask the workload method named `type` for the read/write time and byte
 * totals of (app_id, rank).
 *
 * The method table is initialised on demand, then searched linearly for an
 * entry whose method_name equals `type`.  The four output pointers are
 * forwarded to the method's hook untouched; this function never writes
 * through them itself.
 *
 * Returns:
 *   - the hook's return value when the method implements
 *     codes_workload_get_time;
 *   - -1 when the method exists but has no such hook;
 *   - -1 when no registered method is called `type` (previously this case
 *     returned 0, which was indistinguishable from success).
 */
int codes_workload_get_time(const char *type, const char *params, int app_id,
        int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes)
{
    int i;

    init_workload_methods();

    for (i = 0; method_array[i] != NULL; i++)
    {
        if (strcmp(method_array[i]->method_name, type) != 0)
            continue;

        /* method found, but timing queries are optional per method */
        if (method_array[i]->codes_workload_get_time == NULL)
            return -1;

        return method_array[i]->codes_workload_get_time(
                params, app_id, rank, read_time, write_time, read_bytes, written_bytes);
    }

    /* unknown workload type: report failure instead of pretending the
     * (never written) outputs are valid */
    return -1;
}
int codes_workload_get_rank_cnt(
const char* type,
const char* params,
int app_id)
{
int i;
init_workload_methods();
//printf("entering rank count, method_array = %p \n", method_array);
for(i=0; method_array[i] != NULL; i++)
{
//printf("%p\n", method_array[i]);
//printf("method_array[%d]->method_name = %s, type = %s\n", i, method_array[i]->method_name, type);
if(strcmp(method_array[i]->method_name, type) == 0)
{
if (method_array[i]->codes_workload_get_rank_cnt != NULL)
......@@ -293,6 +338,8 @@ void codes_workload_print_op(
int app_id,
int rank)
{
char *name;
switch(op->op_type){
case CODES_WK_END:
fprintf(f, "op: app:%d rank:%d type:end\n", app_id, rank);
......@@ -306,28 +353,47 @@ void codes_workload_print_op(
app_id, rank, op->u.barrier.count, op->u.barrier.root);
break;
case CODES_WK_OPEN:
fprintf(f, "op: app:%d rank:%d type:open file_id:%llu flag:%d\n",
app_id, rank, LLU(op->u.open.file_id), op->u.open.create_flag);
case CODES_WK_MPI_OPEN:
case CODES_WK_MPI_COLL_OPEN:
if(op->op_type == CODES_WK_OPEN) name = "open";
if(op->op_type == CODES_WK_MPI_OPEN) name = "mpi_open";
if(op->op_type == CODES_WK_MPI_COLL_OPEN) name = "mpi_coll_open";
fprintf(f, "op: app:%d rank:%d type:%s file_id:%llu flag:%d\n",
app_id, rank, name, LLU(op->u.open.file_id), op->u.open.create_flag);
break;
case CODES_WK_CLOSE:
fprintf(f, "op: app:%d rank:%d type:close file_id:%llu\n",
app_id, rank, LLU(op->u.close.file_id));
case CODES_WK_MPI_CLOSE:
if(op->op_type == CODES_WK_CLOSE) name = "close";
if(op->op_type == CODES_WK_MPI_CLOSE) name = "mpi_close";
fprintf(f, "op: app:%d rank:%d type:%s file_id:%llu\n",
app_id, rank, name, LLU(op->u.close.file_id));
break;
case CODES_WK_WRITE:
fprintf(f, "op: app:%d rank:%d type:write "
case CODES_WK_MPI_WRITE:
case CODES_WK_MPI_COLL_WRITE:
if(op->op_type == CODES_WK_WRITE) name = "write";
if(op->op_type == CODES_WK_MPI_WRITE) name = "mpi_write";
if(op->op_type == CODES_WK_MPI_COLL_WRITE) name = "mpi_coll_write";
fprintf(f, "op: app:%d rank:%d type:%s "
"file_id:%llu off:%llu size:%llu\n",
app_id, rank, LLU(op->u.write.file_id), LLU(op->u.write.offset),
app_id, rank, name, LLU(op->u.write.file_id), LLU(op->u.write.offset),
LLU(op->u.write.size));
break;
case CODES_WK_READ:
fprintf(f, "op: app:%d rank:%d type:read "
case CODES_WK_MPI_READ:
case CODES_WK_MPI_COLL_READ:
if(op->op_type == CODES_WK_READ) name = "read";
if(op->op_type == CODES_WK_MPI_READ) name = "mpi_read";
if(op->op_type == CODES_WK_MPI_COLL_READ) name = "mpi_coll_read";
fprintf(f, "op: app:%d rank:%d type:%s "
"file_id:%llu off:%llu size:%llu\n",
app_id, rank, LLU(op->u.read.file_id), LLU(op->u.read.offset),
app_id, rank, name, LLU(op->u.read.file_id), LLU(op->u.read.offset),
LLU(op->u.read.size));
break;
case CODES_WK_SEND:
fprintf(f, "op: app:%d rank:%d type:send "
"src:%d dst:%d bytes:%"PRIu64" type:%d count:%d tag:%d "
"src:%d dst:%d bytes:%d type:%d count:%d tag:%d "
"start:%.5e end:%.5e\n",
app_id, rank,
op->u.send.source_rank, op->u.send.dest_rank,
......
/*
* Copyright (C) 2013 University of Chicago.
* See COPYRIGHT notice in top-level directory.
*
*/
#include <assert.h>
#include <math.h>
#include "codes/codes-workload.h"
#include "codes/quickhash.h"
#include "darshan-logutils.h"
/* Default delay fractions and the negligible-delay threshold; their users
 * lie outside the visible part of this file (presumably the delay
 * calculation helpers - TODO confirm). */
#define DEF_INTER_IO_DELAY_PCT 0.2
#define DEF_INTER_CYC_DELAY_PCT 0.4
#define DARSHAN_NEGLIGIBLE_DELAY 0.00001
/* bucket count handed to qhash_init() for the per-rank context table */
#define RANK_HASH_TABLE_SIZE 397
/* smaller of two values; note that each argument may be evaluated twice */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
/* Round x up to the next multiple of 8 (values already aligned are left
 * unchanged).  The former definition, x + x % 8, only produced a multiple
 * of 8 when x % 8 was 0 or 4.  x is evaluated twice. */
#define ALIGN_BY_8(x) ((x) + ((8 - ((x) % 8)) % 8))
/* structure for storing a darshan workload operation (a codes op with 2 timestamps) */
struct darshan_io_op
{
/* the operation handed back to the CODES workload API by get_next */
struct codes_workload_op codes_op;
/* timestamp at which the operation begins */
double start_time;
/* timestamp at which the operation ends; get_next copies this into the
 * rank context's last_op_time after returning the op */
double end_time;
};
/* I/O context structure managed by each rank in the darshan workload */
struct rank_io_context
{
/* rank this context belongs to; its address is the key used when the
 * context is added to the rank hash table */
int64_t my_rank;
/* end time of the op most recently returned for this rank (starts at 0.0) */
double last_op_time;
/* opaque op container obtained from darshan_init_io_op_dat() */
void *io_op_dat;
/* starts at 0 when the context is created; its consumers are outside the
 * visible part of this file (presumably the next file offset - TODO confirm) */
off_t next_off;
/* linkage into the rank hash table */
struct qhash_head hash_link;
};
/* Configuration entry point: builds a darshan_params from the given
 * configuration section (see the definition for the keys it reads). */
static void * darshan_io_workload_read_config(
ConfigHandle * handle,
char const * section_name,
char const * annotation,
int num_ranks);
/* Darshan workload generator's implementation of the CODES workload API */
static int darshan_io_workload_load(const char *params, int app_id, int rank);
static void darshan_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op);
static int darshan_io_workload_get_rank_cnt(const char *params, int app_id);
/* key comparison callback passed to qhash_init() for the rank table */
static int darshan_rank_hash_compare(void *key, struct qhash_head *link);
/* Darshan I/O op data structure access (insert, remove) abstraction.
 * The container is created by init, filled by insert, made ready for
 * in-order retrieval by finalize, and drained by remove. */
static void *darshan_init_io_op_dat(void);
static void darshan_insert_next_io_op(void *io_op_dat, struct darshan_io_op *io_op);
static void darshan_remove_next_io_op(void *io_op_dat, struct darshan_io_op *io_op,
double last_op_time);
static void darshan_finalize_io_op_dat(void *io_op_dat);
static int darshan_io_op_compare(const void *p1, const void *p2);
/* Helper functions for implementing the Darshan workload generator.
 * Their definitions are not shown alongside these declarations; the notes
 * below only state how the loader uses them. */
/* called by the loader for a file record whose rank equals the loading rank */
static void generate_psx_ind_file_events(struct darshan_file *file,
struct rank_io_context *io_context);
/* called by the loader for a file record whose rank is -1 (shared file) */
static void generate_psx_coll_file_events(struct darshan_file *file,
struct rank_io_context *io_context,
int64_t nprocs, int64_t aggregator_cnt);
static double generate_psx_open_event(struct darshan_file *file, int create_flag,
double meta_op_time, double cur_time,
struct rank_io_context *io_context, int insert_flag);
static double generate_psx_close_event(struct darshan_file *file, double meta_op_time,
double cur_time, struct rank_io_context *io_context,
int insert_flag);
static double generate_barrier_event(struct darshan_file *file, int64_t root, double cur_time,
struct rank_io_context *io_context);
static double generate_psx_ind_io_events(struct darshan_file *file, int64_t io_ops_this_cycle,
double inter_io_delay, double cur_time,
struct rank_io_context *io_context);
static double generate_psx_coll_io_events(struct darshan_file *file, int64_t ind_io_ops_this_cycle,
int64_t coll_io_ops_this_cycle, int64_t nprocs,
int64_t aggregator_cnt, double inter_io_delay,
double meta_op_time, double cur_time,
struct rank_io_context *io_context);
static void determine_ind_io_params(struct darshan_file *file, int write_flag, size_t *io_sz,
off_t *io_off, struct rank_io_context *io_context);
static void determine_coll_io_params(struct darshan_file *file, int write_flag, int64_t coll_op_cnt,
int64_t agg_cnt, int64_t agg_ndx, size_t *io_sz, off_t *io_off,
struct rank_io_context *io_context);
static void calc_io_delays(struct darshan_file *file, int64_t num_opens, int64_t num_io_ops,
double total_delay, double *first_io_delay, double *close_delay,
double *inter_open_delay, double *inter_io_delay);
/* run by the loader on every file record before events are generated from it */
static void file_sanity_check(struct darshan_file *file, struct darshan_job *job);
/* workload method name and function pointers for the CODES workload API.
 * Only the hooks named below are provided; any other member of
 * struct codes_workload_method is left zero-initialized (NULL) by the
 * designated initializer. */
struct codes_workload_method darshan_io_workload_method =
{
/* name callers pass as the workload "type" to select this method */
.method_name = "darshan_io_workload",
.codes_workload_read_config = darshan_io_workload_read_config,
.codes_workload_load = darshan_io_workload_load,
.codes_workload_get_next = darshan_io_workload_get_next,
.codes_workload_get_rank_cnt = darshan_io_workload_get_rank_cnt,
};
/* number of processes in the darshan job; 0 until the first successful
 * load copies it from the job record */
static int total_rank_cnt = 0;
/* hash table to store per-rank workload contexts; created on first load
 * and torn down (reset to NULL) once the last context has been removed */
static struct qhash_table *rank_tbl = NULL;
/* number of rank contexts currently stored in rank_tbl */
static int rank_tbl_pop = 0;
/*
 * Build a darshan_params from a configuration section.
 *
 * handle        - configuration to read from
 * section_name  - section holding the darshan settings
 * annotation    - annotation forwarded to both lookups
 * num_ranks     - unused by this method
 *
 * Reads "darshan_log_file" (resolved relative to the config file) and
 * "darshan_aggregator_count".  Both lookups now go through the caller's
 * handle and section; the aggregator count used to be read from the global
 * config object and a hard-coded "workload" section, ignoring the
 * arguments.
 *
 * Allocation or lookup failures trip an assert, as before.  Returns a
 * malloc'd darshan_params; this function does not free it.
 */
static void * darshan_io_workload_read_config(
        ConfigHandle * handle,
        char const * section_name,
        char const * annotation,
        int num_ranks)
{
    (void)num_ranks;

    darshan_params *d = malloc(sizeof(*d));
    assert(d);
    d->log_file_path[0] = '\0';
    d->aggregator_cnt = -1;

    int rc = configuration_get_value_relpath(handle, section_name,
            "darshan_log_file", annotation, d->log_file_path,
            MAX_NAME_LENGTH_WKLD);
    assert(rc > 0);

    /* start from the "unset" value so a failed lookup in a build with
     * asserts disabled never stores an indeterminate count */
    int tmp = -1;
    rc = configuration_get_value_int(handle, section_name,
            "darshan_aggregator_count", annotation, &tmp);
    assert(rc == 0);
    d->aggregator_cnt = tmp;

    return d;
}
/* load the workload generator for this rank, given input params
 *
 * params  - actually a darshan_params *: supplies the log file path and the
 *           aggregator count used for shared-file events
 * app_id  - handed to APP_ID_UNSUPPORTED (macro defined elsewhere;
 *           presumably rejects app ids this method cannot serve - TODO confirm)
 * rank    - rank whose events are generated; asserted to be below the
 *           job's process count
 *
 * Reads the job record and every relevant file record from the darshan log,
 * generates this rank's events into a fresh rank_io_context, and stores the
 * context in the rank hash table (created on first use).
 *
 * Returns 0 on success and -1 on failure.  On every failure path after the
 * log is opened the log is closed, and the rank context is freed once it
 * has been allocated (previously both leaked when reading a file record
 * failed, and the context leaked when the hash table could not be created).
 */
static int darshan_io_workload_load(const char *params, int app_id, int rank)
{
    darshan_params *d_params = (darshan_params *)params;
    darshan_fd logfile_fd;
    struct darshan_job job;
    struct darshan_file next_file;
    struct rank_io_context *my_ctx;
    int ret;

    APP_ID_UNSUPPORTED(app_id, "darshan")

    if (!d_params)
        return -1;

    /* open the darshan log to begin reading in file i/o info */
    /* NOTE(review): darshan_fd's underlying type is not visible here;
     * confirm that "< 0" is the right failure test for darshan_log_open */
    logfile_fd = darshan_log_open(d_params->log_file_path, "r");
    if (logfile_fd < 0)
        return -1;

    /* get the per-job stats from the log */
    ret = darshan_log_getjob(logfile_fd, &job);
    if (ret < 0)
    {
        darshan_log_close(logfile_fd);
        return -1;
    }

    /* the first successful load fixes the job-wide rank count */
    if (!total_rank_cnt)
    {
        total_rank_cnt = job.nprocs;
    }
    assert(rank < total_rank_cnt);

    /* allocate the i/o context needed by this rank */
    my_ctx = malloc(sizeof(struct rank_io_context));
    if (!my_ctx)
    {
        darshan_log_close(logfile_fd);
        return -1;
    }
    my_ctx->my_rank = (int64_t)rank;
    my_ctx->last_op_time = 0.0;
    my_ctx->io_op_dat = darshan_init_io_op_dat();
    my_ctx->next_off = 0;

    /* loop over all files contained in the log file */
    while ((ret = darshan_log_getfile(logfile_fd, &job, &next_file)) > 0)
    {
        /* generate all i/o events contained in this independent file */
        if (next_file.rank == rank)
        {
            /* make sure the file i/o counters are valid */
            file_sanity_check(&next_file, &job);

            /* generate i/o events and store them in this rank's workload context */
            generate_psx_ind_file_events(&next_file, my_ctx);
        }
        /* generate all i/o events involving this rank in this collective file */
        else if (next_file.rank == -1)
        {
            /* make sure the file i/o counters are valid */
            file_sanity_check(&next_file, &job);

            /* generate collective i/o events and store them in the rank context */
            generate_psx_coll_file_events(&next_file, my_ctx, job.nprocs, d_params->aggregator_cnt);
        }
        /* records of lower ranks are skipped; the first record of a higher
         * rank ends the scan (this relies on the log's record ordering -
         * NOTE(review): confirm against the darshan log format) */
        else if (next_file.rank < rank)
            continue;
        else
            break;

        /* after event generation the record's POSIX counters must all be 0 */
        assert(next_file.counters[CP_POSIX_OPENS] == 0);
        assert(next_file.counters[CP_POSIX_READS] == 0);
        assert(next_file.counters[CP_POSIX_WRITES] == 0);
    }

    if (ret < 0)
    {
        /* reading a file record failed: release what this call acquired.
         * NOTE(review): no destructor for io_op_dat is visible here, so the
         * op container itself cannot be released at this point. */
        darshan_log_close(logfile_fd);
        free(my_ctx);
        return -1;
    }

    darshan_log_close(logfile_fd);

    /* finalize the rank's i/o context so i/o ops may be retrieved later (in order) */
    darshan_finalize_io_op_dat(my_ctx->io_op_dat);

    /* initialize the hash table of rank contexts, if it has not been initialized */
    if (!rank_tbl)
    {
        rank_tbl = qhash_init(darshan_rank_hash_compare, quickhash_64bit_hash, RANK_HASH_TABLE_SIZE);
        if (!rank_tbl)
        {
            /* the context was never published, so it must be freed here */
            free(my_ctx);
            return -1;
        }
    }

    /* add this rank context to the hash table */
    qhash_add(rank_tbl, &(my_ctx->my_rank), &(my_ctx->hash_link));
    rank_tbl_pop++;

    return 0;
}
/* pull the next event (independent or collective) for this rank from its event context */
static void darshan_io_workload_get_next(int app_id, int rank, struct codes_workload_op *op)
{
int64_t my_rank = (int64_t)rank;
struct qhash_head *hash_link = NULL;
struct rank_io_context *tmp = NULL;
struct darshan_io_op next_io_op;
assert(rank < total_rank_cnt);
/* find i/o context for this rank in the rank hash table */
hash_link = qhash_search(rank_tbl, &my_rank);
/* terminate the workload if there is no valid rank context */
if (!hash_link)
{
op->op_type = CODES_WK_END;
return;
}
/* get access to the rank's io_context data */
tmp = qhash_entry(hash_link, struct rank_io_context, hash_link);
assert(tmp->my_rank == my_rank);
/* get the next darshan i/o op out of this rank's context */
darshan_remove_next_io_op(tmp->io_op_dat, &next_io_op, tmp->last_op_time);
/* free the rank's i/o context if this is the last i/o op */
if (next_io_op.codes_op.op_type == CODES_WK_END)
{
qhash_del(hash_link);
free(tmp);
rank_tbl_pop--;
if (!rank_tbl_pop)
{
qhash_finalize(rank_tbl);
rank_tbl = NULL;
}
}
else
{
/* else, set the last op time to be the end of the returned op */
tmp->last_op_time = next_io_op.end_time;
}
/* return the codes op contained in the darshan i/o op */
*op = next_io_op.codes_op