Commit 22732a4e authored by Kevin Harms's avatar Kevin Harms

Add min, max, and variance counters to the darshan log; these are relevant for shared files only. This is to aid in performance calculations.


git-svn-id: https://svn.mcs.anl.gov/repos/darshan/trunk@291 3b7491f3-a168-0410-bf4b-c445ed680a29
parent 96890c5f
@@ -182,6 +182,10 @@ enum darshan_indices
CP_ACCESS4_COUNT,
CP_DEVICE, /* device id reported by stat */
CP_SIZE_AT_OPEN,
CP_FASTEST_RANK,
CP_FASTEST_RANK_BYTES,
CP_SLOWEST_RANK,
CP_SLOWEST_RANK_BYTES,
CP_NUM_INDICES,
};
@@ -203,6 +207,11 @@ enum f_darshan_indices
CP_F_MPI_WRITE_TIME, /* cumulative mpi-io write time */
CP_F_MAX_READ_TIME,
CP_F_MAX_WRITE_TIME,
CP_F_FASTEST_RANK_TIME,
CP_F_SLOWEST_RANK_TIME,
CP_F_VARIANCE_RANK_TIME,
CP_F_VARIANCE_RANK_BYTES,
CP_F_NUM_INDICES,
};
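The new integer and floating-point counters pair up: CP_FASTEST_RANK and CP_SLOWEST_RANK record which rank spent the least and the most cumulative I/O time on a shared file, the *_RANK_BYTES counters record how much data that rank moved, and CP_F_VARIANCE_RANK_TIME / CP_F_VARIANCE_RANK_BYTES hold the population variance of time and bytes across all ranks. A minimal post-processing sketch of how these counters could feed a performance calculation; print_shared_file_balance is a hypothetical helper, and it assumes a file record already parsed from the log into the usual counters/fcounters arrays:

    #include <inttypes.h>
    #include <math.h>
    #include <stdio.h>
    #include "darshan-log-format.h" /* assumed header providing the CP_* indices above */

    /* Hypothetical helper: summarize rank-to-rank imbalance on one
     * shared file using only the counters added in this commit. */
    static void print_shared_file_balance(const int64_t *counters,
                                          const double *fcounters)
    {
        printf("fastest rank %" PRId64 ": %.3f s for %" PRId64 " bytes\n",
               counters[CP_FASTEST_RANK],
               fcounters[CP_F_FASTEST_RANK_TIME],
               counters[CP_FASTEST_RANK_BYTES]);
        printf("slowest rank %" PRId64 ": %.3f s for %" PRId64 " bytes\n",
               counters[CP_SLOWEST_RANK],
               fcounters[CP_F_SLOWEST_RANK_TIME],
               counters[CP_SLOWEST_RANK_BYTES]);
        /* The variance counters are population variances, so their
         * square roots give the standard deviation across ranks. */
        printf("per-rank time stddev: %.3f s\n",
               sqrt(fcounters[CP_F_VARIANCE_RANK_TIME]));
    }

A large spread between the fastest and slowest rank, or a large time variance, suggests rank-level load imbalance on that file rather than uniformly slow I/O.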
@@ -152,6 +152,10 @@ char *darshan_names[] = {
"CP_ACCESS4_COUNT",
"CP_DEVICE",
"CP_SIZE_AT_OPEN",
"CP_FASTEST_RANK",
"CP_FASTEST_RANK_BYTES",
"CP_SLOWEST_RANK",
"CP_SLOWEST_RANK_BYTES",
"CP_NUM_INDICES"
};
@@ -172,7 +176,12 @@ char *darshan_f_names[] = {
"CP_F_MPI_WRITE_TIME", /* cumulative mpi-io write time */
"CP_F_MAX_READ_TIME",
"CP_F_MAX_WRITE_TIME",
"CP_F_NUM_INDICES",
"CP_F_FASTEST_RANK_TIME",
"CP_F_SLOWEST_RANK_TIME",
"CP_F_VARIANCE_RANK_TIME",
"CP_F_VARIANCE_RANK_BYTES",
"CP_F_NUM_INDICES"
};
/* function pointers so that we can switch functions depending on what file
@@ -139,6 +139,7 @@ int main(int argc, char **argv)
printf("# CP_ACCESS*_COUNT: count of the four most common access sizes.\n");
printf("# CP_DEVICE: device id reported by stat().\n");
printf("# CP_SIZE_AT_OPEN: size of file when first opened.\n");
printf("# CP_*_RANK_BYTES: fastest, slowest and variance of bytes transfer.\n");
printf("# CP_F_OPEN_TIMESTAMP: timestamp of first open (mpi or posix).\n");
printf("# CP_F_*_START_TIMESTAMP: timestamp of first read/write (mpi or posix).\n");
printf("# CP_F_*_END_TIMESTAMP: timestamp of last read/write (mpi or posix).\n");
@@ -148,6 +149,7 @@ int main(int argc, char **argv)
printf("# CP_F_POSIX_META_TIME: cumulative time spent in posix open, close, fsync, stat and seek, .\n");
printf("# CP_F_MPI_META_TIME: cumulative time spent in mpi-io open, close, set_view, and sync.\n");
printf("# CP_MAX_*_TIME: duration of the slowest read and write operations.\n");
printf("# CP_*_RANK_TIME: fastest, slowest variance of transfer time.\n");
printf("\n");
@@ -131,6 +131,13 @@ static int cp_log_compress(struct darshan_job_runtime* final_job,
static int file_compare(const void* a, const void* b);
static void darshan_mpi_initialize(int *argc, char ***argv);
static char* darshan_get_exe_and_mounts(struct darshan_job_runtime* final_job);
static int darshan_file_variance(
struct darshan_file *infile_array,
struct darshan_file *outfile_array,
int count, int rank);
static void pairwise_variance_reduce (
void *invec, void *inoutvec, int *len, MPI_Datatype *dt);
#define CP_MAX_MNTS 32
uint64_t mnt_hash_array[CP_MAX_MNTS] = {0};
@@ -143,6 +150,13 @@ struct
int64_t mnt_id_root;
} mnt_mapping[CP_MAX_MNTS];
struct variance_dt
{
double n; /* number of samples merged into this partial state */
double T; /* running sum of the samples */
double S; /* running sum of squared deviations from the mean */
};
int MPI_Init(int *argc, char ***argv)
{
int ret;
@@ -1134,6 +1148,31 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
{
if(final_job->file_array[j].hash == hash_array[i])
{
/*
* Initialize fastest/slowest info prior
* to the reduction.
*/
final_job->file_array[j].counters[CP_FASTEST_RANK] =
final_job->file_array[j].rank;
final_job->file_array[j].counters[CP_FASTEST_RANK_BYTES] =
final_job->file_array[j].counters[CP_BYTES_READ] +
final_job->file_array[j].counters[CP_BYTES_WRITTEN];
final_job->file_array[j].fcounters[CP_F_FASTEST_RANK_TIME] =
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME];
final_job->file_array[j].counters[CP_SLOWEST_RANK] =
final_job->file_array[j].rank;
final_job->file_array[j].counters[CP_SLOWEST_RANK_BYTES] =
final_job->file_array[j].counters[CP_BYTES_READ] +
final_job->file_array[j].counters[CP_BYTES_WRITTEN];
final_job->file_array[j].fcounters[CP_F_SLOWEST_RANK_TIME] =
final_job->file_array[j].fcounters[CP_F_POSIX_META_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_READ_TIME] +
final_job->file_array[j].fcounters[CP_F_POSIX_WRITE_TIME];
final_job->file_array[j].rank = -1;
break;
}
@@ -1169,6 +1208,14 @@ static int cp_log_reduction(struct darshan_job_runtime* final_job, int rank,
return(-1);
}
ret = darshan_file_variance(
&final_job->file_array[final_job->file_count-shared_count],
tmp_array, shared_count, rank);
if (ret)
{
return(-1);
}
if(rank == 0)
{
/* root replaces local files with shared ones */
@@ -1383,6 +1430,48 @@ static void darshan_file_reduce(void* infile_v,
inoutfile->counters[CP_MAX_READ_TIME_SIZE];
}
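/*
 * The fastest entry below is selected by minimum cumulative per-rank
 * time and the slowest by maximum; each branch copies the rank, byte
 * count, and time together so the winning rank's statistics stay
 * consistent.
 */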
/* min */
if(infile->fcounters[CP_F_FASTEST_RANK_TIME] <
inoutfile->fcounters[CP_F_FASTEST_RANK_TIME])
{
tmp_file.counters[CP_FASTEST_RANK] =
infile->counters[CP_FASTEST_RANK];
tmp_file.counters[CP_FASTEST_RANK_BYTES] =
infile->counters[CP_FASTEST_RANK_BYTES];
tmp_file.fcounters[CP_F_FASTEST_RANK_TIME] =
infile->fcounters[CP_F_FASTEST_RANK_TIME];
}
else
{
tmp_file.counters[CP_FASTEST_RANK] =
inoutfile->counters[CP_FASTEST_RANK];
tmp_file.counters[CP_FASTEST_RANK_BYTES] =
inoutfile->counters[CP_FASTEST_RANK_BYTES];
tmp_file.fcounters[CP_F_FASTEST_RANK_TIME] =
inoutfile->fcounters[CP_F_FASTEST_RANK_TIME];
}
/* max */
if(infile->fcounters[CP_F_SLOWEST_RANK_TIME] >
inoutfile->fcounters[CP_F_SLOWEST_RANK_TIME])
{
tmp_file.counters[CP_SLOWEST_RANK] =
infile->counters[CP_SLOWEST_RANK];
tmp_file.counters[CP_SLOWEST_RANK_BYTES] =
infile->counters[CP_SLOWEST_RANK_BYTES];
tmp_file.fcounters[CP_F_SLOWEST_RANK_TIME] =
infile->fcounters[CP_F_SLOWEST_RANK_TIME];
}
else
{
tmp_file.counters[CP_SLOWEST_RANK] =
inoutfile->counters[CP_SLOWEST_RANK];
tmp_file.counters[CP_SLOWEST_RANK_BYTES] =
inoutfile->counters[CP_SLOWEST_RANK_BYTES];
tmp_file.fcounters[CP_F_SLOWEST_RANK_TIME] =
inoutfile->fcounters[CP_F_SLOWEST_RANK_TIME];
}
/* pick one device id and file size */
tmp_file.counters[CP_DEVICE] = infile->counters[CP_DEVICE];
tmp_file.counters[CP_SIZE_AT_OPEN] = infile->counters[CP_SIZE_AT_OPEN];
@@ -1793,6 +1882,146 @@ static char* darshan_get_exe_and_mounts(struct darshan_job_runtime* final_job)
return(trailing_data);
}
/*
* Computes, for each shared file, the population variance across
* ranks of bytes moved and of cumulative POSIX I/O time.
*/
static int darshan_file_variance(
struct darshan_file *infile_array,
struct darshan_file *outfile_array,
int count, int rank)
{
MPI_Op pw_var_op;
MPI_Datatype var_dt;
int ret;
int i;
struct variance_dt* var_array = NULL;
struct variance_dt* varres_array = NULL;
ret = MPI_Op_create(pairwise_variance_reduce, 1, &pw_var_op);
if (ret != MPI_SUCCESS)
{
goto error_handler;
}
ret = MPI_Type_contiguous(sizeof(struct variance_dt), MPI_BYTE, &var_dt);
if (ret != MPI_SUCCESS)
{
goto error_handler;
}
ret = MPI_Type_commit(&var_dt);
if (ret != MPI_SUCCESS)
{
goto error_handler;
}
var_array = malloc(count*sizeof(struct variance_dt));
if(!var_array)
{
ret = -1;
goto error_handler;
}
if (rank == 0)
{
varres_array = malloc(count*sizeof(struct variance_dt));
if(!varres_array)
{
ret = -1;
goto error_handler;
}
}
/*
* total time
*/
for(i=0; i<count; i++)
{
var_array[i].n = 1;
var_array[i].S = 0;
var_array[i].T = infile_array[i].fcounters[CP_F_POSIX_META_TIME] +
infile_array[i].fcounters[CP_F_POSIX_READ_TIME] +
infile_array[i].fcounters[CP_F_POSIX_WRITE_TIME];
}
ret = MPI_Reduce(
var_array, varres_array, count, var_dt, pw_var_op,
0, MPI_COMM_WORLD);
if(ret != MPI_SUCCESS)
{
goto error_handler;
}
if (rank == 0)
{
for(i=0; i<count; i++)
{
outfile_array[i].fcounters[CP_F_VARIANCE_RANK_TIME] =
(varres_array[i].S / varres_array[i].n);
}
}
/*
* total bytes
*/
for(i=0; i<count; i++)
{
var_array[i].n = 1;
var_array[i].S = 0;
var_array[i].T = (double)
infile_array[i].counters[CP_BYTES_READ] +
infile_array[i].counters[CP_BYTES_WRITTEN];
}
ret = MPI_Reduce(
var_array, varres_array, count, var_dt, pw_var_op,
0, MPI_COMM_WORLD);
if(ret != MPI_SUCCESS)
{
goto error_handler;
}
if (rank == 0)
{
for(i=0; i<count; i++)
{
outfile_array[i].fcounters[CP_F_VARIANCE_RANK_BYTES] =
(varres_array[i].S / varres_array[i].n);
}
}
MPI_Type_free(&var_dt);
MPI_Op_free(&pw_var_op);
ret = 0;
error_handler:
if (var_array) free(var_array);
if (varres_array) free(varres_array);
return ret;
}
/*
* MPI reduction operator: merges two partial (n, T, S) variance
* states with the pairwise combining formula of Chan, Golub, and
* LeVeque.
*/
static void pairwise_variance_reduce(
void *invec, void *inoutvec, int *len, MPI_Datatype *dt)
{
int i;
struct variance_dt *X = invec;
struct variance_dt *Y = inoutvec;
struct variance_dt Z;
for (i=0; i<*len; i++,X++,Y++)
{
Z.n = X->n + Y->n;
Z.T = X->T + Y->T;
Z.S = X->S + Y->S + (X->n/(Y->n*Z.n)) *
((Y->n/X->n)*X->T - Y->T) * ((Y->n/X->n)*X->T - Y->T);
*Y = Z;
}
return;
}
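The merge above is the standard combining rule for partial variance states (Chan, Golub, and LeVeque): for partitions A and B, S_AB = S_A + S_B + (n_A*n_B/n_AB) * (T_A/n_A - T_B/n_B)^2, which is algebraically what the expression in the loop computes. A small standalone check, not part of the commit, that folds one (n=1, T=x, S=0) state per "rank" and compares the result with a direct two-pass population variance:

    #include <stdio.h>

    struct variance_dt { double n, T, S; };

    /* Merge two partial (n, T, S) states exactly as the reduction op does. */
    static struct variance_dt merge(struct variance_dt X, struct variance_dt Y)
    {
        struct variance_dt Z;
        double d;

        Z.n = X.n + Y.n;
        Z.T = X.T + Y.T;
        d = (Y.n / X.n) * X.T - Y.T;
        Z.S = X.S + Y.S + (X.n / (Y.n * Z.n)) * d * d;
        return Z;
    }

    int main(void)
    {
        double x[] = { 4.0, 7.0, 13.0, 16.0 };  /* per-"rank" totals */
        int n = 4, i;
        double mean = 0.0, direct = 0.0;
        struct variance_dt acc = { 1.0, x[0], 0.0 };

        /* Fold the samples pairwise, as the MPI reduction tree would. */
        for (i = 1; i < n; i++)
        {
            struct variance_dt one = { 1.0, x[i], 0.0 };
            acc = merge(acc, one);
        }

        /* Direct two-pass population variance for comparison. */
        for (i = 0; i < n; i++) mean += x[i];
        mean /= n;
        for (i = 0; i < n; i++) direct += (x[i] - mean) * (x[i] - mean);
        direct /= n;

        /* Both print 22.500000. */
        printf("pairwise: %f  direct: %f\n", acc.S / acc.n, direct);
        return 0;
    }

The final S/n is the population variance that darshan_file_variance stores in CP_F_VARIANCE_RANK_TIME and CP_F_VARIANCE_RANK_BYTES.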
/*
* Local variables:
* c-indent-level: 4
......