Commit a708b0ce authored by Shane Snyder
Browse files

add new counters to mpiio reduction code

parent 73541538
...@@ -121,7 +121,7 @@ static struct mpiio_file_runtime* mpiio_file_by_name(const char *name); ...@@ -121,7 +121,7 @@ static struct mpiio_file_runtime* mpiio_file_by_name(const char *name);
static struct mpiio_file_runtime* mpiio_file_by_name_setfh(const char* name, MPI_File fh); static struct mpiio_file_runtime* mpiio_file_by_name_setfh(const char* name, MPI_File fh);
static struct mpiio_file_runtime* mpiio_file_by_fh(MPI_File fh); static struct mpiio_file_runtime* mpiio_file_by_fh(MPI_File fh);
static void mpiio_file_close_fh(MPI_File fh); static void mpiio_file_close_fh(MPI_File fh);
static int mpiio_file_compare(const void* a, const void* b); static int mpiio_record_compare(const void* a, const void* b);
static void mpiio_begin_shutdown(void); static void mpiio_begin_shutdown(void);
static void mpiio_setup_reduction(darshan_record_id *shared_recs, int *shared_rec_count, static void mpiio_setup_reduction(darshan_record_id *shared_recs, int *shared_rec_count,
...@@ -1009,7 +1009,7 @@ static void mpiio_file_close_fh(MPI_File fh) ...@@ -1009,7 +1009,7 @@ static void mpiio_file_close_fh(MPI_File fh)
} }
/* compare function for sorting file records by descending rank */ /* compare function for sorting file records by descending rank */
static int mpiio_file_compare(const void* a_p, const void* b_p) static int mpiio_record_compare(const void* a_p, const void* b_p)
{ {
const struct darshan_mpiio_file* a = a_p; const struct darshan_mpiio_file* a = a_p;
const struct darshan_mpiio_file* b = b_p; const struct darshan_mpiio_file* b = b_p;
...@@ -1077,7 +1077,7 @@ static void mpiio_setup_reduction( ...@@ -1077,7 +1077,7 @@ static void mpiio_setup_reduction(
* of the array * of the array
*/ */
qsort(mpiio_runtime->file_record_array, mpiio_runtime->file_array_ndx, qsort(mpiio_runtime->file_record_array, mpiio_runtime->file_array_ndx,
sizeof(struct darshan_mpiio_file), mpiio_file_compare); sizeof(struct darshan_mpiio_file), mpiio_record_compare);
/* make *send_buf point to the shared files at the end of sorted array */ /* make *send_buf point to the shared files at the end of sorted array */
*send_buf = *send_buf =
...@@ -1089,13 +1089,12 @@ static void mpiio_setup_reduction( ...@@ -1089,13 +1089,12 @@ static void mpiio_setup_reduction(
*recv_buf = malloc(*shared_rec_count * sizeof(struct darshan_mpiio_file)); *recv_buf = malloc(*shared_rec_count * sizeof(struct darshan_mpiio_file));
if(!(*recv_buf)) if(!(*recv_buf))
return; return;
/* TODO: cleaner way to do this? */
mpiio_runtime->red_buf = *recv_buf;
} }
*rec_size = sizeof(struct darshan_mpiio_file); *rec_size = sizeof(struct darshan_mpiio_file);
/* TODO: cleaner way to do this? */
if(my_rank == 0)
mpiio_runtime->red_buf = *recv_buf;
mpiio_runtime->shared_rec_count = *shared_rec_count; mpiio_runtime->shared_rec_count = *shared_rec_count;
return; return;
...@@ -1110,7 +1109,7 @@ static void mpiio_record_reduction_op( ...@@ -1110,7 +1109,7 @@ static void mpiio_record_reduction_op(
struct darshan_mpiio_file tmp_file; struct darshan_mpiio_file tmp_file;
struct darshan_mpiio_file *infile = infile_v; struct darshan_mpiio_file *infile = infile_v;
struct darshan_mpiio_file *inoutfile = inoutfile_v; struct darshan_mpiio_file *inoutfile = inoutfile_v;
int i, j; int i, j, k;
assert(mpiio_runtime); assert(mpiio_runtime);
...@@ -1122,19 +1121,58 @@ static void mpiio_record_reduction_op( ...@@ -1122,19 +1121,58 @@ static void mpiio_record_reduction_op(
tmp_file.rank = -1; tmp_file.rank = -1;
/* sum */ /* sum */
for(j=MPIIO_INDEP_OPENS; j<=MPIIO_HINTS; j++) for(j=MPIIO_INDEP_OPENS; j<=MPIIO_VIEWS; j++)
{
tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j];
}
tmp_file.counters[MPIIO_MODE] = infile->counters[MPIIO_MODE];
/* sum */
for(j=MPIIO_BYTES_READ; j<=MPIIO_RW_SWITCHES; j++)
{ {
tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j]; tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j];
} }
/* sum (floating point) */ /* skip MPIIO_MAX_*_TIME_SIZE; handled in floating point section */
for(j=MPIIO_F_META_TIME; j<=MPIIO_F_META_TIME; j++)
for(j=MPIIO_SIZE_READ_AGG_0_100; j<=MPIIO_SIZE_WRITE_AGG_1G_PLUS; j++)
{
tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j];
}
/* first collapse any duplicates */
for(j=MPIIO_ACCESS1_ACCESS; j<=MPIIO_ACCESS4_ACCESS; j++)
{
for(k=MPIIO_ACCESS1_ACCESS; k<=MPIIO_ACCESS4_ACCESS; k++)
{
if(infile->counters[j] == inoutfile->counters[k])
{
infile->counters[j+4] += inoutfile->counters[k+4];
inoutfile->counters[k] = 0;
inoutfile->counters[k+4] = 0;
}
}
}
/* first set */
for(j=MPIIO_ACCESS1_ACCESS; j<=MPIIO_ACCESS4_ACCESS; j++)
{
DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[MPIIO_ACCESS1_ACCESS]),
&(tmp_file.counters[MPIIO_ACCESS1_COUNT]), infile->counters[j],
infile->counters[j+4]);
}
/* second set */
for(j=MPIIO_ACCESS1_ACCESS; j<=MPIIO_ACCESS4_ACCESS; j++)
{ {
tmp_file.fcounters[j] = infile->fcounters[j] + inoutfile->fcounters[j]; DARSHAN_COMMON_VAL_COUNTER_INC(&(tmp_file.counters[MPIIO_ACCESS1_ACCESS]),
&(tmp_file.counters[MPIIO_ACCESS1_COUNT]), inoutfile->counters[j],
inoutfile->counters[j+4]);
} }
/* min non-zero (if available) value */ /* min non-zero (if available) value */
for(j=MPIIO_F_OPEN_TIMESTAMP; j<=MPIIO_F_OPEN_TIMESTAMP; j++) for(j=MPIIO_F_OPEN_TIMESTAMP; j<=MPIIO_F_WRITE_START_TIMESTAMP; j++)
{ {
if(infile->fcounters[j] > inoutfile->fcounters[j] && inoutfile->fcounters[j] > 0) if(infile->fcounters[j] > inoutfile->fcounters[j] && inoutfile->fcounters[j] > 0)
tmp_file.fcounters[j] = inoutfile->fcounters[j]; tmp_file.fcounters[j] = inoutfile->fcounters[j];
...@@ -1142,6 +1180,54 @@ static void mpiio_record_reduction_op( ...@@ -1142,6 +1180,54 @@ static void mpiio_record_reduction_op(
tmp_file.fcounters[j] = infile->fcounters[j]; tmp_file.fcounters[j] = infile->fcounters[j];
} }
/* max */
for(j=MPIIO_F_READ_END_TIMESTAMP; j<= MPIIO_F_CLOSE_TIMESTAMP; j++)
{
if(infile->fcounters[j] > inoutfile->fcounters[j])
tmp_file.fcounters[j] = infile->fcounters[j];
else
tmp_file.fcounters[j] = inoutfile->fcounters[j];
}
/* sum (floating point): the indices in this range are MPIIO_F_* values and
 * the operands are read from fcounters, so the result must be stored in
 * tmp_file.fcounters as well. Storing it in tmp_file.counters would leave
 * these fcounters entries unset and overwrite unrelated integer counters.
 */
for(j=MPIIO_F_READ_TIME; j<=MPIIO_F_META_TIME; j++)
{
tmp_file.fcounters[j] = infile->fcounters[j] + inoutfile->fcounters[j];
}
/* max (special case) */
if(infile->fcounters[MPIIO_F_MAX_READ_TIME] >
inoutfile->fcounters[MPIIO_F_MAX_READ_TIME])
{
tmp_file.fcounters[MPIIO_F_MAX_READ_TIME] =
infile->fcounters[MPIIO_F_MAX_READ_TIME];
tmp_file.counters[MPIIO_MAX_READ_TIME_SIZE] =
infile->counters[MPIIO_MAX_READ_TIME_SIZE];
}
else
{
tmp_file.fcounters[MPIIO_F_MAX_READ_TIME] =
inoutfile->fcounters[MPIIO_F_MAX_READ_TIME];
tmp_file.counters[MPIIO_MAX_READ_TIME_SIZE] =
inoutfile->counters[MPIIO_MAX_READ_TIME_SIZE];
}
if(infile->fcounters[MPIIO_F_MAX_WRITE_TIME] >
inoutfile->fcounters[MPIIO_F_MAX_WRITE_TIME])
{
tmp_file.fcounters[MPIIO_F_MAX_WRITE_TIME] =
infile->fcounters[MPIIO_F_MAX_WRITE_TIME];
tmp_file.counters[MPIIO_MAX_WRITE_TIME_SIZE] =
infile->counters[MPIIO_MAX_WRITE_TIME_SIZE];
}
else
{
tmp_file.fcounters[MPIIO_F_MAX_WRITE_TIME] =
inoutfile->fcounters[MPIIO_F_MAX_WRITE_TIME];
tmp_file.counters[MPIIO_MAX_WRITE_TIME_SIZE] =
inoutfile->counters[MPIIO_MAX_WRITE_TIME_SIZE];
}
/* update pointers */ /* update pointers */
*inoutfile = tmp_file; *inoutfile = tmp_file;
inoutfile++; inoutfile++;
...@@ -1157,7 +1243,7 @@ static void mpiio_get_output_data( ...@@ -1157,7 +1243,7 @@ static void mpiio_get_output_data(
{ {
assert(mpiio_runtime); assert(mpiio_runtime);
/* TODO: clean up reduction stuff */ /* clean up reduction state */
if(my_rank == 0) if(my_rank == 0)
{ {
int tmp_ndx = mpiio_runtime->file_array_ndx - mpiio_runtime->shared_rec_count; int tmp_ndx = mpiio_runtime->file_array_ndx - mpiio_runtime->shared_rec_count;
...@@ -1180,6 +1266,8 @@ static void mpiio_shutdown() ...@@ -1180,6 +1266,8 @@ static void mpiio_shutdown()
{ {
struct mpiio_file_runtime_ref *ref, *tmp; struct mpiio_file_runtime_ref *ref, *tmp;
assert(mpiio_runtime);
HASH_ITER(hlink, mpiio_runtime->fh_hash, ref, tmp) HASH_ITER(hlink, mpiio_runtime->fh_hash, ref, tmp)
{ {
HASH_DELETE(hlink, mpiio_runtime->fh_hash, ref); HASH_DELETE(hlink, mpiio_runtime->fh_hash, ref);
......
...@@ -1802,7 +1802,6 @@ static void posix_setup_reduction( ...@@ -1802,7 +1802,6 @@ static void posix_setup_reduction(
if(!(*recv_buf)) if(!(*recv_buf))
return; return;
/* TODO: cleaner way to do this? */
posix_runtime->red_buf = *recv_buf; posix_runtime->red_buf = *recv_buf;
} }
...@@ -1858,8 +1857,7 @@ static void posix_record_reduction_op( ...@@ -1858,8 +1857,7 @@ static void posix_record_reduction_op(
/* sum */ /* sum */
for(j=POSIX_CONSEC_READS; j<=POSIX_MEM_NOT_ALIGNED; j++) for(j=POSIX_CONSEC_READS; j<=POSIX_MEM_NOT_ALIGNED; j++)
{ {
tmp_file.counters[j] = infile->counters[j] + tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j];
inoutfile->counters[j];
} }
tmp_file.counters[POSIX_MEM_ALIGNMENT] = infile->counters[POSIX_MEM_ALIGNMENT]; tmp_file.counters[POSIX_MEM_ALIGNMENT] = infile->counters[POSIX_MEM_ALIGNMENT];
...@@ -1868,16 +1866,14 @@ static void posix_record_reduction_op( ...@@ -1868,16 +1866,14 @@ static void posix_record_reduction_op(
/* sum */ /* sum */
for(j=POSIX_FILE_NOT_ALIGNED; j<=POSIX_FILE_NOT_ALIGNED; j++) for(j=POSIX_FILE_NOT_ALIGNED; j<=POSIX_FILE_NOT_ALIGNED; j++)
{ {
tmp_file.counters[j] = infile->counters[j] + tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j];
inoutfile->counters[j];
} }
/* skip POSIX_MAX_*_TIME_SIZE; handled in floating point section */ /* skip POSIX_MAX_*_TIME_SIZE; handled in floating point section */
for(j=POSIX_SIZE_READ_0_100; j<=POSIX_SIZE_WRITE_1G_PLUS; j++) for(j=POSIX_SIZE_READ_0_100; j<=POSIX_SIZE_WRITE_1G_PLUS; j++)
{ {
tmp_file.counters[j] = infile->counters[j] + tmp_file.counters[j] = infile->counters[j] + inoutfile->counters[j];
inoutfile->counters[j];
} }
/* first collapse any duplicates */ /* first collapse any duplicates */
...@@ -2054,13 +2050,13 @@ static void posix_get_output_data( ...@@ -2054,13 +2050,13 @@ static void posix_get_output_data(
{ {
assert(posix_runtime); assert(posix_runtime);
/* TODO: cleaner way to do this? */
/* clean up reduction state */ /* clean up reduction state */
if(my_rank == 0) if(my_rank == 0)
{ {
int tmp_ndx = posix_runtime->file_array_ndx - posix_runtime->shared_rec_count; int tmp_ndx = posix_runtime->file_array_ndx - posix_runtime->shared_rec_count;
memcpy(&(posix_runtime->file_record_array[tmp_ndx]), posix_runtime->red_buf, memcpy(&(posix_runtime->file_record_array[tmp_ndx]), posix_runtime->red_buf,
posix_runtime->shared_rec_count * sizeof(struct darshan_posix_file)); posix_runtime->shared_rec_count * sizeof(struct darshan_posix_file));
free(posix_runtime->red_buf);
} }
else else
{ {
...@@ -2087,9 +2083,6 @@ static void posix_shutdown() ...@@ -2087,9 +2083,6 @@ static void posix_shutdown()
HASH_CLEAR(hlink, posix_runtime->file_hash); /* these entries are freed all at once below */ HASH_CLEAR(hlink, posix_runtime->file_hash); /* these entries are freed all at once below */
if(my_rank == 0 && posix_runtime->red_buf)
free(posix_runtime->red_buf);
free(posix_runtime->file_runtime_array); free(posix_runtime->file_runtime_array);
free(posix_runtime->file_record_array); free(posix_runtime->file_record_array);
free(posix_runtime); free(posix_runtime);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment