Commit 73541538 authored by Shane Snyder's avatar Shane Snyder

refactor and add common access counts to mpiio

parent 6406c872
......@@ -10,7 +10,6 @@
#include "darshan-log-format.h"
/* TODO: slowest/fastest rank (f)counters */
/* TODO: access size common counters */
/* TODO: maybe use a counter to track cases in which a derived datatype is used? */
#define MPIIO_COUNTERS \
......
......@@ -7,54 +7,19 @@
#ifndef __DARSHAN_COMMON_H
#define __DARSHAN_COMMON_H
/* simple macros for manipulating a module's counters
*
* NOTE: These macros assume a module's record stores integer
* and floating point counters in arrays, named counters and
* fcounters, respectively. __rec_p is a pointer to the
* data record, __counter is the counter in question, and
* __value is the corresponding data value.
*/
#define DARSHAN_COUNTER_SET(__rec_p, __counter, __value) do{ \
(__rec_p)->counters[__counter] = __value; \
} while(0)
#define DARSHAN_COUNTER_F_SET(__rec_p, __counter, __value) do{ \
(__rec_p)->fcounters[__counter] = __value; \
} while(0)
#define DARSHAN_COUNTER_INC(__rec_p, __counter, __value) do{ \
(__rec_p)->counters[__counter] += __value; \
} while(0)
#define DARSHAN_COUNTER_F_INC(__rec_p, __counter, __value) do{ \
(__rec_p)->fcounters[__counter] += __value; \
} while(0)
#define DARSHAN_COUNTER_VALUE(__rec_p, __counter) \
((__rec_p)->counters[__counter])
#define DARSHAN_COUNTER_F_VALUE(__rec_p, __counter) \
((__rec_p)->fcounters[__counter])
/* set __counter equal to the max of __counter or the passed in __value */
#define DARSHAN_COUNTER_MAX(__rec_p, __counter, __value) do{ \
if((__rec_p)->counters[__counter] < __value) \
(__rec_p)->counters[__counter] = __value; \
} while(0)
/* increment a timer counter, making sure not to account for overlap
* with previous operations
*
* NOTE: __tm1 is the start timestamp of the operation, __tm2 is the end
* timestamp of the operation, and __last is the timestamp of the end of
* the previous I/O operation (which we don't want to overlap with).
* NOTE: __timer is the corresponding timer counter variable, __tm1 is
* the start timestamp of the operation, __tm2 is the end timestamp of
* the operation, and __last is the timestamp of the end of the previous
* I/O operation (which we don't want to overlap with).
*/
#define DARSHAN_COUNTER_F_INC_NO_OVERLAP(__rec_p, __tm1, __tm2, __last, __counter) do{ \
#define DARSHAN_TIMER_INC_NO_OVERLAP(__timer, __tm1, __tm2, __last) do{ \
if(__tm1 > __last) \
DARSHAN_COUNTER_F_INC(__rec_p, __counter, (__tm2 - __tm1)); \
__timer += (__tm2 - __tm1); \
else \
DARSHAN_COUNTER_F_INC(__rec_p, __counter, (__tm2 - __last)); \
__timer += (__tm2 - __last); \
if(__tm2 > __last) \
__last = __tm2; \
} while(0)
......@@ -63,7 +28,7 @@
*
* NOTE: This macro can be used to build a histogram of access
* sizes, offsets, etc. It assumes a 10-bucket histogram, with
* __counter_base representing the first counter in the sequence
* __bucket_base_p pointing to the first counter in the sequence
* of buckets (i.e., the smallest bucket). The size ranges of each
* bucket are:
* * 0 - 100 bytes
......@@ -77,29 +42,75 @@
* * 100 MiB - 1 GiB
* * 1 GiB+
*/
#define DARSHAN_BUCKET_INC(__rec_p, __counter_base, __value) do {\
#define DARSHAN_BUCKET_INC(__bucket_base_p, __value) do {\
if(__value < 101) \
(__rec_p)->counters[__counter_base] += 1; \
*(__bucket_base_p) += 1; \
else if(__value < 1025) \
(__rec_p)->counters[__counter_base+1] += 1; \
*(__bucket_base_p + 1) += 1; \
else if(__value < 10241) \
(__rec_p)->counters[__counter_base+2] += 1; \
*(__bucket_base_p + 2) += 1; \
else if(__value < 102401) \
(__rec_p)->counters[__counter_base+3] += 1; \
*(__bucket_base_p + 3) += 1; \
else if(__value < 1048577) \
(__rec_p)->counters[__counter_base+4] += 1; \
*(__bucket_base_p + 4) += 1; \
else if(__value < 4194305) \
(__rec_p)->counters[__counter_base+5] += 1; \
*(__bucket_base_p + 5) += 1; \
else if(__value < 10485761) \
(__rec_p)->counters[__counter_base+6] += 1; \
*(__bucket_base_p + 6) += 1; \
else if(__value < 104857601) \
(__rec_p)->counters[__counter_base+7] += 1; \
*(__bucket_base_p + 7) += 1; \
else if(__value < 1073741825) \
(__rec_p)->counters[__counter_base+8] += 1; \
*(__bucket_base_p + 8) += 1; \
else \
(__rec_p)->counters[__counter_base+9] += 1; \
*(__bucket_base_p + 9) += 1; \
} while(0)
/* potentially set or increment a common value counter, depending on the __count
 * for the given __value
 *
 * NOTE: This macro is hardcoded to expect that Darshan will only track the 4
 * most common (i.e., frequently occurring) values. __val_p is a pointer to the
 * base of the value counters (i.e., the first of 4 contiguous common value
 * counters) and __cnt_p is a pointer to the base of the count counters (i.e.
 * the first of 4 contiguous common count counters). Both must point to
 * int64_t counters.
 *
 * Behavior:
 *   - a __value of 0 is ignored
 *   - if __value already occupies one of the 4 slots, that slot's count is
 *     incremented by __count
 *   - otherwise the slot holding the smallest count is overwritten with
 *     (__value, __count), but only if __count is strictly greater than the
 *     count currently stored there
 *
 * Every argument is evaluated exactly once and copied into a local whose name
 * carries a "cvc_" prefix, so arbitrary expressions (and caller variables that
 * happen to be called i, min, set, ...) can be passed safely.
 */
#define DARSHAN_COMMON_VAL_COUNTER_INC(__val_p, __cnt_p, __value, __count) do {\
    int64_t* const cvc_vals_ = (__val_p); \
    int64_t* const cvc_cnts_ = (__cnt_p); \
    const int64_t cvc_value_ = (__value); \
    const int64_t cvc_count_ = (__count); \
    int64_t cvc_min_; \
    int cvc_min_ndx_ = 0; \
    int cvc_found_ = 0; \
    int cvc_i_; \
    /* values of 0 are never tracked */ \
    if(cvc_value_ == 0) break; \
    cvc_min_ = cvc_cnts_[0]; \
    for(cvc_i_ = 0; cvc_i_ < 4; cvc_i_++) { \
        /* increment bucket if already exists */ \
        if(cvc_vals_[cvc_i_] == cvc_value_) { \
            cvc_cnts_[cvc_i_] += cvc_count_; \
            cvc_found_ = 1; \
            break; \
        } \
        /* otherwise find the least frequently used bucket */ \
        else if(cvc_cnts_[cvc_i_] < cvc_min_) { \
            cvc_min_ = cvc_cnts_[cvc_i_]; \
            cvc_min_ndx_ = cvc_i_; \
        } \
    } \
    /* evict the least frequent bucket only for a strictly larger count */ \
    if(!cvc_found_ && (cvc_count_ > cvc_min_)) { \
        cvc_cnts_[cvc_min_ndx_] = cvc_count_; \
        cvc_vals_[cvc_min_ndx_] = cvc_value_; \
    } \
} while(0)
/* maximum number of common values that darshan will track per file at
* runtime; at shutdown time these will be reduced to the 4 most
* frequently occurring ones
*/
#define DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT 32
/* one node of the per-file tree of tracked values: pairs a value with the
 * number of times it has been seen at runtime
 */
struct darshan_common_val_counter
{
/* the tracked value; also the key that darshan_common_val_compare() orders
 * tree nodes by
 */
int64_t val;
/* occurrence count: set to 1 when the node is created and incremented by
 * darshan_common_val_counter() each time the same value is seen again
 */
int freq;
};
/* i/o type (read or write) */
enum darshan_io_type
{
......@@ -118,6 +129,39 @@ enum darshan_io_type
* to absolute paths and filters out some potential noise in the
* path string.
*/
char* darshan_clean_file_path(const char* path);
char* darshan_clean_file_path(
const char* path);
/* darshan_common_val_counter()
*
* Potentially increment an existing common value counter or allocate
* a new one to keep track of commonly occurring values. Example use
* cases would be to track the most frequent access sizes or strides
* used by a specific module, for instance. 'common_val_root' is the
* root pointer for the tree which stores common value info,
* 'common_val_count' is a pointer to the number of nodes in the
* tree (i.e., the number of allocated common value counters), and
* 'val' is the new value to attempt to add.
*/
void darshan_common_val_counter(
void** common_val_root,
int* common_val_count,
int64_t val);
/* darshan_walk_common_vals()
*
* Walks the tree of common value counters and determines the 4 most
* frequently occurring values, storing the common values in the
* appropriate counter fields of the given record. 'common_val_root'
* is the root of the tree which stores the common value info, 'val_p'
* is a pointer to the base counter (i.e., the first) of the common
* values (which are assumed to be 4 total and contiguous in memory),
* and 'cnt_p' is a pointer to the base counter of the common counts
* (which are again expected to be contiguous in memory).
*/
void darshan_walk_common_vals(
void* common_val_root,
int64_t* val_p,
int64_t* cnt_p);
#endif /* __DARSHAN_COMMON_H */
......@@ -3,6 +3,8 @@
* See COPYRIGHT notice in top-level directory.
*
*/
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#include "darshan-runtime-config.h"
......@@ -10,9 +12,15 @@
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <search.h>
#include <assert.h>
#include "darshan.h"
static int darshan_common_val_compare(const void* a_p, const void* b_p);
static void darshan_common_val_walker(const void* nodep, const VISIT which,
const int depth);
char* darshan_clean_file_path(const char* path)
{
char* newpath = NULL;
......@@ -70,6 +78,101 @@ char* darshan_clean_file_path(const char* path)
return(newpath);
}
/* HACK: global variables for determining 4 most common values */
static int64_t* walker_val_p = NULL;
static int64_t* walker_cnt_p = NULL;
/* darshan_common_val_counter()
 *
 * Record one more occurrence of 'val' in the tree rooted at
 * '*common_val_root'.
 *
 *   - a 'val' of 0 is never counted
 *   - if 'val' is already in the tree, its frequency is incremented
 *   - otherwise a new node is allocated and inserted, provided fewer than
 *     DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT nodes exist; '*common_val_count'
 *     is incremented on successful insertion
 *
 * Allocation failures (of the counter or of the tree node) are handled
 * silently: the value is simply not tracked and nothing is leaked.
 */
void darshan_common_val_counter(void **common_val_root, int *common_val_count,
    int64_t val)
{
    struct darshan_common_val_counter* counter;
    struct darshan_common_val_counter* found;
    struct darshan_common_val_counter tmp_counter;
    void* tmp;

    /* don't count any values of 0 */
    if(val == 0)
        return;

    /* check to see if this val is already recorded */
    tmp_counter.val = val;
    tmp_counter.freq = 1;
    tmp = tfind(&tmp_counter, common_val_root, darshan_common_val_compare);
    if(tmp)
    {
        found = *(struct darshan_common_val_counter**)tmp;
        found->freq++;
        return;
    }

    /* we can add a new one as long as we haven't hit the limit */
    if(*common_val_count < DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT)
    {
        counter = malloc(sizeof(*counter));
        if(!counter)
        {
            return;
        }

        counter->val = val;
        counter->freq = 1;

        tmp = tsearch(counter, common_val_root, darshan_common_val_compare);
        if(!tmp)
        {
            /* tsearch() returns NULL when it cannot allocate a tree node;
             * the counter was not inserted, so release it and give up on
             * tracking this value
             */
            free(counter);
            return;
        }

        found = *(struct darshan_common_val_counter**)tmp;
        /* if we get a new answer out here we are in trouble; this was
         * already checked with the tfind()
         */
        assert(found == counter);

        (*common_val_count)++;
    }

    return;
}
/* darshan_walk_common_vals()
 *
 * Walk the tree rooted at 'common_val_root', feeding every tracked value and
 * its frequency into the common value counters based at 'val_p' / 'cnt_p',
 * then destroy the tree (each node's counter is released with free()).
 *
 * NOTE: the tree no longer exists when this function returns, so the caller
 * must not use its root pointer afterwards.
 */
void darshan_walk_common_vals(void *common_val_root, int64_t* val_p,
    int64_t* cnt_p)
{
    /* the twalk() callback receives no user argument, so the destination
     * counters are handed to it through file-scope pointers
     */
    walker_val_p = val_p;
    walker_cnt_p = cnt_p;

    twalk(common_val_root, darshan_common_val_walker);

    /* don't leave the globals pointing into the caller's record once the
     * walk is over
     */
    walker_val_p = NULL;
    walker_cnt_p = NULL;

    tdestroy(common_val_root, free);

    return;
}
/* twalk() callback: offer one tree node's (value, frequency) pair to the
 * common value counters selected by walker_val_p / walker_cnt_p
 *
 * Only 'postorder' and 'leaf' visits are acted upon; every other visit type
 * is ignored. 'depth' is unused.
 */
static void darshan_common_val_walker(const void* nodep, const VISIT which,
    const int depth)
{
    struct darshan_common_val_counter* counter;

    /* nothing to do for the remaining visit types */
    if(which != postorder && which != leaf)
        return;

    /* nodep points at the tree's stored key pointer, i.e. our counter */
    counter = *(struct darshan_common_val_counter**)nodep;
    DARSHAN_COMMON_VAL_COUNTER_INC(walker_val_p, walker_cnt_p,
        counter->val, counter->freq);

    return;
}
/* ordering function for the common value tree: compares two
 * darshan_common_val_counter records by their 'val' field only
 *
 * returns -1 if a < b, 1 if a > b, and 0 if the values are equal
 */
static int darshan_common_val_compare(const void* a_p, const void* b_p)
{
    const struct darshan_common_val_counter* lhs = a_p;
    const struct darshan_common_val_counter* rhs = b_p;

    /* each comparison yields 0 or 1, so the difference is -1, 0, or 1 */
    return((lhs->val > rhs->val) - (lhs->val < rhs->val));
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -60,6 +60,8 @@ struct mpiio_file_runtime
double last_meta_end;
double last_read_end;
double last_write_end;
void *access_root;
int access_count;
UT_hash_handle hlink;
};
......@@ -141,20 +143,20 @@ static void mpiio_shutdown(void);
if(!file) break; \
DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size); \
size = size * __count; \
DARSHAN_BUCKET_INC(file->file_record, MPIIO_SIZE_READ_AGG_0_100, size); \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_BYTES_READ, size); \
DARSHAN_COUNTER_INC(file->file_record, __counter, 1); \
DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_READ_AGG_0_100]), size); \
darshan_common_val_counter(&file->access_root, &file->access_count, size); \
file->file_record->counters[MPIIO_BYTES_READ] += size; \
file->file_record->counters[__counter] += 1; \
if(file->last_io_type == DARSHAN_IO_WRITE) \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_RW_SWITCHES, 1); \
file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \
file->last_io_type = DARSHAN_IO_READ; \
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, __tm1, __tm2, \
file->last_read_end, MPIIO_F_READ_TIME); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_READ_START_TIMESTAMP) == 0) \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_READ_START_TIMESTAMP, __tm1); \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_READ_END_TIMESTAMP, __tm2); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_MAX_READ_TIME) < __elapsed) { \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_MAX_READ_TIME, __elapsed); \
DARSHAN_COUNTER_SET(file->file_record, MPIIO_MAX_READ_TIME_SIZE, size); } \
if(file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0) \
file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] = __tm1; \
file->file_record->fcounters[MPIIO_F_READ_END_TIMESTAMP] = __tm2; \
if(file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] < __elapsed) { \
file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] = __elapsed; \
file->file_record->counters[MPIIO_MAX_READ_TIME_SIZE] = size; } \
DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_READ_TIME], __tm1, __tm2, file->last_read_end); \
} while(0)
#define MPIIO_RECORD_WRITE(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \
......@@ -162,25 +164,24 @@ static void mpiio_shutdown(void);
int size = 0; \
double __elapsed = __tm2-__tm1; \
if(__ret != MPI_SUCCESS) break; \
if(__ret != MPI_SUCCESS) break; \
file = mpiio_file_by_fh(__fh); \
if(!file) break; \
DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size); \
size = size * __count; \
DARSHAN_BUCKET_INC(file->file_record, MPIIO_SIZE_WRITE_AGG_0_100, size); \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_BYTES_WRITTEN, size); \
DARSHAN_COUNTER_INC(file->file_record, __counter, 1); \
DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_WRITE_AGG_0_100]), size); \
darshan_common_val_counter(&file->access_root, &file->access_count, size); \
file->file_record->counters[MPIIO_BYTES_WRITTEN] += size; \
file->file_record->counters[__counter] += 1; \
if(file->last_io_type == DARSHAN_IO_READ) \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_RW_SWITCHES, 1); \
file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \
file->last_io_type = DARSHAN_IO_WRITE; \
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, __tm1, __tm2, \
file->last_write_end, MPIIO_F_WRITE_TIME); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_WRITE_START_TIMESTAMP) == 0) \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_WRITE_START_TIMESTAMP, __tm1); \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_WRITE_END_TIMESTAMP, __tm2); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_MAX_WRITE_TIME) < __elapsed) { \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_MAX_WRITE_TIME, __elapsed); \
DARSHAN_COUNTER_SET(file->file_record, MPIIO_MAX_WRITE_TIME_SIZE, size); } \
if(file->file_record->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] == 0) \
file->file_record->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] = __tm1; \
file->file_record->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] = __tm2; \
if(file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] < __elapsed) { \
file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] = __elapsed; \
file->file_record->counters[MPIIO_MAX_WRITE_TIME_SIZE] = size; } \
DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_WRITE_TIME], __tm1, __tm2, file->last_write_end); \
} while(0)
/**********************************************************
......@@ -222,24 +223,25 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_F
if(file)
{
file->file_record->rank = my_rank;
DARSHAN_COUNTER_SET(file->file_record, MPIIO_MODE, amode);
file->file_record->counters[MPIIO_MODE] = amode;
DARSHAN_MPI_CALL(PMPI_Comm_size)(comm, &comm_size);
if(comm_size == 1)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_INDEP_OPENS, 1);
file->file_record->counters[MPIIO_INDEP_OPENS] += 1;
}
else
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_COLL_OPENS, 1);
file->file_record->counters[MPIIO_COLL_OPENS] += 1;
}
if(info != MPI_INFO_NULL)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_HINTS, 1);
file->file_record->counters[MPIIO_HINTS] += 1;
}
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_OPEN_TIMESTAMP) == 0)
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_OPEN_TIMESTAMP, tm1);
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_meta_end, MPIIO_F_META_TIME);
if(file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0)
file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] = tm1;
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_META_TIME],
tm1, tm2, file->last_meta_end);
}
MPIIO_UNLOCK();
......@@ -738,9 +740,10 @@ int MPI_File_sync(MPI_File fh)
file = mpiio_file_by_fh(fh);
if(file)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_SYNCS, 1);
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_write_end, MPIIO_F_WRITE_TIME);
file->file_record->counters[MPIIO_SYNCS] += 1;
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_WRITE_TIME],
tm1, tm2, file->last_write_end);
}
MPIIO_UNLOCK();
}
......@@ -772,12 +775,13 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
file = mpiio_file_by_fh(fh);
if(file)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_VIEWS, 1);
file->file_record->counters[MPIIO_VIEWS] += 1;
if(info != MPI_INFO_NULL)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_HINTS, 1);
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_meta_end, MPIIO_F_META_TIME);
file->file_record->counters[MPIIO_HINTS] += 1;
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_META_TIME],
tm1, tm2, file->last_meta_end);
}
}
MPIIO_UNLOCK();
......@@ -802,10 +806,11 @@ int MPI_File_close(MPI_File *fh)
file = mpiio_file_by_fh(tmp_fh);
if(file)
{
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_CLOSE_TIMESTAMP,
darshan_core_wtime());
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_meta_end, MPIIO_F_META_TIME);
file->file_record->fcounters[MPIIO_F_CLOSE_TIMESTAMP] =
darshan_core_wtime();
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_META_TIME],
tm1, tm2, file->last_meta_end);
mpiio_file_close_fh(tmp_fh);
}
MPIIO_UNLOCK();
......@@ -1023,10 +1028,23 @@ static int mpiio_file_compare(const void* a_p, const void* b_p)
static void mpiio_begin_shutdown()
{
int i;
struct mpiio_file_runtime* tmp;
assert(mpiio_runtime);
MPIIO_LOCK();
instrumentation_disabled = 1;
/* go through and set the 4 most common access sizes for MPI-IO */
for(i = 0; i < mpiio_runtime->file_array_ndx; i++)
{
tmp = &(mpiio_runtime->file_runtime_array[i]);
darshan_walk_common_vals(tmp->access_root,
&(tmp->file_record->counters[MPIIO_ACCESS1_ACCESS]),
&(tmp->file_record->counters[MPIIO_ACCESS1_COUNT]));
}
MPIIO_UNLOCK();
return;
......
......@@ -139,13 +139,13 @@ static void null_shutdown(void);
rec = null_record_by_name(__name); \
if(!rec) break; \
/* increment counter indicating number of calls to 'bar' */ \
DARSHAN_COUNTER_INC(rec->record_p, NULL_BARS, 1); \
rec->record_p->counters[NULL_BARS] += 1; \
/* store data value for most recent call to 'bar' */ \
DARSHAN_COUNTER_SET(rec->record_p, NULL_BAR_DAT, __dat); \
rec->record_p->counters[NULL_BAR_DAT] = __dat; \
/* store timestamp of most recent call to 'bar' */ \
DARSHAN_COUNTER_F_SET(rec->record_p, NULL_F_BAR_TIMESTAMP, __tm1); \
rec->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \
/* store duration of most recent call to 'bar' */ \
DARSHAN_COUNTER_F_SET(rec->record_p, NULL_F_BAR_DURATION, elapsed); \
rec->record_p->fcounters[NULL_F_BAR_DURATION] = elapsed; \
} while(0)
/**********************************************************
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment