Commit 73541538 authored by Shane Snyder's avatar Shane Snyder

refactor and add common access counts to mpiio

parent 6406c872
......@@ -10,7 +10,6 @@
#include "darshan-log-format.h"
/* TODO: slowest/fastest rank (f)counters */
/* TODO: access size common counters */
/* TODO: maybe use a counter to track cases in which a derived datatype is used? */
#define MPIIO_COUNTERS \
......
......@@ -7,54 +7,19 @@
#ifndef __DARSHAN_COMMON_H
#define __DARSHAN_COMMON_H
/* simple macros for manipulating a module's counters
*
* NOTE: These macros assume a module's record stores integer
* and floating point counters in arrays, named counters and
* fcounters, respectively. __rec_p is a pointer to the
* data record, __counter is the counter in question, and
* __value is the corresponding data value.
*/
/* assign __value to the integer counter __counter of record __rec_p */
#define DARSHAN_COUNTER_SET(__rec_p, __counter, __value) \
    do { \
        (__rec_p)->counters[(__counter)] = (__value); \
    } while (0)

/* assign __value to the floating point counter __counter of record __rec_p */
#define DARSHAN_COUNTER_F_SET(__rec_p, __counter, __value) \
    do { \
        (__rec_p)->fcounters[(__counter)] = (__value); \
    } while (0)

/* add __value to the integer counter __counter of record __rec_p */
#define DARSHAN_COUNTER_INC(__rec_p, __counter, __value) \
    do { \
        (__rec_p)->counters[(__counter)] += (__value); \
    } while (0)

/* add __value to the floating point counter __counter of record __rec_p */
#define DARSHAN_COUNTER_F_INC(__rec_p, __counter, __value) \
    do { \
        (__rec_p)->fcounters[(__counter)] += (__value); \
    } while (0)

/* expands to the integer counter __counter of record __rec_p */
#define DARSHAN_COUNTER_VALUE(__rec_p, __counter) \
    ((__rec_p)->counters[(__counter)])

/* expands to the floating point counter __counter of record __rec_p */
#define DARSHAN_COUNTER_F_VALUE(__rec_p, __counter) \
    ((__rec_p)->fcounters[(__counter)])
/* set __counter equal to the max of __counter or the passed in __value
 *
 * NOTE: __rec_p is a pointer to a record holding an integer 'counters'
 * array and __counter is the index into it. __value is parenthesized at
 * every use so that operator expressions (e.g. 'a ? b : c' or 'a | b')
 * compare as a whole; it may still be evaluated twice, so it must not
 * have side effects.
 */
#define DARSHAN_COUNTER_MAX(__rec_p, __counter, __value) do{ \
    if((__rec_p)->counters[(__counter)] < (__value)) \
        (__rec_p)->counters[(__counter)] = (__value); \
} while(0)
/* increment a timer counter, making sure not to account for overlap
* with previous operations
*
* NOTE: __tm1 is the start timestamp of the operation, __tm2 is the end
* timestamp of the operation, and __last is the timestamp of the end of
* the previous I/O operation (which we don't want to overlap with).
* NOTE: __timer is the corresponding timer counter variable, __tm1 is
* the start timestamp of the operation, __tm2 is the end timestamp of
* the operation, and __last is the timestamp of the end of the previous
* I/O operation (which we don't want to overlap with).
*/
#define DARSHAN_COUNTER_F_INC_NO_OVERLAP(__rec_p, __tm1, __tm2, __last, __counter) do{ \
#define DARSHAN_TIMER_INC_NO_OVERLAP(__timer, __tm1, __tm2, __last) do{ \
if(__tm1 > __last) \
DARSHAN_COUNTER_F_INC(__rec_p, __counter, (__tm2 - __tm1)); \
__timer += (__tm2 - __tm1); \
else \
DARSHAN_COUNTER_F_INC(__rec_p, __counter, (__tm2 - __last)); \
__timer += (__tm2 - __last); \
if(__tm2 > __last) \
__last = __tm2; \
} while(0)
......@@ -63,7 +28,7 @@
*
* NOTE: This macro can be used to build a histogram of access
* sizes, offsets, etc. It assumes a 10-bucket histogram, with
* __counter_base representing the first counter in the sequence
* __bucket_base_p pointing to the first counter in the sequence
* of buckets (i.e., the smallest bucket). The size ranges of each
* bucket are:
* * 0 - 100 bytes
......@@ -77,29 +42,75 @@
* * 100 MiB - 1 GiB
* * 1 GiB+
*/
#define DARSHAN_BUCKET_INC(__rec_p, __counter_base, __value) do {\
#define DARSHAN_BUCKET_INC(__bucket_base_p, __value) do {\
if(__value < 101) \
(__rec_p)->counters[__counter_base] += 1; \
*(__bucket_base_p) += 1; \
else if(__value < 1025) \
(__rec_p)->counters[__counter_base+1] += 1; \
*(__bucket_base_p + 1) += 1; \
else if(__value < 10241) \
(__rec_p)->counters[__counter_base+2] += 1; \
*(__bucket_base_p + 2) += 1; \
else if(__value < 102401) \
(__rec_p)->counters[__counter_base+3] += 1; \
*(__bucket_base_p + 3) += 1; \
else if(__value < 1048577) \
(__rec_p)->counters[__counter_base+4] += 1; \
*(__bucket_base_p + 4) += 1; \
else if(__value < 4194305) \
(__rec_p)->counters[__counter_base+5] += 1; \
*(__bucket_base_p + 5) += 1; \
else if(__value < 10485761) \
(__rec_p)->counters[__counter_base+6] += 1; \
*(__bucket_base_p + 6) += 1; \
else if(__value < 104857601) \
(__rec_p)->counters[__counter_base+7] += 1; \
*(__bucket_base_p + 7) += 1; \
else if(__value < 1073741825) \
(__rec_p)->counters[__counter_base+8] += 1; \
*(__bucket_base_p + 8) += 1; \
else \
(__rec_p)->counters[__counter_base+9] += 1; \
*(__bucket_base_p + 9) += 1; \
} while(0)
/* potentially set or increment a common value counter, depending on the __count
 * for the given __value
 *
 * NOTE: This macro is hardcoded to expect that Darshan will only track the 4
 * most common (i.e., frequently occurring) values. __val_p is a pointer to the
 * base of the value counters (i.e., the first of 4 contiguous common value
 * counters) and __cnt_p is a pointer to the base of the count counters (i.e.
 * the first of 4 contiguous common count counters). It is assumed your counters
 * are stored as int64_t types.
 *
 * Behavior:
 *  - a __value of 0 is ignored
 *  - if __value is already present in one of the 4 slots, its count is
 *    incremented by __count
 *  - otherwise the slot with the smallest count (lowest index on ties) is
 *    overwritten, but only if __count is strictly greater than that count
 *
 * Every argument is evaluated exactly once and captured in a local, so
 * operator expressions and expressions with side effects are safe to pass.
 * Locals carry a 'cvc_' prefix to keep them from capturing caller names.
 */
#define DARSHAN_COMMON_VAL_COUNTER_INC(__val_p, __cnt_p, __value, __count) do {\
    int64_t* const cvc_vals = (__val_p); \
    int64_t* const cvc_cnts = (__cnt_p); \
    const int64_t cvc_value = (__value); \
    const int64_t cvc_count = (__count); \
    int64_t cvc_min = cvc_cnts[0]; \
    int cvc_min_index = 0; \
    int cvc_set = 0; \
    int cvc_i; \
    /* leaves the enclosing do/while(0): zero values are never tracked */ \
    if(cvc_value == 0) break; \
    for(cvc_i = 0; cvc_i < 4; cvc_i++) { \
        /* increment bucket if already exists */ \
        if(cvc_vals[cvc_i] == cvc_value) { \
            cvc_cnts[cvc_i] += cvc_count; \
            cvc_set = 1; \
            break; \
        } \
        /* otherwise find the least frequently used bucket */ \
        else if(cvc_cnts[cvc_i] < cvc_min) { \
            cvc_min = cvc_cnts[cvc_i]; \
            cvc_min_index = cvc_i; \
        } \
    } \
    /* evict the least frequent slot only for a strictly larger count */ \
    if(!cvc_set && (cvc_count > cvc_min)) { \
        cvc_cnts[cvc_min_index] = cvc_count; \
        cvc_vals[cvc_min_index] = cvc_value; \
    } \
} while(0)
/* maximum number of common values that darshan will track per file at
 * runtime; at shutdown time these will be reduced to the 4 most
 * frequently occurring ones
 */
#define DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT 32
/* one tracked common value together with the number of times it was seen */
struct darshan_common_val_counter
{
/* the value being tracked (e.g., an access size or stride) */
int64_t val;
/* how many times 'val' has been observed so far */
int freq;
};
/* i/o type (read or write) */
enum darshan_io_type
{
......@@ -118,6 +129,39 @@ enum darshan_io_type
* to absolute paths and filters out some potential noise in the
* path string.
*/
char* darshan_clean_file_path(const char* path);
char* darshan_clean_file_path(
const char* path);
/* darshan_common_val_counter()
*
* Potentially increment an existing common value counter or allocate
* a new one to keep track of commonly occurring values. Example use
* cases would be to track the most frequent access sizes or strides
* used by a specific module, for instance. 'common_val_root' is the
* root pointer for the tree which stores common value info,
* 'common_val_count' is a pointer to the number of nodes in the
* tree (i.e., the number of allocated common value counters), and
* 'val' is the new value to attempt to add.
*/
void darshan_common_val_counter(
void** common_val_root,
int* common_val_count,
int64_t val);
/* darshan_walk_common_vals()
*
* Walks the tree of common value counters and determines the 4 most
* frequently occurring values, storing the common values in the
* appropriate counter fields of the given record. 'common_val_root'
* is the root of the tree which stores the common value info, 'val_p'
* is a pointer to the base counter (i.e., the first) of the common
* values (which are assumed to be 4 total and contiguous in memory),
* and 'cnt_p' is a pointer to the base counter of the common counts
* (which are again expected to be contiguous in memory).
*/
void darshan_walk_common_vals(
void* common_val_root,
int64_t* val_p,
int64_t* cnt_p);
#endif /* __DARSHAN_COMMON_H */
......@@ -3,6 +3,8 @@
* See COPYRIGHT notice in top-level directory.
*
*/
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE
#include "darshan-runtime-config.h"
......@@ -10,9 +12,15 @@
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <search.h>
#include <assert.h>
#include "darshan.h"
static int darshan_common_val_compare(const void* a_p, const void* b_p);
static void darshan_common_val_walker(const void* nodep, const VISIT which,
const int depth);
char* darshan_clean_file_path(const char* path)
{
char* newpath = NULL;
......@@ -70,6 +78,101 @@ char* darshan_clean_file_path(const char* path)
return(newpath);
}
/* HACK: global variables for determining 4 most common values
 *
 * The twalk() callback only receives (node, visit, depth), so the
 * destination counter pointers are passed to darshan_common_val_walker()
 * through these file-scope variables. darshan_walk_common_vals() sets them
 * immediately before walking the tree.
 * NOTE(review): this is shared state; two walks running concurrently
 * would interfere with each other -- confirm callers serialize.
 */
static int64_t* walker_val_p = NULL;
static int64_t* walker_cnt_p = NULL;
/* darshan_common_val_counter()
 *
 * Record one occurrence of 'val' in the search tree rooted at
 * '*common_val_root'.
 *
 *  - a 'val' of 0 is ignored
 *  - if 'val' is already in the tree its frequency is incremented
 *  - otherwise a new node is allocated and inserted, provided fewer than
 *    DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT values are being tracked;
 *    '*common_val_count' is incremented on successful insertion
 *
 * Allocation failures (of the counter itself or of the tree node) are
 * not reported to the caller; the value is simply not tracked.
 */
void darshan_common_val_counter(void **common_val_root, int *common_val_count,
    int64_t val)
{
    struct darshan_common_val_counter* counter;
    struct darshan_common_val_counter* found;
    struct darshan_common_val_counter tmp_counter;
    void* tmp;

    /* don't count any values of 0 */
    if(val == 0)
        return;

    /* check to see if this val is already recorded */
    tmp_counter.val = val;
    tmp_counter.freq = 1;
    tmp = tfind(&tmp_counter, common_val_root, darshan_common_val_compare);
    if(tmp)
    {
        found = *(struct darshan_common_val_counter**)tmp;
        found->freq++;
        return;
    }

    /* we can add a new one as long as we haven't hit the limit */
    if(*common_val_count >= DARSHAN_COMMON_VAL_MAX_RUNTIME_COUNT)
        return;

    counter = malloc(sizeof(*counter));
    if(!counter)
        return;

    counter->val = val;
    counter->freq = 1;

    tmp = tsearch(counter, common_val_root, darshan_common_val_compare);
    if(!tmp)
    {
        /* tsearch() returns NULL when it cannot allocate a tree node;
         * nothing was inserted, so release the counter instead of
         * dereferencing NULL and leaking it
         */
        free(counter);
        return;
    }

    found = *(struct darshan_common_val_counter**)tmp;
    /* if we get a new answer out here we are in trouble; this was
     * already checked with the tfind()
     */
    assert(found == counter);

    (*common_val_count)++;

    return;
}
/* Walk the common value tree rooted at 'common_val_root', feeding every
 * node into the 4-slot common value/count counters starting at 'val_p'
 * and 'cnt_p', then destroy the tree.
 *
 * NOTE: 'common_val_root' is passed by value, so the caller's copy of the
 * root pointer is not reset here even though the tree has been freed.
 */
void darshan_walk_common_vals(void *common_val_root, int64_t* val_p,
int64_t* cnt_p)
{
/* hand the destination counters to the walker callback via globals */
walker_val_p = val_p;
walker_cnt_p = cnt_p;
/* visit every node; the callback updates the counters */
twalk(common_val_root, darshan_common_val_walker);
/* release the tree, calling free() on each stored counter */
tdestroy(common_val_root, free);
return;
}
/* twalk() callback: offer the visited node's value and frequency to the
 * common value counters published in walker_val_p / walker_cnt_p.
 *
 * Only 'postorder' and 'leaf' visits are acted on; every other visit type
 * is ignored. 'depth' is unused.
 */
static void darshan_common_val_walker(const void* nodep, const VISIT which,
    const int depth)
{
    struct darshan_common_val_counter* node_counter;

    if(which == postorder || which == leaf)
    {
        /* nodep points at the tree's stored key pointer */
        node_counter = *(struct darshan_common_val_counter**)nodep;
        DARSHAN_COMMON_VAL_COUNTER_INC(walker_val_p, walker_cnt_p,
            node_counter->val, node_counter->freq);
    }

    return;
}
/* ordering function for the common value tree: compares two
 * darshan_common_val_counter entries by their 'val' field only.
 *
 * returns -1 if a < b, 1 if a > b, and 0 if the values are equal
 */
static int darshan_common_val_compare(const void* a_p, const void* b_p)
{
    const struct darshan_common_val_counter* lhs = a_p;
    const struct darshan_common_val_counter* rhs = b_p;

    /* (greater) - (less) yields exactly 1, -1 or 0 */
    return((lhs->val > rhs->val) - (lhs->val < rhs->val));
}
/*
* Local variables:
* c-indent-level: 4
......
......@@ -60,6 +60,8 @@ struct mpiio_file_runtime
double last_meta_end;
double last_read_end;
double last_write_end;
void *access_root;
int access_count;
UT_hash_handle hlink;
};
......@@ -141,20 +143,20 @@ static void mpiio_shutdown(void);
if(!file) break; \
DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size); \
size = size * __count; \
DARSHAN_BUCKET_INC(file->file_record, MPIIO_SIZE_READ_AGG_0_100, size); \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_BYTES_READ, size); \
DARSHAN_COUNTER_INC(file->file_record, __counter, 1); \
DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_READ_AGG_0_100]), size); \
darshan_common_val_counter(&file->access_root, &file->access_count, size); \
file->file_record->counters[MPIIO_BYTES_READ] += size; \
file->file_record->counters[__counter] += 1; \
if(file->last_io_type == DARSHAN_IO_WRITE) \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_RW_SWITCHES, 1); \
file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \
file->last_io_type = DARSHAN_IO_READ; \
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, __tm1, __tm2, \
file->last_read_end, MPIIO_F_READ_TIME); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_READ_START_TIMESTAMP) == 0) \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_READ_START_TIMESTAMP, __tm1); \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_READ_END_TIMESTAMP, __tm2); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_MAX_READ_TIME) < __elapsed) { \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_MAX_READ_TIME, __elapsed); \
DARSHAN_COUNTER_SET(file->file_record, MPIIO_MAX_READ_TIME_SIZE, size); } \
if(file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0) \
file->file_record->fcounters[MPIIO_F_READ_START_TIMESTAMP] = __tm1; \
file->file_record->fcounters[MPIIO_F_READ_END_TIMESTAMP] = __tm2; \
if(file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] < __elapsed) { \
file->file_record->fcounters[MPIIO_F_MAX_READ_TIME] = __elapsed; \
file->file_record->counters[MPIIO_MAX_READ_TIME_SIZE] = size; } \
DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_READ_TIME], __tm1, __tm2, file->last_read_end); \
} while(0)
#define MPIIO_RECORD_WRITE(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \
......@@ -162,25 +164,24 @@ static void mpiio_shutdown(void);
int size = 0; \
double __elapsed = __tm2-__tm1; \
if(__ret != MPI_SUCCESS) break; \
if(__ret != MPI_SUCCESS) break; \
file = mpiio_file_by_fh(__fh); \
if(!file) break; \
DARSHAN_MPI_CALL(PMPI_Type_size)(__datatype, &size); \
size = size * __count; \
DARSHAN_BUCKET_INC(file->file_record, MPIIO_SIZE_WRITE_AGG_0_100, size); \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_BYTES_WRITTEN, size); \
DARSHAN_COUNTER_INC(file->file_record, __counter, 1); \
DARSHAN_BUCKET_INC(&(file->file_record->counters[MPIIO_SIZE_WRITE_AGG_0_100]), size); \
darshan_common_val_counter(&file->access_root, &file->access_count, size); \
file->file_record->counters[MPIIO_BYTES_WRITTEN] += size; \
file->file_record->counters[__counter] += 1; \
if(file->last_io_type == DARSHAN_IO_READ) \
DARSHAN_COUNTER_INC(file->file_record, MPIIO_RW_SWITCHES, 1); \
file->file_record->counters[MPIIO_RW_SWITCHES] += 1; \
file->last_io_type = DARSHAN_IO_WRITE; \
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, __tm1, __tm2, \
file->last_write_end, MPIIO_F_WRITE_TIME); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_WRITE_START_TIMESTAMP) == 0) \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_WRITE_START_TIMESTAMP, __tm1); \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_WRITE_END_TIMESTAMP, __tm2); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_MAX_WRITE_TIME) < __elapsed) { \
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_MAX_WRITE_TIME, __elapsed); \
DARSHAN_COUNTER_SET(file->file_record, MPIIO_MAX_WRITE_TIME_SIZE, size); } \
if(file->file_record->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] == 0) \
file->file_record->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] = __tm1; \
file->file_record->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] = __tm2; \
if(file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] < __elapsed) { \
file->file_record->fcounters[MPIIO_F_MAX_WRITE_TIME] = __elapsed; \
file->file_record->counters[MPIIO_MAX_WRITE_TIME_SIZE] = size; } \
DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[MPIIO_F_WRITE_TIME], __tm1, __tm2, file->last_write_end); \
} while(0)
/**********************************************************
......@@ -222,24 +223,25 @@ int MPI_File_open(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_F
if(file)
{
file->file_record->rank = my_rank;
DARSHAN_COUNTER_SET(file->file_record, MPIIO_MODE, amode);
file->file_record->counters[MPIIO_MODE] = amode;
DARSHAN_MPI_CALL(PMPI_Comm_size)(comm, &comm_size);
if(comm_size == 1)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_INDEP_OPENS, 1);
file->file_record->counters[MPIIO_INDEP_OPENS] += 1;
}
else
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_COLL_OPENS, 1);
file->file_record->counters[MPIIO_COLL_OPENS] += 1;
}
if(info != MPI_INFO_NULL)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_HINTS, 1);
file->file_record->counters[MPIIO_HINTS] += 1;
}
if(DARSHAN_COUNTER_F_VALUE(file->file_record, MPIIO_F_OPEN_TIMESTAMP) == 0)
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_OPEN_TIMESTAMP, tm1);
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_meta_end, MPIIO_F_META_TIME);
if(file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0)
file->file_record->fcounters[MPIIO_F_OPEN_TIMESTAMP] = tm1;
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_META_TIME],
tm1, tm2, file->last_meta_end);
}
MPIIO_UNLOCK();
......@@ -738,9 +740,10 @@ int MPI_File_sync(MPI_File fh)
file = mpiio_file_by_fh(fh);
if(file)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_SYNCS, 1);
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_write_end, MPIIO_F_WRITE_TIME);
file->file_record->counters[MPIIO_SYNCS] += 1;
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_WRITE_TIME],
tm1, tm2, file->last_write_end);
}
MPIIO_UNLOCK();
}
......@@ -772,12 +775,13 @@ int MPI_File_set_view(MPI_File fh, MPI_Offset disp, MPI_Datatype etype,
file = mpiio_file_by_fh(fh);
if(file)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_VIEWS, 1);
file->file_record->counters[MPIIO_VIEWS] += 1;
if(info != MPI_INFO_NULL)
{
DARSHAN_COUNTER_INC(file->file_record, MPIIO_HINTS, 1);
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_meta_end, MPIIO_F_META_TIME);
file->file_record->counters[MPIIO_HINTS] += 1;
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_META_TIME],
tm1, tm2, file->last_meta_end);
}
}
MPIIO_UNLOCK();
......@@ -802,10 +806,11 @@ int MPI_File_close(MPI_File *fh)
file = mpiio_file_by_fh(tmp_fh);
if(file)
{
DARSHAN_COUNTER_F_SET(file->file_record, MPIIO_F_CLOSE_TIMESTAMP,
darshan_core_wtime());
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, tm1, tm2,
file->last_meta_end, MPIIO_F_META_TIME);
file->file_record->fcounters[MPIIO_F_CLOSE_TIMESTAMP] =
darshan_core_wtime();
DARSHAN_TIMER_INC_NO_OVERLAP(
file->file_record->fcounters[MPIIO_F_META_TIME],
tm1, tm2, file->last_meta_end);
mpiio_file_close_fh(tmp_fh);
}
MPIIO_UNLOCK();
......@@ -1023,10 +1028,23 @@ static int mpiio_file_compare(const void* a_p, const void* b_p)
static void mpiio_begin_shutdown()
{
int i;
struct mpiio_file_runtime* tmp;
assert(mpiio_runtime);
MPIIO_LOCK();
instrumentation_disabled = 1;
/* go through and set the 4 most common access sizes for MPI-IO */
for(i = 0; i < mpiio_runtime->file_array_ndx; i++)
{
tmp = &(mpiio_runtime->file_runtime_array[i]);
darshan_walk_common_vals(tmp->access_root,
&(tmp->file_record->counters[MPIIO_ACCESS1_ACCESS]),
&(tmp->file_record->counters[MPIIO_ACCESS1_COUNT]));
}
MPIIO_UNLOCK();
return;
......
......@@ -139,13 +139,13 @@ static void null_shutdown(void);
rec = null_record_by_name(__name); \
if(!rec) break; \
/* increment counter indicating number of calls to 'bar' */ \
DARSHAN_COUNTER_INC(rec->record_p, NULL_BARS, 1); \
rec->record_p->counters[NULL_BARS] += 1; \
/* store data value for most recent call to 'bar' */ \
DARSHAN_COUNTER_SET(rec->record_p, NULL_BAR_DAT, __dat); \
rec->record_p->counters[NULL_BAR_DAT] = __dat; \
/* store timestamp of most recent call to 'bar' */ \
DARSHAN_COUNTER_F_SET(rec->record_p, NULL_F_BAR_TIMESTAMP, __tm1); \
rec->record_p->fcounters[NULL_F_BAR_TIMESTAMP] = __tm1; \
/* store duration of most recent call to 'bar' */ \
DARSHAN_COUNTER_F_SET(rec->record_p, NULL_F_BAR_DURATION, elapsed); \
rec->record_p->fcounters[NULL_F_BAR_DURATION] = elapsed; \
} while(0)
/**********************************************************
......
......@@ -85,24 +85,6 @@ DARSHAN_FORWARD_DECL(aio_return64, ssize_t, (struct aiocb64 *aiocbp));
DARSHAN_FORWARD_DECL(lio_listio, int, (int mode, struct aiocb *const aiocb_list[], int nitems, struct sigevent *sevp));
DARSHAN_FORWARD_DECL(lio_listio64, int, (int mode, struct aiocb64 *const aiocb_list[], int nitems, struct sigevent *sevp));
/* maximum number of access sizes and stride sizes that darshan will track
* per file at runtime; at log time they will be reduced into the 4 most
* frequently occurring ones
*/
#define POSIX_MAX_ACCESS_COUNT_RUNTIME 32
enum posix_counter_type
{
POSIX_COUNTER_ACCESS,
POSIX_COUNTER_STRIDE
};
struct posix_access_counter
{
int64_t size;
int freq;
};
/* struct to track information about aio operations in flight */
struct posix_aio_tracker
{
......@@ -204,22 +186,12 @@ static int instrumentation_disabled = 0;
static int my_rank = -1;
static int darshan_mem_alignment = 1;
/* global variables for determining 4 most common accesses/strides */
static struct posix_file_runtime* walker_file = NULL;
static int walker_validx;
static int walker_cntidx;
static void posix_runtime_initialize(void);
static struct posix_file_runtime* posix_file_by_name(const char *name);
static struct posix_file_runtime* posix_file_by_name_setfd(const char* name, int fd);
static struct posix_file_runtime* posix_file_by_fd(int fd);
static void posix_file_close_fd(int fd);
static void posix_access_counter(struct posix_file_runtime* file, ssize_t size,
enum posix_counter_type type);
static void posix_access_walker(const void* nodep, const VISIT which, const int depth);
static void posix_walk_file_accesses(void);
static int posix_access_compare(const void* a_p, const void* b_p);
static int posix_file_compare(const void* a, const void* b);
static int posix_record_compare(const void* a, const void* b);
static void posix_aio_tracker_add(int fd, void *aiocbp);
static struct posix_aio_tracker* posix_aio_tracker_del(int fd, void *aiocbp);
......@@ -234,31 +206,6 @@ static void posix_shutdown(void);
#define POSIX_LOCK() pthread_mutex_lock(&posix_runtime_mutex)
#define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex)
#define POSIX_COMMON_COUNTER_INC(__rec_p, __value, __count, __validx, __cntidx) do {\
int i; \
int set = 0; \
int64_t min = DARSHAN_COUNTER_VALUE(__rec_p, __cntidx); \
int min_index = 0; \
if(__value == 0) break; \
for(i=0; i<4; i++) { \
/* increment bucket if already exists */ \
if(DARSHAN_COUNTER_VALUE(__rec_p, __validx + i) == __value) { \
DARSHAN_COUNTER_INC(__rec_p, __cntidx + i, __count); \
set = 1; \
break; \
} \
/* otherwise find the least frequently used bucket */ \
else if(DARSHAN_COUNTER_VALUE(__rec_p, __cntidx + i) < min) { \
min = DARSHAN_COUNTER_VALUE(__rec_p, __cntidx + i); \
min_index = i; \
} \
} \
if(!set && (__count > min)) { \
DARSHAN_COUNTER_SET(__rec_p, __cntidx+min_index, __count); \
DARSHAN_COUNTER_SET(__rec_p, __validx+min_index, __value); \
} \
} while(0)
#define POSIX_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2) do { \
struct posix_file_runtime* file; \
char* exclude; \
......@@ -274,17 +221,17 @@ static void posix_shutdown(void);
if(!file) break; \
file->file_record->rank = my_rank; \
if(__mode) \
DARSHAN_COUNTER_SET(file->file_record, POSIX_MODE, __mode); \
file->file_record->counters[POSIX_MODE] = __mode; \
file->offset = 0; \
file->last_byte_written = 0; \
file->last_byte_read = 0; \
if(__stream_flag)\
DARSHAN_COUNTER_INC(file->file_record, POSIX_FOPENS, 1); \
file->file_record->counters[POSIX_FOPENS] += 1; \
else \
DARSHAN_COUNTER_INC(file->file_record, POSIX_OPENS, 1); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, POSIX_F_OPEN_TIMESTAMP) == 0) \
DARSHAN_COUNTER_F_SET(file->file_record, POSIX_F_OPEN_TIMESTAMP, __tm1); \
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, __tm1, __tm2, file->last_meta_end, POSIX_F_META_TIME); \
file->file_record->counters[POSIX_OPENS] += 1; \
if(file->file_record->fcounters[POSIX_F_OPEN_TIMESTAMP] == 0) \
file->file_record->fcounters[POSIX_F_OPEN_TIMESTAMP] = __tm1; \
DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[POSIX_F_META_TIME], __tm1, __tm2, file->last_meta_end); \
} while(0)
#define POSIX_RECORD_READ(__ret, __fd, __pread_flag, __pread_offset, __aligned, __stream_flag, __tm1, __tm2) do{ \
......@@ -301,9 +248,9 @@ static void posix_shutdown(void);
else \
this_offset = file->offset; \
if(this_offset > file->last_byte_read) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_SEQ_READS, 1); \
file->file_record->counters[POSIX_SEQ_READS] += 1; \
if(this_offset == (file->last_byte_read + 1)) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_CONSEC_READS, 1); \
file->file_record->counters[POSIX_CONSEC_READS] += 1; \
if(this_offset > 0 && this_offset > file->last_byte_read \
&& file->last_byte_read != 0) \
stride = this_offset - file->last_byte_read - 1; \
......@@ -311,30 +258,31 @@ static void posix_shutdown(void);
stride = 0; \
file->last_byte_read = this_offset + __ret - 1; \
file->offset = this_offset + __ret; \
DARSHAN_COUNTER_MAX(file->file_record, POSIX_MAX_BYTE_READ, (this_offset + __ret - 1)); \
DARSHAN_COUNTER_INC(file->file_record, POSIX_BYTES_READ, __ret); \
if(__stream_flag)\
DARSHAN_COUNTER_INC(file->file_record, POSIX_FREADS, 1); \
else\
DARSHAN_COUNTER_INC(file->file_record, POSIX_READS, 1); \
DARSHAN_BUCKET_INC(file->file_record, POSIX_SIZE_READ_0_100, __ret); \
posix_access_counter(file, __ret, POSIX_COUNTER_ACCESS); \
posix_access_counter(file, stride, POSIX_COUNTER_STRIDE); \
if(file->file_record->counters[POSIX_MAX_BYTE_READ] < (this_offset + __ret - 1)) \
file->file_record->counters[POSIX_MAX_BYTE_READ] = (this_offset + __ret - 1); \
file->file_record->counters[POSIX_BYTES_READ] += __ret; \
if(__stream_flag) \
file->file_record->counters[POSIX_FREADS] += 1; \
else \
file->file_record->counters[POSIX_READS] += 1; \
DARSHAN_BUCKET_INC(&(file->file_record->counters[POSIX_SIZE_READ_0_100]), __ret); \
darshan_common_val_counter(&file->access_root, &file->access_count, __ret); \
darshan_common_val_counter(&file->stride_root, &file->stride_count, stride); \
if(!__aligned) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_MEM_NOT_ALIGNED, 1); \
file_alignment = DARSHAN_COUNTER_VALUE(file->file_record, POSIX_FILE_ALIGNMENT); \
file->file_record->counters[POSIX_MEM_NOT_ALIGNED] += 1; \
file_alignment = file->file_record->counters[POSIX_FILE_ALIGNMENT]; \
if(file_alignment > 0 && (this_offset % file_alignment) != 0) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_FILE_NOT_ALIGNED, 1); \
file->file_record->counters[POSIX_FILE_NOT_ALIGNED] += 1; \
if(file->last_io_type == DARSHAN_IO_WRITE) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_RW_SWITCHES, 1); \
file->file_record->counters[POSIX_RW_SWITCHES] += 1; \
file->last_io_type = DARSHAN_IO_READ; \
DARSHAN_COUNTER_F_INC_NO_OVERLAP(file->file_record, __tm1, __tm2, file->last_read_end, POSIX_F_READ_TIME); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, POSIX_F_READ_START_TIMESTAMP) == 0) \
DARSHAN_COUNTER_F_SET(file->file_record, POSIX_F_READ_START_TIMESTAMP, __tm1); \
DARSHAN_COUNTER_F_SET(file->file_record, POSIX_F_READ_END_TIMESTAMP, __tm2); \
if(DARSHAN_COUNTER_F_VALUE(file->file_record, POSIX_F_MAX_READ_TIME) < __elapsed){ \
DARSHAN_COUNTER_F_SET(file->file_record, POSIX_F_MAX_READ_TIME, __elapsed); \
DARSHAN_COUNTER_SET(file->file_record, POSIX_MAX_READ_TIME_SIZE, __ret); } \
if(file->file_record->fcounters[POSIX_F_READ_START_TIMESTAMP] == 0) \
file->file_record->fcounters[POSIX_F_READ_START_TIMESTAMP] = __tm1; \
file->file_record->fcounters[POSIX_F_READ_END_TIMESTAMP] = __tm2; \
if(file->file_record->fcounters[POSIX_F_MAX_READ_TIME] < __elapsed) { \
file->file_record->fcounters[POSIX_F_MAX_READ_TIME] = __elapsed; \
file->file_record->counters[POSIX_MAX_READ_TIME_SIZE] = __ret; } \
DARSHAN_TIMER_INC_NO_OVERLAP(file->file_record->fcounters[POSIX_F_READ_TIME], __tm1, __tm2, file->last_read_end); \
} while(0)
#define POSIX_RECORD_WRITE(__ret, __fd, __pwrite_flag, __pwrite_offset, __aligned, __stream_flag, __tm1, __tm2) do{ \
......@@ -351,9 +299,9 @@ static void posix_shutdown(void);
else \
this_offset = file->offset; \
if(this_offset > file->last_byte_written) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_SEQ_WRITES, 1); \
file->file_record->counters[POSIX_SEQ_WRITES] += 1; \
if(this_offset == (file->last_byte_written + 1)) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_CONSEC_WRITES, 1); \
file->file_record->counters[POSIX_CONSEC_WRITES] += 1; \
if(this_offset > 0 && this_offset > file->last_byte_written \
&& file->last_byte_written != 0) \
stride = this_offset - file->last_byte_written - 1; \
......@@ -361,30 +309,31 @@ static void posix_shutdown(void);
stride = 0; \
file->last_byte_written = this_offset + __ret - 1; \
file->offset = this_offset + __ret; \
DARSHAN_COUNTER_MAX(file->file_record, POSIX_MAX_BYTE_WRITTEN, (this_offset + __ret - 1)); \
DARSHAN_COUNTER_INC(file->file_record, POSIX_BYTES_WRITTEN, __ret); \
if(file->file_record->counters[POSIX_MAX_BYTE_WRITTEN] < (this_offset + __ret - 1)) \
file->file_record->counters[POSIX_MAX_BYTE_WRITTEN] = (this_offset + __ret - 1); \
file->file_record->counters[POSIX_BYTES_WRITTEN] += __ret; \
if(__stream_flag) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_FWRITES, 1); \
file->file_record->counters[POSIX_FWRITES] += 1; \
else \
DARSHAN_COUNTER_INC(file->file_record, POSIX_WRITES, 1); \
DARSHAN_BUCKET_INC(file->file_record, POSIX_SIZE_WRITE_0_100, __ret); \
posix_access_counter(file, __ret, POSIX_COUNTER_ACCESS); \
posix_access_counter(file, stride, POSIX_COUNTER_STRIDE); \
file->file_record->counters[POSIX_WRITES] += 1; \
DARSHAN_BUCKET_INC(&(file->file_record->counters[POSIX_SIZE_WRITE_0_100]), __ret); \
darshan_common_val_counter(&file->access_root, &file->access_count, __ret); \
darshan_common_val_counter(&file->stride_root, &file->stride_count, stride); \
if(!__aligned) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_MEM_NOT_ALIGNED, 1); \
file_alignment = DARSHAN_COUNTER_VALUE(file->file_record, POSIX_FILE_ALIGNMENT); \
file->file_record->counters[POSIX_MEM_NOT_ALIGNED] += 1; \
file_alignment = file->file_record->counters[POSIX_FILE_ALIGNMENT]; \
if(file_alignment > 0 && (this_offset % file_alignment) != 0) \
DARSHAN_COUNTER_INC(file->file_record, POSIX_FILE_NOT_ALIGNED, 1); \
file->file_record->counters[POSIX_FILE_NOT_ALIGNED] += 1; \
if(file->last_io_type == DARSHAN_IO_READ) \