darshan-mpiio.c 59.6 KB
Newer Older
Philip Carns's avatar
Philip Carns committed
1
/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7 8 9
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE

Philip Carns's avatar
Philip Carns committed
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#include "darshan-runtime-config.h"
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <errno.h>
#include <search.h>
#include <assert.h>
#include <pthread.h>

#include "darshan.h"
Shane Snyder's avatar
Shane Snyder committed
26
#include "darshan-dynamic.h"
Philip Carns's avatar
Philip Carns committed
27

28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
/* Forward declarations for the MPI-IO routines of interest to this module.
 * Each entry names the routine, its return type and its parameter list.
 *
 * NOTE(review): DARSHAN_FORWARD_DECL is defined outside this file; judging by
 * the __real_MPI_File_* calls made by the wrappers in this file it presumably
 * declares the matching __real_ symbol -- confirm against darshan.h.
 *
 * When HAVE_MPIIO_CONST is defined the const-qualified form of the prototype
 * (buffer, filename or datarep parameter) is declared; otherwise the
 * non-const form is used.
 */
DARSHAN_FORWARD_DECL(MPI_File_close, int, (MPI_File *fh));
DARSHAN_FORWARD_DECL(MPI_File_iread_at, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
DARSHAN_FORWARD_DECL(MPI_File_iread, int, (MPI_File fh, void  *buf, int  count, MPI_Datatype  datatype, __D_MPI_REQUEST  *request));
DARSHAN_FORWARD_DECL(MPI_File_iread_shared, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_iwrite_at, int, (MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
#else
DARSHAN_FORWARD_DECL(MPI_File_iwrite_at, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_iwrite, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
#else
DARSHAN_FORWARD_DECL(MPI_File_iwrite, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_iwrite_shared, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
#else
DARSHAN_FORWARD_DECL(MPI_File_iwrite_shared, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_open, int, (MPI_Comm comm, const char *filename, int amode, MPI_Info info, MPI_File *fh));
#else
DARSHAN_FORWARD_DECL(MPI_File_open, int, (MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_File *fh));
#endif
DARSHAN_FORWARD_DECL(MPI_File_read_all_begin, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype));
DARSHAN_FORWARD_DECL(MPI_File_read_all, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
DARSHAN_FORWARD_DECL(MPI_File_read_at_all, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
DARSHAN_FORWARD_DECL(MPI_File_read_at_all_begin, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype));
DARSHAN_FORWARD_DECL(MPI_File_read_at, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
DARSHAN_FORWARD_DECL(MPI_File_read, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
DARSHAN_FORWARD_DECL(MPI_File_read_ordered_begin, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype));
DARSHAN_FORWARD_DECL(MPI_File_read_ordered, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
DARSHAN_FORWARD_DECL(MPI_File_read_shared, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_set_view, int, (MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, const char *datarep, MPI_Info info));
#else
DARSHAN_FORWARD_DECL(MPI_File_set_view, int, (MPI_File fh, MPI_Offset disp, MPI_Datatype etype, MPI_Datatype filetype, char *datarep, MPI_Info info));
#endif
DARSHAN_FORWARD_DECL(MPI_File_sync, int, (MPI_File fh));
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_all_begin, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_all_begin, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_all, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_all, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_at_all_begin, int, (MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_at_all_begin, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_at_all, int, (MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_at_all, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_at, int, (MPI_File fh, MPI_Offset offset, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_at, int, (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#else
DARSHAN_FORWARD_DECL(MPI_File_write, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_ordered_begin, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_ordered_begin, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_ordered, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_ordered, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#endif
#ifdef HAVE_MPIIO_CONST
DARSHAN_FORWARD_DECL(MPI_File_write_shared, int, (MPI_File fh, const void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#else
DARSHAN_FORWARD_DECL(MPI_File_write_shared, int, (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status));
#endif

113
/* The mpiio_file_record_ref structure maintains necessary runtime metadata
114
 * for the MPIIO file record (darshan_mpiio_file structure, defined in
115
 * darshan-mpiio-log-format.h) pointed to by 'file_rec'. This metadata
116 117 118 119 120 121
 * assists with the instrumenting of specific statistics in the file record.
 *
 * RATIONALE: the MPIIO module needs to track some stateful, volatile 
 * information about each open file (like the current file offset, most recent 
 * access time, etc.) to aid in instrumentation, but this information can't be
 * stored in the darshan_mpiio_file struct because we don't want it to appear in
122 123 124 125
 * the final darshan log file.  We therefore associate a mpiio_file_record_ref
 * struct with each darshan_mpiio_file struct in order to track this information
 * (i.e., the mapping between mpiio_file_record_ref structs to darshan_mpiio_file
 * structs is one-to-one).
126
 *
127 128 129 130 131 132 133
 * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to
 * associate different types of handles with this mpiio_file_record_ref struct.
 * This allows us to index this struct (and the underlying file record) by using
 * either the corresponding Darshan record identifier (derived from the filename)
 * or by a generated MPI file handle, for instance. So, while there should only
 * be a single Darshan record identifier that indexes a mpiio_file_record_ref,
 * there could be multiple open file handles that index it.
134
 */
135
struct mpiio_file_record_ref
Philip Carns's avatar
Philip Carns committed
136
{
137
    struct darshan_mpiio_file *file_rec;
Shane Snyder's avatar
Shane Snyder committed
138
    enum darshan_io_type last_io_type;
139 140 141
    double last_meta_end;
    double last_read_end;
    double last_write_end;
142 143
    void *access_root;
    int access_count;
Philip Carns's avatar
Philip Carns committed
144 145
};

146 147 148 149
/* Module-wide state for the MPIIO module: the tables used to locate MPI-IO
 * file records, plus what is needed to coordinate with darshan-core at
 * shutdown time.
 */
struct mpiio_runtime
{
    void *rec_id_hash;     /* record refs looked up by darshan_record_id */
    void *fh_hash;         /* record refs looked up by MPI_File handle */
    int file_rec_count;    /* NOTE(review): presumably the number of tracked file records -- not touched in the code shown here */
};

157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
/* Prototypes for the file-local helpers of this module. */
static void mpiio_runtime_initialize(void);
static struct mpiio_file_record_ref *mpiio_track_new_file_record(darshan_record_id rec_id,
                                                                 const char *path);
static void mpiio_finalize_file_records(void *rec_ref_p);
static void mpiio_record_reduction_op(void* infile_v, void* inoutfile_v,
                                      int *len, MPI_Datatype *datatype);
static void mpiio_shared_record_variance(MPI_Comm mod_comm,
                                         struct darshan_mpiio_file *inrec_array,
                                         struct darshan_mpiio_file *outrec_array,
                                         int shared_rec_count);
static void mpiio_cleanup_runtime(void);

/* Shutdown entry point of the module (see the definition for details). */
static void mpiio_shutdown(MPI_Comm mod_comm, darshan_record_id *shared_recs,
                           int shared_rec_count, void **mpiio_buf,
                           int *mpiio_buf_sz);

175 176
/* DXT tracing hooks, defined outside this file. The read/write record macros
 * call them (only when enable_dxt_io_trace is set) with the record id, the
 * access length in bytes and the start/end timestamps of the operation.
 */
extern void dxt_mpiio_write(darshan_record_id rec_id, int64_t length,
                            double start_time, double end_time);
extern void dxt_mpiio_read(darshan_record_id rec_id, int64_t length,
                           double start_time, double end_time);

Philip Carns's avatar
Philip Carns committed
181 182 183
/* Module state. Starts out NULL; MPIIO_PRE_RECORD() calls
 * mpiio_runtime_initialize() for as long as it is still NULL. */
static struct mpiio_runtime *mpiio_runtime = NULL;

/* Recursive mutex taken by MPIIO_LOCK() and released by MPIIO_UNLOCK(). */
static pthread_mutex_t mpiio_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

/* NOTE(review): presumably this process's MPI rank; -1 until assigned. */
static int my_rank = -1;

/* When non-zero, the read/write record macros also call the DXT hooks. */
static int enable_dxt_io_trace = 0;
Philip Carns's avatar
Philip Carns committed
185

186 187 188
/* Acquire / release the module-wide mutex. */
#define MPIIO_LOCK() pthread_mutex_lock(&mpiio_runtime_mutex)
#define MPIIO_UNLOCK() pthread_mutex_unlock(&mpiio_runtime_mutex)

/* MPIIO_PRE_RECORD: entry guard for the instrumentation part of a wrapper.
 *
 * Takes the module lock. If darshan-core reports instrumentation as disabled,
 * or if the module runtime is still NULL after an initialization attempt, the
 * lock is dropped and the ENCLOSING FUNCTION returns its local variable 'ret'.
 * Otherwise the macro falls through with the lock still held; the caller must
 * release it with MPIIO_POST_RECORD().
 *
 * Requires an 'int ret' in the expanding function.
 */
#define MPIIO_PRE_RECORD() do { \
    MPIIO_LOCK(); \
    if(darshan_core_disabled_instrumentation()) { \
        MPIIO_UNLOCK(); \
        return(ret); \
    } \
    if(!mpiio_runtime) \
        mpiio_runtime_initialize(); \
    if(!mpiio_runtime) { \
        MPIIO_UNLOCK(); \
        return(ret); \
    } \
} while(0)

/* MPIIO_POST_RECORD: release the lock taken by MPIIO_PRE_RECORD(). */
#define MPIIO_POST_RECORD() do { \
    MPIIO_UNLOCK(); \
} while(0)

205
/* MPIIO_RECORD_OPEN: account for a completed MPI_File_open() call.
 *
 *   __ret       return code of the open; nothing happens unless MPI_SUCCESS
 *   __path      file name passed to the open
 *   __fh        resulting MPI file handle (lvalue; its address is the key
 *               under which the record ref is added to fh_hash)
 *   __comm      communicator of the open; a size of 1 counts as an
 *               independent open, anything else as a collective open
 *   __mode      access mode, stored in the MPIIO_MODE counter
 *   __info      MPI_Info of the open; anything but MPI_INFO_NULL bumps
 *               MPIIO_HINTS
 *   __tm1/__tm2 start/end timestamps of the open call
 *
 * The cleaned path is used to derive the record id; paths excluded by
 * darshan-core are ignored. A new record is tracked when none exists yet for
 * that id. The cleaned path is freed on every exit unless it is '__path'
 * itself. Expects mpiio_runtime to be non-NULL.
 */
#define MPIIO_RECORD_OPEN(__ret, __path, __fh, __comm, __mode, __info, __tm1, __tm2) do { \
    darshan_record_id rec_id; \
    struct mpiio_file_record_ref *rec_ref = NULL; \
    char *newpath; \
    int comm_size; \
    if(__ret != MPI_SUCCESS) break; \
    newpath = darshan_clean_file_path(__path); \
    if(!newpath) newpath = (char *)__path; \
    if(!darshan_core_excluded_path(newpath)) { \
        rec_id = darshan_core_gen_record_id(newpath); \
        rec_ref = darshan_lookup_record_ref(mpiio_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
        if(!rec_ref) rec_ref = mpiio_track_new_file_record(rec_id, newpath); \
    } \
    if(rec_ref) { \
        rec_ref->file_rec->counters[MPIIO_MODE] = __mode; \
        PMPI_Comm_size(__comm, &comm_size); \
        if(comm_size == 1) { \
            rec_ref->file_rec->counters[MPIIO_INDEP_OPENS] += 1; \
        } else { \
            rec_ref->file_rec->counters[MPIIO_COLL_OPENS] += 1; \
        } \
        if(__info != MPI_INFO_NULL) { \
            rec_ref->file_rec->counters[MPIIO_HINTS] += 1; \
        } \
        if(rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] == 0 || \
         rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] > __tm1) { \
            rec_ref->file_rec->fcounters[MPIIO_F_OPEN_TIMESTAMP] = __tm1; \
        } \
        DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_META_TIME], \
            __tm1, __tm2, rec_ref->last_meta_end); \
        darshan_add_record_ref(&(mpiio_runtime->fh_hash), &__fh, sizeof(MPI_File), rec_ref); \
    } \
    if(newpath != __path) free(newpath); \
} while(0)

241
/* MPIIO_RECORD_READ: update the file record associated with '__fh' after a
 * read-type MPI-IO call.
 *
 *   __ret       return code of the MPI call; nothing is recorded unless it
 *               equals MPI_SUCCESS
 *   __fh        MPI file handle (lvalue; its address is the lookup key in
 *               mpiio_runtime->fh_hash). Handles that are not tracked are
 *               silently ignored.
 *   __count     number of '__datatype' elements in the request
 *   __datatype  MPI datatype of each element
 *   __counter   index of the operation counter to bump (e.g. MPIIO_INDEP_READS)
 *   __tm1/__tm2 start/end timestamps of the MPI call
 *
 * The access size is computed in 64 bits: PMPI_Type_size() reports the type
 * size as an int, and multiplying that by '__count' in int arithmetic
 * overflows for accesses of 2 GiB or more.
 *
 * Expects mpiio_runtime to be non-NULL, i.e. to be expanded between
 * MPIIO_PRE_RECORD() and MPIIO_POST_RECORD().
 */
#define MPIIO_RECORD_READ(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \
    struct mpiio_file_record_ref *rec_ref; \
    int __type_size = 0; \
    int64_t size; \
    double __elapsed = (__tm2)-(__tm1); \
    if((__ret) != MPI_SUCCESS) break; \
    rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, &(__fh), sizeof(MPI_File)); \
    if(!rec_ref) break; \
    PMPI_Type_size(__datatype, &__type_size);  \
    /* widen before multiplying so large requests do not overflow */ \
    size = (int64_t)__type_size * (__count); \
    /* DXT to record detailed read tracing information */ \
    if(enable_dxt_io_trace) { \
        dxt_mpiio_read(rec_ref->file_rec->base_rec.id, size, __tm1, __tm2); \
    } \
    DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[MPIIO_SIZE_READ_AGG_0_100]), size); \
    darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, size, \
        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_ACCESS]), \
        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_COUNT])); \
    rec_ref->file_rec->counters[MPIIO_BYTES_READ] += size; \
    rec_ref->file_rec->counters[__counter] += 1; \
    /* a read that follows a write counts as a read/write switch */ \
    if(rec_ref->last_io_type == DARSHAN_IO_WRITE) \
        rec_ref->file_rec->counters[MPIIO_RW_SWITCHES] += 1; \
    rec_ref->last_io_type = DARSHAN_IO_READ; \
    /* keep the earliest start time; the end time is simply overwritten */ \
    if(rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] == 0 || \
     rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] > __tm1) \
        rec_ref->file_rec->fcounters[MPIIO_F_READ_START_TIMESTAMP] = __tm1; \
    rec_ref->file_rec->fcounters[MPIIO_F_READ_END_TIMESTAMP] = __tm2; \
    /* remember the slowest read and its size */ \
    if(rec_ref->file_rec->fcounters[MPIIO_F_MAX_READ_TIME] < __elapsed) { \
        rec_ref->file_rec->fcounters[MPIIO_F_MAX_READ_TIME] = __elapsed; \
        rec_ref->file_rec->counters[MPIIO_MAX_READ_TIME_SIZE] = size; } \
    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_READ_TIME], \
        __tm1, __tm2, rec_ref->last_read_end); \
} while(0)
273

274
/* MPIIO_RECORD_WRITE: update the file record associated with '__fh' after a
 * write-type MPI-IO call.
 *
 *   __ret       return code of the MPI call; nothing is recorded unless it
 *               equals MPI_SUCCESS
 *   __fh        MPI file handle (lvalue; its address is the lookup key in
 *               mpiio_runtime->fh_hash). Handles that are not tracked are
 *               silently ignored.
 *   __count     number of '__datatype' elements in the request
 *   __datatype  MPI datatype of each element
 *   __counter   index of the operation counter to bump (e.g. MPIIO_INDEP_WRITES)
 *   __tm1/__tm2 start/end timestamps of the MPI call
 *
 * The access size is computed in 64 bits: PMPI_Type_size() reports the type
 * size as an int, and multiplying that by '__count' in int arithmetic
 * overflows for accesses of 2 GiB or more.
 *
 * Expects mpiio_runtime to be non-NULL, i.e. to be expanded between
 * MPIIO_PRE_RECORD() and MPIIO_POST_RECORD().
 */
#define MPIIO_RECORD_WRITE(__ret, __fh, __count, __datatype, __counter, __tm1, __tm2) do { \
    struct mpiio_file_record_ref *rec_ref; \
    int __type_size = 0; \
    int64_t size; \
    double __elapsed = (__tm2)-(__tm1); \
    if((__ret) != MPI_SUCCESS) break; \
    rec_ref = darshan_lookup_record_ref(mpiio_runtime->fh_hash, &(__fh), sizeof(MPI_File)); \
    if(!rec_ref) break; \
    PMPI_Type_size(__datatype, &__type_size);  \
    /* widen before multiplying so large requests do not overflow */ \
    size = (int64_t)__type_size * (__count); \
    /* DXT to record detailed write tracing information */ \
    if(enable_dxt_io_trace) { \
        dxt_mpiio_write(rec_ref->file_rec->base_rec.id, size, __tm1, __tm2); \
    } \
    DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[MPIIO_SIZE_WRITE_AGG_0_100]), size); \
    darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, size, \
        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_ACCESS]), \
        &(rec_ref->file_rec->counters[MPIIO_ACCESS1_COUNT])); \
    rec_ref->file_rec->counters[MPIIO_BYTES_WRITTEN] += size; \
    rec_ref->file_rec->counters[__counter] += 1; \
    /* a write that follows a read counts as a read/write switch */ \
    if(rec_ref->last_io_type == DARSHAN_IO_READ) \
        rec_ref->file_rec->counters[MPIIO_RW_SWITCHES] += 1; \
    rec_ref->last_io_type = DARSHAN_IO_WRITE; \
    /* keep the earliest start time; the end time is simply overwritten */ \
    if(rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] == 0 || \
     rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] > __tm1) \
        rec_ref->file_rec->fcounters[MPIIO_F_WRITE_START_TIMESTAMP] = __tm1; \
    rec_ref->file_rec->fcounters[MPIIO_F_WRITE_END_TIMESTAMP] = __tm2; \
    /* remember the slowest write and its size */ \
    if(rec_ref->file_rec->fcounters[MPIIO_F_MAX_WRITE_TIME] < __elapsed) { \
        rec_ref->file_rec->fcounters[MPIIO_F_MAX_WRITE_TIME] = __elapsed; \
        rec_ref->file_rec->counters[MPIIO_MAX_WRITE_TIME_SIZE] = size; } \
    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[MPIIO_F_WRITE_TIME], \
        __tm1, __tm2, rec_ref->last_write_end); \
} while(0)

307 308 309
/**********************************************************
 *        Wrappers for MPI-IO functions of interest       * 
 **********************************************************/
Philip Carns's avatar
Philip Carns committed
310

Philip Carns's avatar
Philip Carns committed
311
#ifdef HAVE_MPIIO_CONST
312 313
DARSHAN_PMPI_MAP(MPI_File_open, int,  (MPI_Comm comm, const char *filename, int amode, MPI_Info info, MPI_File *fh), MPI_File_open(comm,filename,amode,info,fh))
int DARSHAN_DECL(MPI_File_open)(MPI_Comm comm, const char *filename, int amode, MPI_Info info, MPI_File *fh) 
Philip Carns's avatar
Philip Carns committed
314
#else
315 316
DARSHAN_PMPI_MAP(MPI_File_open, int,  (MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_File *fh), MPI_File_open(comm,filename,amode,info,fh)) 
int DARSHAN_DECL(MPI_File_open)(MPI_Comm comm, char *filename, int amode, MPI_Info info, MPI_File *fh) 
Philip Carns's avatar
Philip Carns committed
317 318 319
#endif
{
    int ret;
320
    MPI_File tmp_fh;
Philip Carns's avatar
Philip Carns committed
321 322 323
    char* tmp;
    double tm1, tm2;

324 325
    MAP_OR_FAIL(MPI_File_open);

Philip Carns's avatar
Philip Carns committed
326
    tm1 = darshan_core_wtime();
327
    ret = __real_MPI_File_open(comm, filename, amode, info, fh);
Philip Carns's avatar
Philip Carns committed
328 329
    tm2 = darshan_core_wtime();

330 331 332 333 334 335 336 337
    /* use ROMIO approach to strip prefix if present */
    /* strip off prefix if there is one, but only skip prefixes
     * if they are greater than length one to allow for windows
     * drive specifications (e.g. c:\...) 
     */
    tmp = strchr(filename, ':');
    if (tmp > filename + 1) {
        filename = tmp + 1;
Philip Carns's avatar
Philip Carns committed
338 339
    }

340 341 342 343 344
    MPIIO_PRE_RECORD();
    tmp_fh = *fh;
    MPIIO_RECORD_OPEN(ret, filename, tmp_fh, comm, amode, info, tm1, tm2);
    MPIIO_POST_RECORD();

Philip Carns's avatar
Philip Carns committed
345 346 347
    return(ret);
}

348
/* Wrapper for MPI_File_read: times the real call and, on success, records it
 * under MPIIO_INDEP_READS for the file tracked for 'fh'. The return code of
 * the real routine is passed through unchanged.
 */
DARSHAN_PMPI_MAP(MPI_File_read, int,
    (MPI_File fh, void *buf, int count, MPI_Datatype datatype, MPI_Status *status),
    MPI_File_read(fh,buf,count,datatype,status))

int DARSHAN_DECL(MPI_File_read)(MPI_File fh, void *buf, int count,
                                MPI_Datatype datatype, MPI_Status *status)
{
    double t_start, t_end;
    int ret;

    MAP_OR_FAIL(MPI_File_read);

    /* only the real MPI call is timed */
    t_start = darshan_core_wtime();
    ret = __real_MPI_File_read(fh, buf, count, datatype, status);
    t_end = darshan_core_wtime();

    /* MPIIO_PRE_RECORD() may return 'ret' from this function directly */
    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, t_start, t_end);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
371
DARSHAN_PMPI_MAP(MPI_File_write, int, (MPI_File fh, const void *buf, int count,
372 373
    MPI_Datatype datatype, MPI_Status *status), MPI_File_write(fh,buf,count,datatype,status))

374
int DARSHAN_DECL(MPI_File_write)(MPI_File fh, const void *buf, int count,
375 376
    MPI_Datatype datatype, MPI_Status *status)
#else
377
DARSHAN_PMPI_MAP(MPI_File_write, int, (MPI_File fh, void *buf, int count,
378 379
    MPI_Datatype datatype, MPI_Status *status), MPI_File_write(fh,buf,count,datatype,status))

380
int DARSHAN_DECL(MPI_File_write)(MPI_File fh, void *buf, int count,
381 382 383 384 385 386
    MPI_Datatype datatype, MPI_Status *status)
#endif
{
    int ret;
    double tm1, tm2;

387 388
    MAP_OR_FAIL(MPI_File_write);

389
    tm1 = darshan_core_wtime();
390
    ret = __real_MPI_File_write(fh, buf, count, datatype, status);
391 392
    tm2 = darshan_core_wtime();

393
    MPIIO_PRE_RECORD();
394
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2);
395 396
    MPIIO_POST_RECORD();

397 398 399
    return(ret);
}

400
/* Wrapper for MPI_File_read_at: times the real call and, on success, records
 * it under MPIIO_INDEP_READS for the file tracked for 'fh'. The return code
 * of the real routine is passed through unchanged.
 */
DARSHAN_PMPI_MAP(MPI_File_read_at, int,
    (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, MPI_Status *status),
    MPI_File_read_at(fh, offset, buf, count, datatype, status))

int DARSHAN_DECL(MPI_File_read_at)(MPI_File fh, MPI_Offset offset, void *buf,
                                   int count, MPI_Datatype datatype, MPI_Status *status)
{
    double t_start, t_end;
    int ret;

    MAP_OR_FAIL(MPI_File_read_at);

    /* only the real MPI call is timed */
    t_start = darshan_core_wtime();
    ret = __real_MPI_File_read_at(fh, offset, buf, count, datatype, status);
    t_end = darshan_core_wtime();

    /* MPIIO_PRE_RECORD() may return 'ret' from this function directly */
    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, t_start, t_end);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
424
DARSHAN_PMPI_MAP(MPI_File_write_at, int, (MPI_File fh, MPI_Offset offset, const void *buf,
425 426
    int count, MPI_Datatype datatype, MPI_Status *status), MPI_File_write_at(fh, offset, buf, count, datatype, status))

427
int DARSHAN_DECL(MPI_File_write_at)(MPI_File fh, MPI_Offset offset, const void *buf,
428 429
    int count, MPI_Datatype datatype, MPI_Status *status)
#else
430
DARSHAN_PMPI_MAP(MPI_File_write_at, int, (MPI_File fh, MPI_Offset offset, void *buf,
431 432
    int count, MPI_Datatype datatype, MPI_Status *status), MPI_File_write_at(fh, offset, buf, count, datatype, status))

433
int DARSHAN_DECL(MPI_File_write_at)(MPI_File fh, MPI_Offset offset, void *buf,
434 435 436 437 438 439
    int count, MPI_Datatype datatype, MPI_Status *status)
#endif
{
    int ret;
    double tm1, tm2;

440 441
    MAP_OR_FAIL(MPI_File_write_at);

442
    tm1 = darshan_core_wtime();
443
    ret = __real_MPI_File_write_at(fh, offset, buf,
444 445 446
        count, datatype, status);
    tm2 = darshan_core_wtime();

447
    MPIIO_PRE_RECORD();
448
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2);
449 450
    MPIIO_POST_RECORD();

451 452 453
    return(ret);
}

454
/* Wrapper for MPI_File_read_all: times the real call and, on success, records
 * it under MPIIO_COLL_READS for the file tracked for 'fh'. The return code of
 * the real routine is passed through unchanged.
 */
DARSHAN_PMPI_MAP(MPI_File_read_all, int, (MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status),
        MPI_File_read_all(fh,buf,count,datatype,status))

int DARSHAN_DECL(MPI_File_read_all)(MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status)
{
    int ret;
    double tm1, tm2;

    /* map the symbol that is actually called below (this used to name
     * MPI_File_write_at, leaving __real_MPI_File_read_all unmapped) */
    MAP_OR_FAIL(MPI_File_read_all);

    tm1 = darshan_core_wtime();
    ret = __real_MPI_File_read_all(fh, buf, count,
        datatype, status);
    tm2 = darshan_core_wtime();

    /* MPIIO_PRE_RECORD() may return 'ret' from this function directly */
    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, tm1, tm2);
    MPIIO_POST_RECORD();

    return(ret);
}

#ifdef HAVE_MPIIO_CONST
477
DARSHAN_PMPI_MAP(MPI_File_write_all, int, (MPI_File fh, const void * buf, int count, MPI_Datatype datatype, MPI_Status *status),
478 479
        MPI_File_write_all(fh, buf, count, datatype, status))

480
int DARSHAN_DECL(MPI_File_write_all)(MPI_File fh, const void * buf, int count, MPI_Datatype datatype, MPI_Status *status)
481
#else
482
DARSHAN_PMPI_MAP(MPI_File_write_all, int, (MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status),
483
        MPI_File_write_all(fh, buf, count, datatype, status))
484
DARSHAN_PMPI_MAP(PMPI_File_write_all, int, (MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status),
485 486
        MPI_File_write_all(fh, buf, count, datatype, status))

487
int DARSHAN_DECL(MPI_File_write_all)(MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status)
488 489 490 491 492
#endif
{
    int ret;
    double tm1, tm2;

493 494
    MAP_OR_FAIL(MPI_File_write_all);

495
    tm1 = darshan_core_wtime();
496
    ret = __real_MPI_File_write_all(fh, buf, count,
497 498 499
        datatype, status);
    tm2 = darshan_core_wtime();

500
    MPIIO_PRE_RECORD();
501
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2);
502 503
    MPIIO_POST_RECORD();

504 505 506
    return(ret);
}

507
/* Wrapper for MPI_File_read_at_all: times the real call and, on success,
 * records it under MPIIO_COLL_READS for the file tracked for 'fh'. The return
 * code of the real routine is passed through unchanged.
 */
DARSHAN_PMPI_MAP(MPI_File_read_at_all, int,
    (MPI_File fh, MPI_Offset offset, void * buf, int count, MPI_Datatype datatype, MPI_Status * status),
    MPI_File_read_at_all(fh,offset,buf,count,datatype,status))

int DARSHAN_DECL(MPI_File_read_at_all)(MPI_File fh, MPI_Offset offset, void * buf,
                                       int count, MPI_Datatype datatype, MPI_Status * status)
{
    double t_start, t_end;
    int ret;

    MAP_OR_FAIL(MPI_File_read_at_all);

    /* only the real MPI call is timed */
    t_start = darshan_core_wtime();
    ret = __real_MPI_File_read_at_all(fh, offset, buf, count, datatype, status);
    t_end = darshan_core_wtime();

    /* MPIIO_PRE_RECORD() may return 'ret' from this function directly */
    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, t_start, t_end);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
532
DARSHAN_PMPI_MAP(MPI_File_write_at_all, int, (MPI_File fh, MPI_Offset offset, const void * buf,
533 534 535
    int count, MPI_Datatype datatype, MPI_Status * status),
        MPI_File_write_at_all(fh, offset, buf, count, datatype, status))

536
int DARSHAN_DECL(MPI_File_write_at_all)(MPI_File fh, MPI_Offset offset, const void * buf,
537 538
    int count, MPI_Datatype datatype, MPI_Status * status)
#else
539
DARSHAN_PMPI_MAP(MPI_File_write_at_all, int, (MPI_File fh, MPI_Offset offset, void * buf,
540 541 542
    int count, MPI_Datatype datatype, MPI_Status * status),
        MPI_File_write_at_all(fh, offset, buf, count, datatype, status))

543
int DARSHAN_DECL(MPI_File_write_at_all)(MPI_File fh, MPI_Offset offset, void * buf,
544 545 546 547 548 549
    int count, MPI_Datatype datatype, MPI_Status * status)
#endif
{
    int ret;
    double tm1, tm2;

550 551
    MAP_OR_FAIL(MPI_File_write_at_all);

552
    tm1 = darshan_core_wtime();
553
    ret = __real_MPI_File_write_at_all(fh, offset, buf,
554 555 556
        count, datatype, status);
    tm2 = darshan_core_wtime();

557
    MPIIO_PRE_RECORD();
558
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2);
559 560
    MPIIO_POST_RECORD();

561 562 563
    return(ret);
}

564

565
/* Wrapper for MPI_File_read_shared: times the real call and, on success,
 * records it under MPIIO_INDEP_READS for the file tracked for 'fh'. The return
 * code of the real routine is passed through unchanged.
 */
DARSHAN_PMPI_MAP(MPI_File_read_shared, int,
    (MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status),
    MPI_File_read_shared(fh, buf, count, datatype, status))

int DARSHAN_DECL(MPI_File_read_shared)(MPI_File fh, void * buf, int count,
                                       MPI_Datatype datatype, MPI_Status *status)
{
    double t_start, t_end;
    int ret;

    MAP_OR_FAIL(MPI_File_read_shared);

    /* only the real MPI call is timed */
    t_start = darshan_core_wtime();
    ret = __real_MPI_File_read_shared(fh, buf, count, datatype, status);
    t_end = darshan_core_wtime();

    /* MPIIO_PRE_RECORD() may return 'ret' from this function directly */
    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_INDEP_READS, t_start, t_end);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
588
DARSHAN_PMPI_MAP(MPI_File_write_shared, int, (MPI_File fh, const void * buf, int count, MPI_Datatype datatype, MPI_Status *status),
589 590
        MPI_File_write_shared(fh, buf, count, datatype, status))

591
int DARSHAN_DECL(MPI_File_write_shared)(MPI_File fh, const void * buf, int count, MPI_Datatype datatype, MPI_Status *status)
592
#else
593
DARSHAN_PMPI_MAP(MPI_File_write_shared, int, (MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status),
594 595
        MPI_File_write_shared(fh, buf, count, datatype, status))

596
int DARSHAN_DECL(MPI_File_write_shared)(MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status *status)
597 598 599 600 601
#endif
{
    int ret;
    double tm1, tm2;

602 603
    MAP_OR_FAIL(MPI_File_write_shared);

604
    tm1 = darshan_core_wtime();
605
    ret = __real_MPI_File_write_shared(fh, buf, count,
606 607 608
        datatype, status);
    tm2 = darshan_core_wtime();

609
    MPIIO_PRE_RECORD();
610
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_INDEP_WRITES, tm1, tm2);
611 612
    MPIIO_POST_RECORD();

613 614 615
    return(ret);
}

616
/* Wrapper for MPI_File_read_ordered: times the real call and, on success,
 * records it under MPIIO_COLL_READS for the file tracked for 'fh'. The return
 * code of the real routine is passed through unchanged.
 */
DARSHAN_PMPI_MAP(MPI_File_read_ordered, int,
    (MPI_File fh, void * buf, int count, MPI_Datatype datatype, MPI_Status * status),
    MPI_File_read_ordered(fh, buf, count, datatype, status))

int DARSHAN_DECL(MPI_File_read_ordered)(MPI_File fh, void * buf, int count,
                                        MPI_Datatype datatype, MPI_Status * status)
{
    double t_start, t_end;
    int ret;

    MAP_OR_FAIL(MPI_File_read_ordered);

    /* only the real MPI call is timed */
    t_start = darshan_core_wtime();
    ret = __real_MPI_File_read_ordered(fh, buf, count, datatype, status);
    t_end = darshan_core_wtime();

    /* MPIIO_PRE_RECORD() may return 'ret' from this function directly */
    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_COLL_READS, t_start, t_end);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
641
DARSHAN_PMPI_MAP(MPI_File_write_ordered, int, (MPI_File fh, const void * buf, int count,
642 643 644
    MPI_Datatype datatype, MPI_Status * status),
        MPI_File_write_ordered(fh, buf, count, datatype, status))

645
int DARSHAN_DECL(MPI_File_write_ordered)(MPI_File fh, const void * buf, int count,
646 647
    MPI_Datatype datatype, MPI_Status * status)
#else
648
DARSHAN_PMPI_MAP(MPI_File_write_ordered, int, (MPI_File fh, void * buf, int count,
649 650 651
    MPI_Datatype datatype, MPI_Status * status),
        MPI_File_write_ordered(fh, buf, count, datatype, status))

652
int DARSHAN_DECL(MPI_File_write_ordered)(MPI_File fh, void * buf, int count,
653 654 655 656 657 658
    MPI_Datatype datatype, MPI_Status * status)
#endif
{
    int ret;
    double tm1, tm2;

659 660
    MAP_OR_FAIL(MPI_File_write_ordered);

661
    tm1 = darshan_core_wtime();
662
    ret = __real_MPI_File_write_ordered(fh, buf, count,
663 664 665
         datatype, status);
    tm2 = darshan_core_wtime();

666
    MPIIO_PRE_RECORD();
667
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_COLL_WRITES, tm1, tm2);
668 669
    MPIIO_POST_RECORD();

670 671 672
    return(ret);
}

673

674
/* Wrapper for MPI_File_read_all_begin: times the real call (the begin call
 * only) and, on success, records it under MPIIO_SPLIT_READS for the file
 * tracked for 'fh'. The return code of the real routine is passed through
 * unchanged.
 */
DARSHAN_PMPI_MAP(MPI_File_read_all_begin, int,
    (MPI_File fh, void * buf, int count, MPI_Datatype datatype),
    MPI_File_read_all_begin(fh, buf, count, datatype))

int DARSHAN_DECL(MPI_File_read_all_begin)(MPI_File fh, void * buf, int count,
                                          MPI_Datatype datatype)
{
    double t_start, t_end;
    int ret;

    MAP_OR_FAIL(MPI_File_read_all_begin);

    /* only the real MPI call is timed */
    t_start = darshan_core_wtime();
    ret = __real_MPI_File_read_all_begin(fh, buf, count, datatype);
    t_end = darshan_core_wtime();

    /* MPIIO_PRE_RECORD() may return 'ret' from this function directly */
    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, t_start, t_end);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
695
DARSHAN_PMPI_MAP(MPI_File_write_all_begin, int, (MPI_File fh, const void * buf, int count, MPI_Datatype datatype),
696
        MPI_File_write_all_begin(fh, buf, count, datatype))
697
int DARSHAN_DECL(MPI_File_write_all_begin)(MPI_File fh, const void * buf, int count, MPI_Datatype datatype)
698
#else
699
DARSHAN_PMPI_MAP(MPI_File_write_all_begin, int, (MPI_File fh, void * buf, int count, MPI_Datatype datatype),
700
        MPI_File_write_all_begin(fh, buf, count, datatype))
701
int DARSHAN_DECL(MPI_File_write_all_begin)(MPI_File fh, void * buf, int count, MPI_Datatype datatype)
702 703 704 705 706
#endif
{
    int ret;
    double tm1, tm2;

707 708
    MAP_OR_FAIL(MPI_File_write_all_begin);

709
    tm1 = darshan_core_wtime();
710
    ret = __real_MPI_File_write_all_begin(fh, buf, count, datatype);
711 712
    tm2 = darshan_core_wtime();

713
    MPIIO_PRE_RECORD();
714
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2);
715 716
    MPIIO_POST_RECORD();

717 718 719
    return(ret);
}

720
/*
 * Wrapper for MPI_File_read_at_all_begin.
 *
 * Forwards the call (including the explicit offset) to
 * __real_MPI_File_read_at_all_begin(), sampling darshan_core_wtime()
 * immediately before and after it, then passes the result code, file
 * handle, count, datatype and both timestamps to MPIIO_RECORD_READ under
 * the MPIIO_SPLIT_READS counter.  The offset itself is not handed to the
 * record macro.  The result code of the underlying call is returned.
 */
DARSHAN_PMPI_MAP(MPI_File_read_at_all_begin, int,
    (MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype),
    MPI_File_read_at_all_begin(fh, offset, buf, count, datatype))

int DARSHAN_DECL(MPI_File_read_at_all_begin)(MPI_File fh, MPI_Offset offset, void *buf,
                                             int count, MPI_Datatype datatype)
{
    /* NOTE(review): `ret` keeps this exact name in case the argument-less
     * MPIIO_PRE_RECORD()/MPIIO_POST_RECORD() macros refer to it -- confirm
     * against their definitions before renaming. */
    int ret;
    double wtime_before;
    double wtime_after;

    MAP_OR_FAIL(MPI_File_read_at_all_begin);

    /* Only the underlying MPI call sits between the two clock samples. */
    wtime_before = darshan_core_wtime();
    ret = __real_MPI_File_read_at_all_begin(fh, offset, buf, count, datatype);
    wtime_after = darshan_core_wtime();

    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, wtime_before, wtime_after);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
744
DARSHAN_PMPI_MAP(MPI_File_write_at_all_begin, int, (MPI_File fh, MPI_Offset offset, const void * buf,
745 746
    int count, MPI_Datatype datatype), MPI_File_write_at_all_begin( fh, offset, buf, count, datatype))

747
int DARSHAN_DECL(MPI_File_write_at_all_begin)(MPI_File fh, MPI_Offset offset, const void * buf,
748 749
    int count, MPI_Datatype datatype)
#else
750
DARSHAN_PMPI_MAP(MPI_File_write_at_all_begin, int, (MPI_File fh, MPI_Offset offset, void * buf,
751 752
    int count, MPI_Datatype datatype), MPI_File_write_at_all_begin( fh, offset, buf, count, datatype))

753
int DARSHAN_DECL(MPI_File_write_at_all_begin)(MPI_File fh, MPI_Offset offset, void * buf,
754 755 756 757 758 759
    int count, MPI_Datatype datatype)
#endif
{
    int ret;
    double tm1, tm2;

760 761
    MAP_OR_FAIL(MPI_File_write_at_all_begin);

762
    tm1 = darshan_core_wtime();
763
    ret = __real_MPI_File_write_at_all_begin(fh, offset,
764 765 766
        buf, count, datatype);
    tm2 = darshan_core_wtime();

767
    MPIIO_PRE_RECORD();
768
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2);
769 770
    MPIIO_POST_RECORD();

771 772 773
    return(ret);
}

774

775
/*
 * Wrapper for MPI_File_read_ordered_begin.
 *
 * Forwards the call to __real_MPI_File_read_ordered_begin(), sampling
 * darshan_core_wtime() immediately before and after it, then passes the
 * result code, file handle, count, datatype and both timestamps to
 * MPIIO_RECORD_READ under the MPIIO_SPLIT_READS counter.  The result code
 * of the underlying call is returned to the caller.
 */
DARSHAN_PMPI_MAP(MPI_File_read_ordered_begin, int,
    (MPI_File fh, void *buf, int count, MPI_Datatype datatype),
    MPI_File_read_ordered_begin(fh, buf, count, datatype))

int DARSHAN_DECL(MPI_File_read_ordered_begin)(MPI_File fh, void *buf, int count,
                                              MPI_Datatype datatype)
{
    /* NOTE(review): `ret` keeps this exact name in case the argument-less
     * MPIIO_PRE_RECORD()/MPIIO_POST_RECORD() macros refer to it -- confirm
     * against their definitions before renaming. */
    int ret;
    double wtime_before;
    double wtime_after;

    MAP_OR_FAIL(MPI_File_read_ordered_begin);

    /* Only the underlying MPI call sits between the two clock samples. */
    wtime_before = darshan_core_wtime();
    ret = __real_MPI_File_read_ordered_begin(fh, buf, count, datatype);
    wtime_after = darshan_core_wtime();

    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_SPLIT_READS, wtime_before, wtime_after);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
797
DARSHAN_PMPI_MAP(MPI_File_write_ordered_begin, int, (MPI_File fh, const void * buf, int count, MPI_Datatype datatype),
798
        MPI_File_write_ordered_begin(fh, buf, count, datatype))
799
int DARSHAN_DECL(MPI_File_write_ordered_begin)(MPI_File fh, const void * buf, int count, MPI_Datatype datatype)
800
#else
801
int DARSHAN_DECL(MPI_File_write_ordered_begin)(MPI_File fh, void * buf, int count, MPI_Datatype datatype)
802 803 804 805 806
#endif
{
    int ret;
    double tm1, tm2;

807 808
    MAP_OR_FAIL(MPI_File_write_ordered_begin);

809
    tm1 = darshan_core_wtime();
810
    ret = __real_MPI_File_write_ordered_begin(fh, buf, count,
811 812 813
        datatype);
    tm2 = darshan_core_wtime();

814
    MPIIO_PRE_RECORD();
815
    MPIIO_RECORD_WRITE(ret, fh, count, datatype, MPIIO_SPLIT_WRITES, tm1, tm2);
816 817
    MPIIO_POST_RECORD();

818 819 820
    return(ret);
}

821
/*
 * Wrapper for MPI_File_iread.
 *
 * Forwards the call to __real_MPI_File_iread(), sampling
 * darshan_core_wtime() immediately before and after it, then passes the
 * result code, file handle, count, datatype and both timestamps to
 * MPIIO_RECORD_READ under the MPIIO_NB_READS counter.  The two samples
 * bracket only the call that posts the request; nothing here waits on
 * `request`.  The result code of the underlying call is returned.
 */
DARSHAN_PMPI_MAP(MPI_File_iread, int,
    (MPI_File fh, void *buf, int count, MPI_Datatype datatype, __D_MPI_REQUEST *request),
    MPI_File_iread(fh, buf, count, datatype, request))

int DARSHAN_DECL(MPI_File_iread)(MPI_File fh, void *buf, int count,
                                 MPI_Datatype datatype, __D_MPI_REQUEST *request)
{
    /* NOTE(review): `ret` keeps this exact name in case the argument-less
     * MPIIO_PRE_RECORD()/MPIIO_POST_RECORD() macros refer to it -- confirm
     * against their definitions before renaming. */
    int ret;
    double wtime_before;
    double wtime_after;

    MAP_OR_FAIL(MPI_File_iread);

    /* Only the underlying MPI call sits between the two clock samples. */
    wtime_before = darshan_core_wtime();
    ret = __real_MPI_File_iread(fh, buf, count, datatype, request);
    wtime_after = darshan_core_wtime();

    MPIIO_PRE_RECORD();
    MPIIO_RECORD_READ(ret, fh, count, datatype, MPIIO_NB_READS, wtime_before, wtime_after);
    MPIIO_POST_RECORD();

    return ret;
}

#ifdef HAVE_MPIIO_CONST
842
DARSHAN_PMPI_MAP(MPI_File_iwrite, int, (MPI_File fh, const void * buf, int count,
843 844 845
    MPI_Datatype datatype, __D_MPI_REQUEST * request),
        MPI_File_iwrite(fh, buf, count, datatype, request))

846
int DARSHAN_DECL(MPI_File_iwrite)(MPI_File fh, const void * buf, int count,
847 848
    MPI_Datatype datatype, __D_MPI_REQUEST * request)
#else
849
DARSHAN_PMPI_MAP(MPI_File_iwrite, int, (MPI_File fh, void * buf, int count,
850 851 852
    MPI_Datatype datatype, __D_MPI_REQUEST * request),
        MPI_File_iwrite(fh, buf, count, datatype, request))

853
int DARSHAN_DECL(MPI_File_iwrite)(MPI_File fh, void * buf, int count,
854 855 856 857 858 859
    MPI_Datatype datatype, __D_MPI_REQUEST * request)
#endif
{
    int ret;
    double tm1, tm2;