/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7 8 9
#define _XOPEN_SOURCE 500
#define _GNU_SOURCE

10
#include "darshan-runtime-config.h"
11 12 13 14 15 16 17 18 19 20 21 22 23 24
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <search.h>
#include <assert.h>
25
#include <libgen.h>
26
#include <aio.h>
27
#include <pthread.h>
28

29
#include "utlist.h"
30
#include "darshan.h"
31
#include "darshan-dynamic.h"
32

33
/* Fallbacks for builds where the explicit 64-bit type names are unavailable:
 * if HAVE_OFF64_T / HAVE_AIOCB64 are not defined, provide off64_t as a 64-bit
 * integer and alias aiocb64 to the plain aiocb structure.
 */
#if !defined(HAVE_OFF64_T)
typedef int64_t off64_t;
#endif

#if !defined(HAVE_AIOCB64)
#define aiocb64 aiocb
#endif
39

40

41 42
DARSHAN_FORWARD_DECL(open, int, (const char *path, int flags, ...));
DARSHAN_FORWARD_DECL(open64, int, (const char *path, int flags, ...));
43 44
DARSHAN_FORWARD_DECL(creat, int, (const char* path, mode_t mode));
DARSHAN_FORWARD_DECL(creat64, int, (const char* path, mode_t mode));
45 46 47 48
DARSHAN_FORWARD_DECL(mkstemp, int, (char *template));
DARSHAN_FORWARD_DECL(mkostemp, int, (char *template, int flags));
DARSHAN_FORWARD_DECL(mkstemps, int, (char *template, int suffixlen));
DARSHAN_FORWARD_DECL(mkostemps, int, (char *template, int suffixlen, int flags));
49 50 51 52 53 54 55 56
DARSHAN_FORWARD_DECL(read, ssize_t, (int fd, void *buf, size_t count));
DARSHAN_FORWARD_DECL(write, ssize_t, (int fd, const void *buf, size_t count));
DARSHAN_FORWARD_DECL(pread, ssize_t, (int fd, void *buf, size_t count, off_t offset));
DARSHAN_FORWARD_DECL(pwrite, ssize_t, (int fd, const void *buf, size_t count, off_t offset));
DARSHAN_FORWARD_DECL(pread64, ssize_t, (int fd, void *buf, size_t count, off64_t offset));
DARSHAN_FORWARD_DECL(pwrite64, ssize_t, (int fd, const void *buf, size_t count, off64_t offset));
DARSHAN_FORWARD_DECL(readv, ssize_t, (int fd, const struct iovec *iov, int iovcnt));
DARSHAN_FORWARD_DECL(writev, ssize_t, (int fd, const struct iovec *iov, int iovcnt));
57 58
DARSHAN_FORWARD_DECL(lseek, off_t, (int fd, off_t offset, int whence));
DARSHAN_FORWARD_DECL(lseek64, off64_t, (int fd, off64_t offset, int whence));
59 60 61 62 63 64
DARSHAN_FORWARD_DECL(__xstat, int, (int vers, const char* path, struct stat *buf));
DARSHAN_FORWARD_DECL(__xstat64, int, (int vers, const char* path, struct stat64 *buf));
DARSHAN_FORWARD_DECL(__lxstat, int, (int vers, const char* path, struct stat *buf));
DARSHAN_FORWARD_DECL(__lxstat64, int, (int vers, const char* path, struct stat64 *buf));
DARSHAN_FORWARD_DECL(__fxstat, int, (int vers, int fd, struct stat *buf));
DARSHAN_FORWARD_DECL(__fxstat64, int, (int vers, int fd, struct stat64 *buf));
Shane Snyder's avatar
Shane Snyder committed
65 66
DARSHAN_FORWARD_DECL(mmap, void*, (void *addr, size_t length, int prot, int flags, int fd, off_t offset));
DARSHAN_FORWARD_DECL(mmap64, void*, (void *addr, size_t length, int prot, int flags, int fd, off64_t offset));
67 68
DARSHAN_FORWARD_DECL(fsync, int, (int fd));
DARSHAN_FORWARD_DECL(fdatasync, int, (int fd));
69
DARSHAN_FORWARD_DECL(close, int, (int fd));
70 71 72 73 74 75 76 77
DARSHAN_FORWARD_DECL(aio_read, int, (struct aiocb *aiocbp));
DARSHAN_FORWARD_DECL(aio_write, int, (struct aiocb *aiocbp));
DARSHAN_FORWARD_DECL(aio_read64, int, (struct aiocb64 *aiocbp));
DARSHAN_FORWARD_DECL(aio_write64, int, (struct aiocb64 *aiocbp));
DARSHAN_FORWARD_DECL(aio_return, ssize_t, (struct aiocb *aiocbp));
DARSHAN_FORWARD_DECL(aio_return64, ssize_t, (struct aiocb64 *aiocbp));
DARSHAN_FORWARD_DECL(lio_listio, int, (int mode, struct aiocb *const aiocb_list[], int nitems, struct sigevent *sevp));
DARSHAN_FORWARD_DECL(lio_listio64, int, (int mode, struct aiocb64 *const aiocb_list[], int nitems, struct sigevent *sevp));
78

79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
/* The posix_file_record_ref structure maintains necessary runtime metadata
 * for the POSIX file record (darshan_posix_file structure, defined in
 * darshan-posix-log-format.h) pointed to by 'file_rec'. This metadata
 * assists with the instrumenting of specific statistics in the file record.
 *
 * RATIONALE: the POSIX module needs to track some stateful, volatile 
 * information about each open file (like the current file offset, most recent 
 * access time, etc.) to aid in instrumentation, but this information can't be
 * stored in the darshan_posix_file struct because we don't want it to appear in
 * the final darshan log file.  We therefore associate a posix_file_record_ref
 * struct with each darshan_posix_file struct in order to track this information
 * (i.e., the mapping between posix_file_record_ref structs to darshan_posix_file
 * structs is one-to-one).
 *
 * NOTE: we use the 'darshan_record_ref' interface (in darshan-common) to
 * associate different types of handles with this posix_file_record_ref struct.
 * This allows us to index this struct (and the underlying file record) by using
 * either the corresponding Darshan record identifier (derived from the filename)
97 98
 * or by a generated file descriptor, for instance. Note that, while there should
 * only be a single Darshan record identifier that indexes a posix_file_record_ref,
99 100
 * there could be multiple open file descriptors that index it.
 */
101
struct posix_file_record_ref
102
{
103
    struct darshan_posix_file *file_rec;
104 105 106
    int64_t offset;
    int64_t last_byte_read;
    int64_t last_byte_written;
Shane Snyder's avatar
Shane Snyder committed
107
    enum darshan_io_type last_io_type;
108 109 110
    double last_meta_end;
    double last_read_end;
    double last_write_end;
111
    void *access_root;
112
    int access_count;
113
    void *stride_root;
114
    int stride_count;
115
    struct posix_aio_tracker* aio_list;
116
    int fs_type; /* same as darshan_fs_info->fs_type */
117 118
};

119 120 121 122
/* Module-wide state for the POSIX module: the containers used to find file
 * records and the data needed to coordinate with darshan-core at shutdown
 * time.
 */
struct posix_runtime
{
    void *rec_id_hash;   /* record refs indexed by darshan_record_id */
    void *fd_hash;       /* record refs indexed by file descriptor */
    int file_rec_count;  /* NOTE(review): presumably the number of tracked file records -- confirm */
};

130 131 132 133 134
/* Bookkeeping for one asynchronous I/O operation that is in flight; trackers
 * are chained through 'next'.
 */
struct posix_aio_tracker
{
    double tm1;                      /* NOTE(review): presumably the operation's start time -- confirm */
    void *aiocbp;                    /* control block identifying the operation */
    struct posix_aio_tracker *next;  /* next tracker in the list */
};
137

138 139 140
static void posix_runtime_initialize(
    void);
static struct posix_file_record_ref *posix_track_new_file_record(
141
    darshan_record_id rec_id, const char *path);
142 143 144 145
static void posix_aio_tracker_add(
    int fd, void *aiocbp);
static struct posix_aio_tracker* posix_aio_tracker_del(
    int fd, void *aiocbp);
146 147
static void posix_finalize_file_records(
    void *rec_ref_p);
148 149 150 151 152
static void posix_record_reduction_op(
    void* infile_v, void* inoutfile_v, int *len, MPI_Datatype *datatype);
static void posix_shared_record_variance(
    MPI_Comm mod_comm, struct darshan_posix_file *inrec_array,
    struct darshan_posix_file *outrec_array, int shared_rec_count);
153
static void posix_cleanup_runtime(
154
    void);
155 156

static void posix_shutdown(
157
    MPI_Comm mod_comm, darshan_record_id *shared_recs,
158
    int shared_rec_count, void **posix_buf, int *posix_buf_sz);
159

160 161 162 163 164
/* DXT tracing hooks, defined outside this file.  They are called from the
 * read/write recording macros when enable_dxt_io_trace is set.
 */
extern void dxt_posix_write(
    darshan_record_id rec_id, int64_t offset, int64_t length,
    double start_time, double end_time);
extern void dxt_posix_read(
    darshan_record_id rec_id, int64_t offset, int64_t length,
    double start_time, double end_time);
165

166 167 168 169 170
/* module state; POSIX_PRE_RECORD calls posix_runtime_initialize() while this is NULL */
static struct posix_runtime *posix_runtime = NULL;

/* recursive mutex taken and released by POSIX_LOCK() / POSIX_UNLOCK() */
static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

/* when non-zero, POSIX_PRE_RECORD makes wrappers return without recording */
static int instrumentation_disabled = 0;

/* NOTE(review): presumably this process's MPI rank; -1 until set -- confirm */
static int my_rank = -1;

/* modulus used by the wrappers to decide whether a user buffer is aligned */
static int darshan_mem_alignment = 1;

/* when non-zero, reads and writes are also reported to the DXT hooks */
static int enable_dxt_io_trace = 0;
172

173 174 175
/* acquire / release the module-wide mutex guarding posix_runtime */
#define POSIX_LOCK()   pthread_mutex_lock(&posix_runtime_mutex)
#define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex)

176 177
/* Prologue run by every wrapper before it touches module state.
 *
 * Takes the module lock and makes sure the runtime exists, initializing it
 * on first use.  If instrumentation is disabled, or the runtime is still
 * missing after the initialization attempt, the lock is dropped and the
 * ENCLOSING FUNCTION returns its local variable 'ret'.  Otherwise execution
 * continues with the lock held; pair with POSIX_POST_RECORD().
 */
#define POSIX_PRE_RECORD() do { \
    POSIX_LOCK(); \
    if(instrumentation_disabled) { \
        POSIX_UNLOCK(); \
        return(ret); \
    } \
    if(!posix_runtime) \
        posix_runtime_initialize(); \
    if(!posix_runtime) { \
        POSIX_UNLOCK(); \
        return(ret); \
    } \
} while(0)

/* Epilogue matching POSIX_PRE_RECORD(): releases the module lock. */
#define POSIX_POST_RECORD() do { POSIX_UNLOCK(); } while(0)
191

192
/* Record a successful open-like call.
 *
 *   __ret   descriptor returned by the call (must be an lvalue; nothing is
 *           recorded when it is negative)
 *   __path  path that was opened
 *   __mode  creation mode; stored in POSIX_MODE only when non-zero
 *   __tm1   time just before the call
 *   __tm2   time just after the call
 *
 * The path is cleaned, excluded paths are skipped, and the matching record
 * is looked up by record id (or created).  The record's tracked offset and
 * last-byte positions are reset, the open counter, open timestamps and
 * metadata time are updated, and the descriptor is registered in fd_hash.
 * Must run with the module lock held.
 */
#define POSIX_RECORD_OPEN(__ret, __path, __mode, __tm1, __tm2) do { \
    darshan_record_id rec_id; \
    struct posix_file_record_ref *rec_ref = NULL; \
    char *newpath; \
    if(__ret < 0) break; \
    newpath = darshan_clean_file_path(__path); \
    /* fall back to the caller's string if no cleaned copy was produced */ \
    if(!newpath) newpath = (char *)__path; \
    if(!darshan_core_excluded_path(newpath)) { \
        rec_id = darshan_core_gen_record_id(newpath); \
        rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
        if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \
    } \
    if(rec_ref) { \
        if(__mode) \
            rec_ref->file_rec->counters[POSIX_MODE] = __mode; \
        rec_ref->offset = 0; \
        rec_ref->last_byte_written = 0; \
        rec_ref->last_byte_read = 0; \
        rec_ref->file_rec->counters[POSIX_OPENS] += 1; \
        /* keep the earliest open start and the most recent open end */ \
        if(rec_ref->file_rec->fcounters[POSIX_F_OPEN_START_TIMESTAMP] == 0 || \
           rec_ref->file_rec->fcounters[POSIX_F_OPEN_START_TIMESTAMP] > __tm1) \
            rec_ref->file_rec->fcounters[POSIX_F_OPEN_START_TIMESTAMP] = __tm1; \
        rec_ref->file_rec->fcounters[POSIX_F_OPEN_END_TIMESTAMP] = __tm2; \
        DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_META_TIME], \
            __tm1, __tm2, rec_ref->last_meta_end); \
        darshan_add_record_ref(&(posix_runtime->fd_hash), &__ret, sizeof(int), rec_ref); \
        darshan_instrument_fs_data(rec_ref->fs_type, newpath, __ret); \
    } \
    /* only free the path if it is our own cleaned copy */ \
    if(newpath != __path) free(newpath); \
} while(0)

227
/* Record a completed read on descriptor __fd.
 *
 *   __ret           bytes read (nothing is recorded when negative)
 *   __fd            file descriptor (must be an lvalue)
 *   __pread_flag    non-zero if the call carried an explicit offset
 *   __pread_offset  that explicit offset (ignored when the flag is zero)
 *   __aligned       non-zero if the user buffer was memory aligned
 *   __tm1, __tm2    times just before / after the call
 *
 * Descriptors without a record in fd_hash are ignored.  Otherwise the
 * sequential/consecutive counters, stride and access-size tables, byte
 * totals, alignment counters, read/write switch counter, timestamps and
 * cumulative read time of the record are updated, and the tracked offset is
 * advanced past the bytes read.  Must run with the module lock held.
 */
#define POSIX_RECORD_READ(__ret, __fd, __pread_flag, __pread_offset, __aligned, __tm1, __tm2) do { \
    struct posix_file_record_ref* rec_ref; \
    size_t stride; \
    int64_t this_offset; \
    int64_t file_alignment; \
    double __elapsed = __tm2-__tm1; \
    if(__ret < 0) break; \
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &(__fd), sizeof(int)); \
    if(!rec_ref) break; \
    /* explicit-offset reads do not use the tracked file position */ \
    if(__pread_flag) { \
        this_offset = __pread_offset; \
    } else { \
        this_offset = rec_ref->offset; \
    } \
    /* DXT to record detailed read tracing information */ \
    if(enable_dxt_io_trace) \
        dxt_posix_read(rec_ref->file_rec->base_rec.id, this_offset, __ret, __tm1, __tm2); \
    /* access-pattern counters relative to the previous read */ \
    if(this_offset > rec_ref->last_byte_read) \
        rec_ref->file_rec->counters[POSIX_SEQ_READS] += 1; \
    if(this_offset == (rec_ref->last_byte_read + 1)) \
        rec_ref->file_rec->counters[POSIX_CONSEC_READS] += 1; \
    if(this_offset > 0 && this_offset > rec_ref->last_byte_read \
       && rec_ref->last_byte_read != 0) { \
        stride = this_offset - rec_ref->last_byte_read - 1; \
    } else { \
        stride = 0; \
    } \
    /* advance the tracked position and byte totals */ \
    rec_ref->last_byte_read = this_offset + __ret - 1; \
    rec_ref->offset = this_offset + __ret; \
    if(rec_ref->file_rec->counters[POSIX_MAX_BYTE_READ] < (this_offset + __ret - 1)) \
        rec_ref->file_rec->counters[POSIX_MAX_BYTE_READ] = (this_offset + __ret - 1); \
    rec_ref->file_rec->counters[POSIX_BYTES_READ] += __ret; \
    rec_ref->file_rec->counters[POSIX_READS] += 1; \
    /* size histogram plus common access-size and stride tables */ \
    DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[POSIX_SIZE_READ_0_100]), __ret); \
    darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, __ret, \
        &(rec_ref->file_rec->counters[POSIX_ACCESS1_ACCESS]), \
        &(rec_ref->file_rec->counters[POSIX_ACCESS1_COUNT])); \
    darshan_common_val_counter(&rec_ref->stride_root, &rec_ref->stride_count, stride, \
        &(rec_ref->file_rec->counters[POSIX_STRIDE1_STRIDE]), \
        &(rec_ref->file_rec->counters[POSIX_STRIDE1_COUNT])); \
    /* memory and file alignment */ \
    if(!__aligned) \
        rec_ref->file_rec->counters[POSIX_MEM_NOT_ALIGNED] += 1; \
    file_alignment = rec_ref->file_rec->counters[POSIX_FILE_ALIGNMENT]; \
    if(file_alignment > 0 && (this_offset % file_alignment) != 0) \
        rec_ref->file_rec->counters[POSIX_FILE_NOT_ALIGNED] += 1; \
    /* a read directly after a write counts as a read/write switch */ \
    if(rec_ref->last_io_type == DARSHAN_IO_WRITE) \
        rec_ref->file_rec->counters[POSIX_RW_SWITCHES] += 1; \
    rec_ref->last_io_type = DARSHAN_IO_READ; \
    /* earliest read start, latest read end, slowest read, cumulative time */ \
    if(rec_ref->file_rec->fcounters[POSIX_F_READ_START_TIMESTAMP] == 0 || \
       rec_ref->file_rec->fcounters[POSIX_F_READ_START_TIMESTAMP] > __tm1) \
        rec_ref->file_rec->fcounters[POSIX_F_READ_START_TIMESTAMP] = __tm1; \
    rec_ref->file_rec->fcounters[POSIX_F_READ_END_TIMESTAMP] = __tm2; \
    if(rec_ref->file_rec->fcounters[POSIX_F_MAX_READ_TIME] < __elapsed) { \
        rec_ref->file_rec->fcounters[POSIX_F_MAX_READ_TIME] = __elapsed; \
        rec_ref->file_rec->counters[POSIX_MAX_READ_TIME_SIZE] = __ret; \
    } \
    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_READ_TIME], \
        __tm1, __tm2, rec_ref->last_read_end); \
} while(0)

285
/* Record a completed write on descriptor __fd.
 *
 *   __ret            bytes written (nothing is recorded when negative)
 *   __fd             file descriptor (must be an lvalue)
 *   __pwrite_flag    non-zero if the call carried an explicit offset
 *   __pwrite_offset  that explicit offset (ignored when the flag is zero)
 *   __aligned        non-zero if the user buffer was memory aligned
 *   __tm1, __tm2     times just before / after the call
 *
 * Descriptors without a record in fd_hash are ignored.  Otherwise the
 * sequential/consecutive counters, stride and access-size tables, byte
 * totals, alignment counters, read/write switch counter, timestamps and
 * cumulative write time of the record are updated, and the tracked offset
 * is advanced past the bytes written.  Must run with the module lock held.
 */
#define POSIX_RECORD_WRITE(__ret, __fd, __pwrite_flag, __pwrite_offset, __aligned, __tm1, __tm2) do { \
    struct posix_file_record_ref* rec_ref; \
    size_t stride; \
    int64_t this_offset; \
    int64_t file_alignment; \
    double __elapsed = __tm2-__tm1; \
    if(__ret < 0) break; \
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &__fd, sizeof(int)); \
    if(!rec_ref) break; \
    /* explicit-offset writes do not use the tracked file position */ \
    if(__pwrite_flag) { \
        this_offset = __pwrite_offset; \
    } else { \
        this_offset = rec_ref->offset; \
    } \
    /* DXT to record detailed write tracing information */ \
    if(enable_dxt_io_trace) \
        dxt_posix_write(rec_ref->file_rec->base_rec.id, this_offset, __ret, __tm1, __tm2); \
    /* access-pattern counters relative to the previous write */ \
    if(this_offset > rec_ref->last_byte_written) \
        rec_ref->file_rec->counters[POSIX_SEQ_WRITES] += 1; \
    if(this_offset == (rec_ref->last_byte_written + 1)) \
        rec_ref->file_rec->counters[POSIX_CONSEC_WRITES] += 1; \
    if(this_offset > 0 && this_offset > rec_ref->last_byte_written \
       && rec_ref->last_byte_written != 0) { \
        stride = this_offset - rec_ref->last_byte_written - 1; \
    } else { \
        stride = 0; \
    } \
    /* advance the tracked position and byte totals */ \
    rec_ref->last_byte_written = this_offset + __ret - 1; \
    rec_ref->offset = this_offset + __ret; \
    if(rec_ref->file_rec->counters[POSIX_MAX_BYTE_WRITTEN] < (this_offset + __ret - 1)) \
        rec_ref->file_rec->counters[POSIX_MAX_BYTE_WRITTEN] = (this_offset + __ret - 1); \
    rec_ref->file_rec->counters[POSIX_BYTES_WRITTEN] += __ret; \
    rec_ref->file_rec->counters[POSIX_WRITES] += 1; \
    /* size histogram plus common access-size and stride tables */ \
    DARSHAN_BUCKET_INC(&(rec_ref->file_rec->counters[POSIX_SIZE_WRITE_0_100]), __ret); \
    darshan_common_val_counter(&rec_ref->access_root, &rec_ref->access_count, __ret, \
        &(rec_ref->file_rec->counters[POSIX_ACCESS1_ACCESS]), \
        &(rec_ref->file_rec->counters[POSIX_ACCESS1_COUNT])); \
    darshan_common_val_counter(&rec_ref->stride_root, &rec_ref->stride_count, stride, \
        &(rec_ref->file_rec->counters[POSIX_STRIDE1_STRIDE]), \
        &(rec_ref->file_rec->counters[POSIX_STRIDE1_COUNT])); \
    /* memory and file alignment */ \
    if(!__aligned) \
        rec_ref->file_rec->counters[POSIX_MEM_NOT_ALIGNED] += 1; \
    file_alignment = rec_ref->file_rec->counters[POSIX_FILE_ALIGNMENT]; \
    if(file_alignment > 0 && (this_offset % file_alignment) != 0) \
        rec_ref->file_rec->counters[POSIX_FILE_NOT_ALIGNED] += 1; \
    /* a write directly after a read counts as a read/write switch */ \
    if(rec_ref->last_io_type == DARSHAN_IO_READ) \
        rec_ref->file_rec->counters[POSIX_RW_SWITCHES] += 1; \
    rec_ref->last_io_type = DARSHAN_IO_WRITE; \
    /* earliest write start, latest write end, slowest write, cumulative time */ \
    if(rec_ref->file_rec->fcounters[POSIX_F_WRITE_START_TIMESTAMP] == 0 || \
       rec_ref->file_rec->fcounters[POSIX_F_WRITE_START_TIMESTAMP] > __tm1) \
        rec_ref->file_rec->fcounters[POSIX_F_WRITE_START_TIMESTAMP] = __tm1; \
    rec_ref->file_rec->fcounters[POSIX_F_WRITE_END_TIMESTAMP] = __tm2; \
    if(rec_ref->file_rec->fcounters[POSIX_F_MAX_WRITE_TIME] < __elapsed) { \
        rec_ref->file_rec->fcounters[POSIX_F_MAX_WRITE_TIME] = __elapsed; \
        rec_ref->file_rec->counters[POSIX_MAX_WRITE_TIME_SIZE] = __ret; \
    } \
    DARSHAN_TIMER_INC_NO_OVERLAP(rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME], \
        __tm1, __tm2, rec_ref->last_write_end); \
} while(0)
342

343
/* Record a stat-like call made on a path rather than a descriptor.
 *
 * The path is cleaned and, unless it is excluded, the matching record is
 * looked up by record id (or created).  When a record is available the stat
 * is accounted for through POSIX_RECORD_STAT.  Must run with the module lock
 * held.
 */
#define POSIX_LOOKUP_RECORD_STAT(__path, __statbuf, __tm1, __tm2) do { \
    darshan_record_id rec_id; \
    struct posix_file_record_ref* rec_ref = NULL; \
    char *newpath = darshan_clean_file_path(__path); \
    /* fall back to the caller's string if no cleaned copy was produced */ \
    if(!newpath) newpath = (char *)__path; \
    if(!darshan_core_excluded_path(newpath)) { \
        rec_id = darshan_core_gen_record_id(newpath); \
        rec_ref = darshan_lookup_record_ref(posix_runtime->rec_id_hash, &rec_id, sizeof(darshan_record_id)); \
        if(!rec_ref) rec_ref = posix_track_new_file_record(rec_id, newpath); \
    } \
    /* only free the path if it is our own cleaned copy */ \
    if(newpath != __path) free(newpath); \
    if(rec_ref) { \
        POSIX_RECORD_STAT(rec_ref, __statbuf, __tm1, __tm2); \
    } \
} while(0)

361 362 363 364
/* Account for one stat-like call on the record behind __rec_ref: bump the
 * stat counter and add the [__tm1, __tm2] interval to the metadata time.
 * __statbuf is accepted but not used here.
 */
#define POSIX_RECORD_STAT(__rec_ref, __statbuf, __tm1, __tm2) do { \
    (__rec_ref)->file_rec->counters[POSIX_STATS] += 1; \
    DARSHAN_TIMER_INC_NO_OVERLAP( \
        (__rec_ref)->file_rec->fcounters[POSIX_F_META_TIME], \
        __tm1, __tm2, \
        (__rec_ref)->last_meta_end); \
} while(0)

367

368 369 370 371
/**********************************************************
 *      Wrappers for POSIX I/O functions of interest      * 
 **********************************************************/

372
int DARSHAN_DECL(open)(const char *path, int flags, ...)
373 374 375 376 377
{
    int mode = 0;
    int ret;
    double tm1, tm2;

378 379
    MAP_OR_FAIL(open);

380
    if(flags & O_CREAT) 
381 382 383 384 385 386
    {
        va_list arg;
        va_start(arg, flags);
        mode = va_arg(arg, int);
        va_end(arg);

387
        tm1 = darshan_core_wtime();
388
        ret = __real_open(path, flags, mode);
389
        tm2 = darshan_core_wtime();
390 391 392
    }
    else
    {
393
        tm1 = darshan_core_wtime();
394
        ret = __real_open(path, flags);
395
        tm2 = darshan_core_wtime();
396 397
    }

398
    POSIX_PRE_RECORD();
399
    POSIX_RECORD_OPEN(ret, path, mode, tm1, tm2);
400
    POSIX_POST_RECORD();
401 402 403 404

    return(ret);
}

Shane Snyder's avatar
Shane Snyder committed
405 406 407 408 409 410
/* Wrapper for open64().
 *
 * Forwards the call to the real open64(), timing it with
 * darshan_core_wtime(), and then records the result through
 * POSIX_RECORD_OPEN while holding the module lock.
 *
 * The optional third argument (mode) is fetched from the variadic list and
 * forwarded whenever the open call requires it: for O_CREAT and, on
 * platforms that define it, for O_TMPFILE.  Otherwise mode stays 0 and the
 * real call is made with two arguments.
 *
 * Returns whatever the real open64() returned.
 */
int DARSHAN_DECL(open64)(const char *path, int flags, ...)
{
    int mode = 0;
    int needs_mode;
    int ret;
    double tm1, tm2;

    MAP_OR_FAIL(open64);

    needs_mode = ((flags & O_CREAT) != 0);
#ifdef O_TMPFILE
    /* O_TMPFILE is a multi-bit flag, so every one of its bits must be set */
    if((flags & O_TMPFILE) == O_TMPFILE)
        needs_mode = 1;
#endif

    if(needs_mode)
    {
        va_list arg;
        va_start(arg, flags);
        mode = va_arg(arg, int);
        va_end(arg);

        tm1 = darshan_core_wtime();
        ret = __real_open64(path, flags, mode);
        tm2 = darshan_core_wtime();
    }
    else
    {
        tm1 = darshan_core_wtime();
        ret = __real_open64(path, flags);
        tm2 = darshan_core_wtime();
    }

    POSIX_PRE_RECORD();
    POSIX_RECORD_OPEN(ret, path, mode, tm1, tm2);
    POSIX_POST_RECORD();

    return(ret);
}

/* Wrapper for creat(2): times the real call and records it as an open of
 * 'path' with creation mode 'mode'.  Returns the real call's result.
 */
int DARSHAN_DECL(creat)(const char* path, mode_t mode)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(creat);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_creat(path, mode);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_OPEN(ret, path, mode, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for creat64(): times the real call and records it as an open of
 * 'path' with creation mode 'mode'.  Returns the real call's result.
 */
int DARSHAN_DECL(creat64)(const char* path, mode_t mode)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(creat64);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_creat64(path, mode);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_OPEN(ret, path, mode, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

474 475 476 477 478 479 480 481 482 483 484
/* Wrapper for mkstemp(3): times the real call and records it as an open of
 * 'template' with no mode.  Returns the real call's result.
 */
int DARSHAN_DECL(mkstemp)(char* template)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(mkstemp);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_mkstemp(template);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_OPEN(ret, template, 0, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for mkostemp(3): times the real call and records it as an open of
 * 'template' with no mode.  Returns the real call's result.
 */
int DARSHAN_DECL(mkostemp)(char* template, int flags)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(mkostemp);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_mkostemp(template, flags);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_OPEN(ret, template, 0, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for mkstemps(3): times the real call and records it as an open of
 * 'template' with no mode.  Returns the real call's result.
 */
int DARSHAN_DECL(mkstemps)(char* template, int suffixlen)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(mkstemps);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_mkstemps(template, suffixlen);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_OPEN(ret, template, 0, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for mkostemps(3): times the real call and records it as an open
 * of 'template' with no mode.  Returns the real call's result.
 */
int DARSHAN_DECL(mkostemps)(char* template, int suffixlen, int flags)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(mkostemps);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_mkostemps(template, suffixlen, flags);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_OPEN(ret, template, 0, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

546 547 548 549 550 551
ssize_t DARSHAN_DECL(read)(int fd, void *buf, size_t count)
{
    ssize_t ret;
    int aligned_flag = 0;
    double tm1, tm2;

552
    MAP_OR_FAIL(read);
553

554
    if((unsigned long)buf % darshan_mem_alignment == 0) aligned_flag = 1;
555 556 557 558 559

    tm1 = darshan_core_wtime();
    ret = __real_read(fd, buf, count);
    tm2 = darshan_core_wtime();

560
    POSIX_PRE_RECORD();
561
    POSIX_RECORD_READ(ret, fd, 0, 0, aligned_flag, tm1, tm2);
562
    POSIX_POST_RECORD();
563 564 565 566 567 568 569 570 571 572 573 574

    return(ret);
}

ssize_t DARSHAN_DECL(write)(int fd, const void *buf, size_t count)
{
    ssize_t ret;
    int aligned_flag = 0;
    double tm1, tm2;

    MAP_OR_FAIL(write);

575
    if((unsigned long)buf % darshan_mem_alignment == 0) aligned_flag = 1;
576 577 578 579 580

    tm1 = darshan_core_wtime();
    ret = __real_write(fd, buf, count);
    tm2 = darshan_core_wtime();

581
    POSIX_PRE_RECORD();
582
    POSIX_RECORD_WRITE(ret, fd, 0, 0, aligned_flag, tm1, tm2);
583
    POSIX_POST_RECORD();
584 585 586 587 588 589 590 591 592 593 594 595

    return(ret);
}

ssize_t DARSHAN_DECL(pread)(int fd, void *buf, size_t count, off_t offset)
{
    ssize_t ret;
    int aligned_flag = 0;
    double tm1, tm2;

    MAP_OR_FAIL(pread);

596
    if((unsigned long)buf % darshan_mem_alignment == 0) aligned_flag = 1;
597 598 599 600 601

    tm1 = darshan_core_wtime();
    ret = __real_pread(fd, buf, count, offset);
    tm2 = darshan_core_wtime();

602
    POSIX_PRE_RECORD();
603
    POSIX_RECORD_READ(ret, fd, 1, offset, aligned_flag, tm1, tm2);
604
    POSIX_POST_RECORD();
605 606 607 608 609 610 611 612 613 614 615 616

    return(ret);
}

ssize_t DARSHAN_DECL(pwrite)(int fd, const void *buf, size_t count, off_t offset)
{
    ssize_t ret;
    int aligned_flag = 0;
    double tm1, tm2;

    MAP_OR_FAIL(pwrite);

617
    if((unsigned long)buf % darshan_mem_alignment == 0) aligned_flag = 1;
618 619 620 621 622

    tm1 = darshan_core_wtime();
    ret = __real_pwrite(fd, buf, count, offset);
    tm2 = darshan_core_wtime();

623
    POSIX_PRE_RECORD();
624
    POSIX_RECORD_WRITE(ret, fd, 1, offset, aligned_flag, tm1, tm2);
625
    POSIX_POST_RECORD();
626 627 628 629 630 631 632 633 634 635 636 637

    return(ret);
}

/* Wrapper for pread64(): notes whether 'buf' is aligned to
 * darshan_mem_alignment, times the real call, and records the read at the
 * caller-supplied 'offset'.  Returns the real call's result.
 */
ssize_t DARSHAN_DECL(pread64)(int fd, void *buf, size_t count, off64_t offset)
{
    double tm1, tm2;
    int aligned_flag;
    ssize_t ret;

    MAP_OR_FAIL(pread64);

    aligned_flag = (((unsigned long)buf % darshan_mem_alignment) == 0);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_pread64(fd, buf, count, offset);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_READ(ret, fd, 1, offset, aligned_flag, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for pwrite64(): notes whether 'buf' is aligned to
 * darshan_mem_alignment, times the real call, and records the write at the
 * caller-supplied 'offset'.  Returns the real call's result.
 */
ssize_t DARSHAN_DECL(pwrite64)(int fd, const void *buf, size_t count, off64_t offset)
{
    double tm1, tm2;
    int aligned_flag;
    ssize_t ret;

    MAP_OR_FAIL(pwrite64);

    aligned_flag = (((unsigned long)buf % darshan_mem_alignment) == 0);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real_pwrite64(fd, buf, count, offset);
    tm2 = darshan_core_wtime();

    /* bookkeeping happens under the module lock */
    POSIX_PRE_RECORD();
    POSIX_RECORD_WRITE(ret, fd, 1, offset, aligned_flag, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

ssize_t DARSHAN_DECL(readv)(int fd, const struct iovec *iov, int iovcnt)
{
    ssize_t ret;
675
    int aligned_flag = 1;
676 677 678 679
    int i;
    double tm1, tm2;

    MAP_OR_FAIL(readv);
680

681 682 683 684
    for(i=0; i<iovcnt; i++)
    {
        if(((unsigned long)iov[i].iov_base % darshan_mem_alignment) != 0)
            aligned_flag = 0;
685
    }
686 687 688 689 690

    tm1 = darshan_core_wtime();
    ret = __real_readv(fd, iov, iovcnt);
    tm2 = darshan_core_wtime();

691
    POSIX_PRE_RECORD();
692
    POSIX_RECORD_READ(ret, fd, 0, 0, aligned_flag, tm1, tm2);
693
    POSIX_POST_RECORD();
694 695 696 697 698 699 700

    return(ret);
}

ssize_t DARSHAN_DECL(writev)(int fd, const struct iovec *iov, int iovcnt)
{
    ssize_t ret;
701
    int aligned_flag = 1;
702 703 704 705
    int i;
    double tm1, tm2;

    MAP_OR_FAIL(writev);
706

707 708 709 710
    for(i=0; i<iovcnt; i++)
    {
        if(((unsigned long)iov[i].iov_base % darshan_mem_alignment) != 0)
            aligned_flag = 0;
711
    }
712 713 714 715 716

    tm1 = darshan_core_wtime();
    ret = __real_writev(fd, iov, iovcnt);
    tm2 = darshan_core_wtime();

717
    POSIX_PRE_RECORD();
718
    POSIX_RECORD_WRITE(ret, fd, 0, 0, aligned_flag, tm1, tm2);
719
    POSIX_POST_RECORD();
720 721 722 723 724 725 726

    return(ret);
}

off_t DARSHAN_DECL(lseek)(int fd, off_t offset, int whence)
{
    off_t ret;
727
    struct posix_file_record_ref *rec_ref;
728 729 730 731 732 733 734 735 736 737
    double tm1, tm2;

    MAP_OR_FAIL(lseek);

    tm1 = darshan_core_wtime();
    ret = __real_lseek(fd, offset, whence);
    tm2 = darshan_core_wtime();

    if(ret >= 0)
    {
738
        POSIX_PRE_RECORD();
739 740
        rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
        if(rec_ref)
741
        {
742
            rec_ref->offset = ret;
743
            DARSHAN_TIMER_INC_NO_OVERLAP(
744 745 746
                rec_ref->file_rec->fcounters[POSIX_F_META_TIME],
                tm1, tm2, rec_ref->last_meta_end);
            rec_ref->file_rec->counters[POSIX_SEEKS] += 1;
747
        }
748
        POSIX_POST_RECORD();
749 750 751 752 753 754 755 756
    }

    return(ret);
}

off_t DARSHAN_DECL(lseek64)(int fd, off_t offset, int whence)
{
    off_t ret;
757
    struct posix_file_record_ref *rec_ref;
758 759 760 761 762 763 764 765 766 767
    double tm1, tm2;

    MAP_OR_FAIL(lseek64);

    tm1 = darshan_core_wtime();
    ret = __real_lseek64(fd, offset, whence);
    tm2 = darshan_core_wtime();

    if(ret >= 0)
    {
768
        POSIX_PRE_RECORD();
769 770
        rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
        if(rec_ref)
771
        {
772
            rec_ref->offset = ret;
773
            DARSHAN_TIMER_INC_NO_OVERLAP(
774 775 776
                rec_ref->file_rec->fcounters[POSIX_F_META_TIME],
                tm1, tm2, rec_ref->last_meta_end);
            rec_ref->file_rec->counters[POSIX_SEEKS] += 1;
777
        }
778
        POSIX_POST_RECORD();
779 780 781 782 783
    }

    return(ret);
}

784 785 786 787 788 789 790 791 792 793 794 795 796 797
/* Wrapper for __xstat(): times the real call and, when it succeeds on a
 * regular file, accounts for the stat against the record for 'path'.
 * Returns the real call's result.
 */
int DARSHAN_DECL(__xstat)(int vers, const char *path, struct stat *buf)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(__xstat);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real___xstat(vers, path, buf);
    tm2 = darshan_core_wtime();

    /* failures are not recorded ... */
    if(ret < 0)
        return ret;
    /* ... and neither is anything other than a regular file */
    if(!S_ISREG(buf->st_mode))
        return ret;

    POSIX_PRE_RECORD();
    POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for __xstat64(): times the real call and, when it succeeds on a
 * regular file, accounts for the stat against the record for 'path'.
 * Returns the real call's result.
 */
int DARSHAN_DECL(__xstat64)(int vers, const char *path, struct stat64 *buf)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(__xstat64);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real___xstat64(vers, path, buf);
    tm2 = darshan_core_wtime();

    /* failures are not recorded ... */
    if(ret < 0)
        return ret;
    /* ... and neither is anything other than a regular file */
    if(!S_ISREG(buf->st_mode))
        return ret;

    POSIX_PRE_RECORD();
    POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for __lxstat(): times the real call and, when it succeeds on a
 * regular file, accounts for the stat against the record for 'path'.
 * Returns the real call's result.
 */
int DARSHAN_DECL(__lxstat)(int vers, const char *path, struct stat *buf)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(__lxstat);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real___lxstat(vers, path, buf);
    tm2 = darshan_core_wtime();

    /* failures are not recorded ... */
    if(ret < 0)
        return ret;
    /* ... and neither is anything other than a regular file */
    if(!S_ISREG(buf->st_mode))
        return ret;

    POSIX_PRE_RECORD();
    POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for __lxstat64(): times the real call and, when it succeeds on a
 * regular file, accounts for the stat against the record for 'path'.
 * Returns the real call's result.
 */
int DARSHAN_DECL(__lxstat64)(int vers, const char *path, struct stat64 *buf)
{
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(__lxstat64);

    /* time only the underlying call */
    tm1 = darshan_core_wtime();
    ret = __real___lxstat64(vers, path, buf);
    tm2 = darshan_core_wtime();

    /* failures are not recorded ... */
    if(ret < 0)
        return ret;
    /* ... and neither is anything other than a regular file */
    if(!S_ISREG(buf->st_mode))
        return ret;

    POSIX_PRE_RECORD();
    POSIX_LOOKUP_RECORD_STAT(path, buf, tm1, tm2);
    POSIX_POST_RECORD();

    return ret;
}

int DARSHAN_DECL(__fxstat)(int vers, int fd, struct stat *buf)
{
    int ret;
871
    struct posix_file_record_ref *rec_ref;
872 873 874 875 876 877 878 879 880 881 882
    double tm1, tm2;

    MAP_OR_FAIL(__fxstat);

    tm1 = darshan_core_wtime();
    ret = __real___fxstat(vers, fd, buf);
    tm2 = darshan_core_wtime();

    if(ret < 0 || !S_ISREG(buf->st_mode))
        return(ret);

883
    POSIX_PRE_RECORD();
884 885
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
886
    {
887
        POSIX_RECORD_STAT(rec_ref, buf, tm1, tm2);
888
    }
889
    POSIX_POST_RECORD();
890 891 892 893 894 895 896

    return(ret);
}

int DARSHAN_DECL(__fxstat64)(int vers, int fd, struct stat64 *buf)
{
    int ret;
897
    struct posix_file_record_ref *rec_ref;
898 899 900 901 902 903 904 905 906 907 908
    double tm1, tm2;

    MAP_OR_FAIL(__fxstat64);

    tm1 = darshan_core_wtime();
    ret = __real___fxstat64(vers, fd, buf);
    tm2 = darshan_core_wtime();

    if(ret < 0 || !S_ISREG(buf->st_mode))
        return(ret);

909
    POSIX_PRE_RECORD();
910 911
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
912
    {
913
        POSIX_RECORD_STAT(rec_ref, buf, tm1, tm2);
914
    }
915
    POSIX_POST_RECORD();
916 917 918 919

    return(ret);
}

Shane Snyder's avatar
Shane Snyder committed
920 921 922 923
void* DARSHAN_DECL(mmap)(void *addr, size_t length, int prot, int flags,
    int fd, off_t offset)
{
    void* ret;
924
    struct posix_file_record_ref *rec_ref;
Shane Snyder's avatar
Shane Snyder committed
925 926 927

    MAP_OR_FAIL(mmap);

928 929 930 931 932 933 934 935
    if(fd < 0 || (flags & MAP_ANONYMOUS))
    {
        /* mmap is not associated with a backing file; skip all Darshan
         * characterization attempts.
         */
        return(__real_mmap(addr, length, prot, flags, fd, offset));
    }

Shane Snyder's avatar
Shane Snyder committed
936 937 938 939
    ret = __real_mmap(addr, length, prot, flags, fd, offset);
    if(ret == MAP_FAILED)
        return(ret);

940
    POSIX_PRE_RECORD();
941 942
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
Shane Snyder's avatar
Shane Snyder committed
943
    {
944
        rec_ref->file_rec->counters[POSIX_MMAPS] += 1;
Shane Snyder's avatar
Shane Snyder committed
945
    }
946
    POSIX_POST_RECORD();
Shane Snyder's avatar
Shane Snyder committed
947 948 949 950 951 952 953 954

    return(ret);
}

void* DARSHAN_DECL(mmap64)(void *addr, size_t length, int prot, int flags,
    int fd, off64_t offset)
{
    void* ret;
955
    struct posix_file_record_ref *rec_ref;
Shane Snyder's avatar
Shane Snyder committed
956 957 958

    MAP_OR_FAIL(mmap64);

959 960 961 962 963 964 965 966
    if(fd < 0 || (flags & MAP_ANONYMOUS))
    {
        /* mmap is not associated with a backing file; skip all Darshan
         * characterization attempts.
         */
        return(__real_mmap64(addr, length, prot, flags, fd, offset));
    }

Shane Snyder's avatar
Shane Snyder committed
967 968 969 970
    ret = __real_mmap64(addr, length, prot, flags, fd, offset);
    if(ret == MAP_FAILED)
        return(ret);

971
    POSIX_PRE_RECORD();
972 973
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
Shane Snyder's avatar
Shane Snyder committed
974
    {
975
        rec_ref->file_rec->counters[POSIX_MMAPS] += 1;
Shane Snyder's avatar
Shane Snyder committed
976
    }
977
    POSIX_POST_RECORD();
Shane Snyder's avatar
Shane Snyder committed
978 979 980 981

    return(ret);
}

982 983 984
int DARSHAN_DECL(fsync)(int fd)
{
    int ret;
985
    struct posix_file_record_ref *rec_ref;
986 987 988 989 990 991 992 993 994 995 996
    double tm1, tm2;

    MAP_OR_FAIL(fsync);

    tm1 = darshan_core_wtime();
    ret = __real_fsync(fd);
    tm2 = darshan_core_wtime();

    if(ret < 0)
        return(ret);

997
    POSIX_PRE_RECORD();
998 999
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
1000
    {
1001
        DARSHAN_TIMER_INC_NO_OVERLAP(
1002 1003 1004
            rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME],
            tm1, tm2, rec_ref->last_write_end);
        rec_ref->file_rec->counters[POSIX_FSYNCS] += 1;
1005
    }
1006
    POSIX_POST_RECORD();
1007 1008 1009 1010 1011 1012 1013

    return(ret);
}

int DARSHAN_DECL(fdatasync)(int fd)
{
    int ret;
1014
    struct posix_file_record_ref *rec_ref;
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025
    double tm1, tm2;

    MAP_OR_FAIL(fdatasync);

    tm1 = darshan_core_wtime();
    ret = __real_fdatasync(fd);
    tm2 = darshan_core_wtime();

    if(ret < 0)
        return(ret);

1026
    POSIX_PRE_RECORD();
1027 1028
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
1029
    {
1030
        DARSHAN_TIMER_INC_NO_OVERLAP(
1031 1032 1033
            rec_ref->file_rec->fcounters[POSIX_F_WRITE_TIME],
            tm1, tm2, rec_ref->last_write_end);
        rec_ref->file_rec->counters[POSIX_FDSYNCS] += 1;
1034
    }
1035
    POSIX_POST_RECORD();
1036 1037 1038 1039

    return(ret);
}

1040 1041 1042
int DARSHAN_DECL(close)(int fd)
{
    int ret;
1043 1044
    struct posix_file_record_ref *rec_ref;
    double tm1, tm2;
1045 1046 1047 1048 1049 1050 1051

    MAP_OR_FAIL(close);

    tm1 = darshan_core_wtime();
    ret = __real_close(fd);
    tm2 = darshan_core_wtime();

1052
    POSIX_PRE_RECORD();
1053 1054
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
Shane Snyder's avatar
Shane Snyder committed
1055
    {
1056 1057
        rec_ref->last_byte_written = 0;
        rec_ref->last_byte_read = 0;
1058 1059 1060 1061
        if(rec_ref->file_rec->fcounters[POSIX_F_CLOSE_START_TIMESTAMP] == 0 ||
         rec_ref->file_rec->fcounters[POSIX_F_CLOSE_START_TIMESTAMP] > tm1)
           rec_ref->file_rec->fcounters[POSIX_F_CLOSE_START_TIMESTAMP] = tm1;
        rec_ref->file_rec->fcounters[POSIX_F_CLOSE_END_TIMESTAMP] = tm2;
1062
        DARSHAN_TIMER_INC_NO_OVERLAP(
1063 1064 1065
            rec_ref->file_rec->fcounters[POSIX_F_META_TIME],
            tm1, tm2, rec_ref->last_meta_end);
        darshan_delete_record_ref(&(posix_runtime->fd_hash), &fd, sizeof(int));
Shane Snyder's avatar
Shane Snyder committed
1066
    }
1067
    POSIX_POST_RECORD();
1068 1069 1070 1071

    return(ret);
}

1072 1073 1074 1075 1076 1077 1078 1079 1080
/* Wrapper for aio_read: forwards to the real symbol and, when the request
 * was accepted (return value 0), registers a tracker for it via
 * posix_aio_tracker_add.  The real call's return value is passed through.
 */
int DARSHAN_DECL(aio_read)(struct aiocb *aiocbp)
{
    int ret;

    MAP_OR_FAIL(aio_read);

    ret = __real_aio_read(aiocbp);

    /* requests that were not queued are not tracked */
    if(ret != 0)
        return ret;

    POSIX_PRE_RECORD();
    posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for aio_write: forwards to the real symbol and, when the request
 * was accepted (return value 0), registers a tracker for it via
 * posix_aio_tracker_add.  The real call's return value is passed through.
 */
int DARSHAN_DECL(aio_write)(struct aiocb *aiocbp)
{
    int ret;

    MAP_OR_FAIL(aio_write);

    ret = __real_aio_write(aiocbp);

    /* requests that were not queued are not tracked */
    if(ret != 0)
        return ret;

    POSIX_PRE_RECORD();
    posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for aio_read64: forwards to the real symbol and, when the request
 * was accepted (return value 0), registers a tracker for it via
 * posix_aio_tracker_add.  The real call's return value is passed through.
 */
int DARSHAN_DECL(aio_read64)(struct aiocb64 *aiocbp)
{
    int ret;

    MAP_OR_FAIL(aio_read64);

    ret = __real_aio_read64(aiocbp);

    /* requests that were not queued are not tracked */
    if(ret != 0)
        return ret;

    POSIX_PRE_RECORD();
    posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp);
    POSIX_POST_RECORD();

    return ret;
}

/* Wrapper for aio_write64: forwards to the real symbol and, when the request
 * was accepted (return value 0), registers a tracker for it via
 * posix_aio_tracker_add.  The real call's return value is passed through.
 */
int DARSHAN_DECL(aio_write64)(struct aiocb64 *aiocbp)
{
    int ret;

    MAP_OR_FAIL(aio_write64);

    ret = __real_aio_write64(aiocbp);

    /* requests that were not queued are not tracked */
    if(ret != 0)
        return ret;

    POSIX_PRE_RECORD();
    posix_aio_tracker_add(aiocbp->aio_fildes, aiocbp);
    POSIX_POST_RECORD();

    return ret;
}

ssize_t DARSHAN_DECL(aio_return)(struct aiocb *aiocbp)
{
    int ret;
    double tm2;
    struct posix_aio_tracker *tmp;
    int aligned_flag = 0;

    MAP_OR_FAIL(aio_return);

    ret = __real_aio_return(aiocbp);
    tm2 = darshan_core_wtime();

1152
    POSIX_PRE_RECORD();
1153
    tmp = posix_aio_tracker_del(aiocbp->aio_fildes, aiocbp);
1154
    if(tmp)
1155 1156 1157 1158 1159 1160
    {
        if((unsigned long)aiocbp->aio_buf % darshan_mem_alignment == 0)
            aligned_flag = 1;
        if(aiocbp->aio_lio_opcode == LIO_WRITE)
        {
            POSIX_RECORD_WRITE(ret, aiocbp->aio_fildes,
1161
                1, aiocbp->aio_offset, aligned_flag,
1162 1163 1164 1165 1166
                tmp->tm1, tm2);
        }
        else if(aiocbp->aio_lio_opcode == LIO_READ)
        {
            POSIX_RECORD_READ(ret, aiocbp->aio_fildes,
1167
                1, aiocbp->aio_offset, aligned_flag,
1168 1169 1170 1171
                tmp->tm1, tm2);
        }
        free(tmp);
    }
1172
    POSIX_POST_RECORD();
1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188

    return(ret);
}

ssize_t DARSHAN_DECL(aio_return64)(struct aiocb64 *aiocbp)
{
    int ret;
    double tm2;
    struct posix_aio_tracker *tmp;
    int aligned_flag = 0;

    MAP_OR_FAIL(aio_return64);

    ret = __real_aio_return64(aiocbp);
    tm2 = darshan_core_wtime();

1189
    POSIX_PRE_RECORD();
1190
    tmp = posix_aio_tracker_del(aiocbp->aio_fildes, aiocbp);
1191
    if(tmp)
1192 1193 1194 1195 1196 1197
    {
        if((unsigned long)aiocbp->aio_buf % darshan_mem_alignment == 0)
            aligned_flag = 1;
        if(aiocbp->aio_lio_opcode == LIO_WRITE)
        {
            POSIX_RECORD_WRITE(ret, aiocbp->aio_fildes,
1198
                1, aiocbp->aio_offset, aligned_flag,
1199 1200 1201 1202 1203
                tmp->tm1, tm2);
        }
        else if(aiocbp->aio_lio_opcode == LIO_READ)
        {
            POSIX_RECORD_READ(ret, aiocbp->aio_fildes,
1204
                1, aiocbp->aio_offset, aligned_flag,
1205 1206 1207 1208
                tmp->tm1, tm2);
        }
        free(tmp);
    }
1209
    POSIX_POST_RECORD();
1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224

    return(ret);
}

/* Wrapper for lio_listio: forwards to the real symbol and, when it returns
 * 0, registers a tracker for every non-NULL control block in the list via
 * posix_aio_tracker_add.  The real call's return value is passed through.
 */
int DARSHAN_DECL(lio_listio)(int mode, struct aiocb *const aiocb_list[],
    int nitems, struct sigevent *sevp)
{
    int ret;
    int i;

    MAP_OR_FAIL(lio_listio);

    ret = __real_lio_listio(mode, aiocb_list, nitems, sevp);
    if(ret == 0)
    {
        POSIX_PRE_RECORD();
        for(i = 0; i < nitems; i++)
        {
            /* POSIX allows NULL entries in the list (they are ignored by
             * lio_listio), so they must be skipped rather than dereferenced
             */
            if(aiocb_list[i] == NULL)
                continue;

            posix_aio_tracker_add(aiocb_list[i]->aio_fildes, aiocb_list[i]);
        }
        POSIX_POST_RECORD();
    }

    return(ret);
}

/* Wrapper for lio_listio64: forwards to the real symbol and, when it returns
 * 0, registers a tracker for every non-NULL control block in the list via
 * posix_aio_tracker_add.  The real call's return value is passed through.
 */
int DARSHAN_DECL(lio_listio64)(int mode, struct aiocb64 *const aiocb_list[],
    int nitems, struct sigevent *sevp)
{
    int ret;
    int i;

    MAP_OR_FAIL(lio_listio64);

    ret = __real_lio_listio64(mode, aiocb_list, nitems, sevp);
    if(ret == 0)
    {
        POSIX_PRE_RECORD();
        for(i = 0; i < nitems; i++)
        {
            /* POSIX allows NULL entries in the list (they are ignored by
             * lio_listio), so they must be skipped rather than dereferenced
             */
            if(aiocb_list[i] == NULL)
                continue;

            posix_aio_tracker_add(aiocb_list[i]->aio_fildes, aiocb_list[i]);
        }
        POSIX_POST_RECORD();
    }

    return(ret);
}

1258 1259 1260
/**********************************************************
 * Internal functions for manipulating POSIX module state *
 **********************************************************/
1261

1262
/* initialize internal POSIX module data structures and register with darshan-core */
1263
static void posix_runtime_initialize()
1264
{
1265
    int psx_buf_size;
1266

1267
    /* try and store a default number of records for this module */
1268 1269
    psx_buf_size = DARSHAN_DEF_MOD_REC_COUNT * sizeof(struct darshan_posix_file);

1270
    /* register the POSIX module with darshan core */
1271
    darshan_core_register_module(
1272
        DARSHAN_POSIX_MOD,
1273
        &posix_shutdown,
1274
        &psx_buf_size,
1275
        &my_rank,
1276
        &darshan_mem_alignment);
1277

1278 1279
    /* return if darshan-core does not provide enough module memory */
    if(psx_buf_size < sizeof(struct darshan_posix_file))
1280 1281
    {
        darshan_core_unregister_module(DARSHAN_POSIX_MOD);
1282
        return;
1283
    }
1284 1285 1286

    posix_runtime = malloc(sizeof(*posix_runtime));
    if(!posix_runtime)
1287 1288
    {
        darshan_core_unregister_module(DARSHAN_POSIX_MOD);
1289
        return;
1290
    }
1291 1292
    memset(posix_runtime, 0, sizeof(*posix_runtime));

1293 1294 1295 1296 1297
    /* check if DXT (Darshan extended tracing) should be enabled */
    if (getenv("ENABLE_DXT_IO_TRACE")) {
        enable_dxt_io_trace = 1;
    }

1298 1299
    return;
}
1300

1301
static struct posix_file_record_ref *posix_track_new_file_record(
1302
    darshan_record_id rec_id, const char *path)
1303
{
1304
    struct darshan_posix_file *file_rec = NULL;
1305
    struct posix_file_record_ref *rec_ref = NULL;
1306
    struct darshan_fs_info fs_info;
1307
    int ret;
1308

1309 1310
    rec_ref = malloc(sizeof(*rec_ref));
    if(!rec_ref)
1311
        return(NULL);
1312
    memset(rec_ref, 0, sizeof(*rec_ref));
1313

1314 1315 1316 1317
    /* add a reference to this file record based on record id */
    ret = darshan_add_record_ref(&(posix_runtime->rec_id_hash), &rec_id,
        sizeof(darshan_record_id), rec_ref);
    if(ret == 0)
1318
    {
1319
        free(rec_ref);
1320 1321 1322
        return(NULL);
    }

1323 1324 1325
    /* register the actual file record with darshan-core so it is persisted
     * in the log file
     */
1326 1327 1328 1329 1330
    file_rec = darshan_core_register_record(
        rec_id,
        path,
        DARSHAN_POSIX_MOD,
        sizeof(struct darshan_posix_file),
Shane Snyder's avatar
Shane Snyder committed
1331
        &fs_info);
1332

1333
    if(!file_rec)
1334
    {
1335 1336 1337 1338
        darshan_delete_record_ref(&(posix_runtime->rec_id_hash),
            &rec_id, sizeof(darshan_record_id));
        free(rec_ref);
        return(NULL);
1339 1340
    }

1341 1342 1343 1344
    /* registering this file record was successful, so initialize some fields */
    file_rec->base_rec.id = rec_id;
    file_rec->base_rec.rank = my_rank;
    file_rec->counters[POSIX_MEM_ALIGNMENT] = darshan_mem_alignment;
Shane Snyder's avatar
Shane Snyder committed
1345
    file_rec->counters[POSIX_FILE_ALIGNMENT] = fs_info.block_size;
1346
    rec_ref->fs_type = fs_info.fs_type;
1347
    rec_ref->file_rec = file_rec;
1348 1349
    posix_runtime->file_rec_count++;

1350 1351 1352
    return(rec_ref);
}

1353
/* finds the tracker structure for a given aio operation, removes it from
1354
 * the associated linked list for this file record, and returns a pointer.  
1355 1356 1357 1358 1359 1360
 *
 * returns NULL if aio operation not found
 */
static struct posix_aio_tracker* posix_aio_tracker_del(int fd, void *aiocbp)
{
    struct posix_aio_tracker *tracker = NULL, *iter, *tmp;
1361
    struct posix_file_record_ref *rec_ref;
1362

1363 1364
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
1365
    {
1366
        LL_FOREACH_SAFE(rec_ref->aio_list, iter, tmp)
1367
        {
1368
            if(iter->aiocbp == aiocbp)
1369
            {
1370
                LL_DELETE(rec_ref->aio_list, iter);
1371 1372 1373 1374 1375 1376 1377 1378 1379
                tracker = iter;
                break;
            }
        }
    }

    return(tracker);
}

1380 1381 1382 1383
/* adds a tracker for the given aio operation */
static void posix_aio_tracker_add(int fd, void *aiocbp)
{
    struct posix_aio_tracker* tracker;
1384
    struct posix_file_record_ref *rec_ref;
1385

1386 1387
    rec_ref = darshan_lookup_record_ref(posix_runtime->fd_hash, &fd, sizeof(int));
    if(rec_ref)
1388 1389
    {
        tracker = malloc(sizeof(*tracker));
1390
        if(tracker)
1391 1392 1393
        {
            tracker->tm1 = darshan_core_wtime();
            tracker->aiocbp = aiocbp;
1394
            LL_PREPEND(rec_ref->aio_list, tracker);
1395 1396 1397 1398 1399 1400
        }
    }

    return;
}

1401 1402 1403 1404 1405 1406 1407 1408 1409 1410
/* Release the access_root and stride_root search trees hanging off one
 * record reference, freeing every node's payload with free().  The record
 * reference itself is not freed here.
 */
static void posix_finalize_file_records(void *rec_ref_p)
{
    struct posix_file_record_ref *rec_ref = rec_ref_p;

    tdestroy(rec_ref->access_root, free);
    tdestroy(rec_ref->stride_root, free);
}

1411 1412
static void posix_record_reduction_op(void* infile_v, void* inoutfile_v,
    int *len, MPI_Datatype *datatype)
Shane Snyder's avatar
Shane Snyder committed
1413
{
1414 1415 1416
    struct darshan_posix_file tmp_file;
    struct darshan_posix_file *infile = infile_v;
    struct darshan_posix_file *inoutfile = inoutfile_v;
1417
    int i, j, k;
1418

1419
    for(i=0; i<*len; i++)
1420 1421
    {
        memset(&tmp_file, 0, sizeof(struct darshan_posix_file));
1422 1423
        tmp_file.base_rec.id = infile->base_rec.id;
        tmp_file.base_rec.rank = -1;
1424