/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6 7
#define _GNU_SOURCE

8
#include "darshan-runtime-config.h"
9

10 11 12 13 14 15 16 17 18 19 20 21 22 23
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <search.h>
#include <assert.h>
24
#include <libgen.h>
25
#include <aio.h>
26
#include <pthread.h>
27 28

#include "darshan.h"
29
#include "uthash.h"
30

31
/* Fallback definition used when HAVE_OFF64_T is not defined
 * (presumably by darshan-runtime-config.h -- confirm against configure).
 */
#ifndef HAVE_OFF64_T
typedef int64_t off64_t;
#endif

/* When HAVE_AIOCB64 is not defined, the 64-bit name maps onto plain aiocb. */
#ifndef HAVE_AIOCB64
#define aiocb64 aiocb
#endif
37

38 39
/* TODO these go where ? */

40 41 42 43 44
/*
 * Declares the prototype of the underlying implementation of `name`, which
 * is referenced in this file under the __real_ prefix.
 * NOTE(review): the __real_/__wrap_ naming looks like the linker's --wrap
 * convention -- confirm against the build system.
 *
 * The expansion deliberately carries no trailing ';': every use site
 * supplies its own, and a doubled ';' at file scope is not valid ISO C.
 */
#define DARSHAN_FORWARD_DECL(name,ret,args) \
  extern ret __real_ ## name args

/* Name under which the instrumented wrapper for __name is defined. */
#define DARSHAN_DECL(__name) __wrap_ ## __name

/* Expands to nothing here; the wrappers invoke it before calling the
 * __real_ function.
 */
#define MAP_OR_FAIL(func)

47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
/* TODO: where do these file record structs go? (some needed for darshan-util) */
/* TODO: DARSHAN_* OR CP_* */

/* module name handed to darshan_core_register_module() during initialization */
#define POSIX_MOD_NAME "POSIX"

/* Indices into the integer counters[] array of a darshan_posix_file record.
 * CP_NUM_INDICES must stay last: it sizes that array.
 */
enum darshan_posix_indices
{
    CP_POSIX_READS,              /* count of posix reads */
    CP_POSIX_WRITES,             /* count of posix writes */
    CP_POSIX_OPENS,              /* count of posix opens */
    CP_POSIX_SEEKS,              /* count of posix seeks */
    CP_POSIX_STATS,              /* count of posix stat/lstat/fstats */
    CP_POSIX_MMAPS,              /* count of posix mmaps */
    CP_POSIX_FREADS,             /* presumably count of stream reads -- TODO confirm */
    CP_POSIX_FWRITES,            /* presumably count of stream writes -- TODO confirm */
    CP_POSIX_FOPENS,             /* incremented by POSIX_RECORD_OPEN when its stream flag is set */
    CP_POSIX_FSEEKS,             /* presumably count of stream seeks -- TODO confirm */
    CP_POSIX_FSYNCS,             /* presumably count of fsync calls -- TODO confirm */
    CP_POSIX_FDSYNCS,            /* presumably count of fdatasync calls -- TODO confirm */
    CP_MODE,                      /* mode of file */
    CP_BYTES_READ,                /* total bytes read */
    CP_BYTES_WRITTEN,             /* total bytes written */
    CP_MAX_BYTE_READ,             /* highest offset byte read */
    CP_MAX_BYTE_WRITTEN,          /* highest offset byte written */
    CP_CONSEC_READS,              /* count of consecutive reads */
    CP_CONSEC_WRITES,             /* count of consecutive writes */
    CP_SEQ_READS,                 /* count of sequential reads */
    CP_SEQ_WRITES,                /* count of sequential writes */
    CP_RW_SWITCHES,               /* number of times switched between read and write */
    CP_MEM_NOT_ALIGNED,           /* count of accesses not mem aligned */
    CP_MEM_ALIGNMENT,             /* mem alignment in bytes */
    CP_FILE_NOT_ALIGNED,          /* count of accesses not file aligned */
    CP_FILE_ALIGNMENT,            /* file alignment in bytes */
    CP_MAX_READ_TIME_SIZE,        /* presumably size of the slowest read -- TODO confirm */
    CP_MAX_WRITE_TIME_SIZE,       /* presumably size of the slowest write -- TODO confirm */
    /* buckets */
    CP_SIZE_READ_0_100,           /* count of posix read size ranges */
    CP_SIZE_READ_100_1K,
    CP_SIZE_READ_1K_10K,
    CP_SIZE_READ_10K_100K,
    CP_SIZE_READ_100K_1M,
    CP_SIZE_READ_1M_4M,
    CP_SIZE_READ_4M_10M,
    CP_SIZE_READ_10M_100M,
    CP_SIZE_READ_100M_1G,
    CP_SIZE_READ_1G_PLUS,
    /* buckets */
    CP_SIZE_WRITE_0_100,          /* count of posix write size ranges */
    CP_SIZE_WRITE_100_1K,
    CP_SIZE_WRITE_1K_10K,
    CP_SIZE_WRITE_10K_100K,
    CP_SIZE_WRITE_100K_1M,
    CP_SIZE_WRITE_1M_4M,
    CP_SIZE_WRITE_4M_10M,
    CP_SIZE_WRITE_10M_100M,
    CP_SIZE_WRITE_100M_1G,
    CP_SIZE_WRITE_1G_PLUS,
    /* counters */
    CP_STRIDE1_STRIDE,             /* the four most frequently appearing strides */
    CP_STRIDE2_STRIDE,
    CP_STRIDE3_STRIDE,
    CP_STRIDE4_STRIDE,
    CP_STRIDE1_COUNT,              /* count of each of the most frequent strides */
    CP_STRIDE2_COUNT,
    CP_STRIDE3_COUNT,
    CP_STRIDE4_COUNT,
    CP_ACCESS1_ACCESS,             /* the four most frequently appearing access sizes */
    CP_ACCESS2_ACCESS,
    CP_ACCESS3_ACCESS,
    CP_ACCESS4_ACCESS,
    CP_ACCESS1_COUNT,              /* count of each of the most frequent access sizes */
    CP_ACCESS2_COUNT,
    CP_ACCESS3_COUNT,
    CP_ACCESS4_COUNT,
    CP_DEVICE,                     /* device id reported by stat */
    CP_SIZE_AT_OPEN,               /* presumably file size observed at open -- TODO confirm */
    CP_FASTEST_RANK,               /* NOTE(review): rank-level summary fields below are not */
    CP_FASTEST_RANK_BYTES,         /* written anywhere in this file; semantics to be confirmed */
    CP_SLOWEST_RANK,
    CP_SLOWEST_RANK_BYTES,

    CP_NUM_INDICES,                /* number of integer counters; keep last */
};
130

131
/* floating point statistics */
132
/* Indices into the floating point fcounters[] array of a darshan_posix_file
 * record.  CP_F_NUM_INDICES must stay last: it sizes that array.
 */
enum darshan_f_posix_indices
{
    /* NOTE: adjust cp_normalize_timestamps() function if any TIMESTAMPS are
     * added or modified in this list
     */
    CP_F_OPEN_TIMESTAMP = 0,    /* timestamp of first open */
    CP_F_READ_START_TIMESTAMP,  /* timestamp of first read */
    CP_F_WRITE_START_TIMESTAMP, /* timestamp of first write */
    CP_F_CLOSE_TIMESTAMP,       /* timestamp of last close */
    CP_F_READ_END_TIMESTAMP,    /* timestamp of last read */
    CP_F_WRITE_END_TIMESTAMP,   /* timestamp of last write */
    CP_F_POSIX_READ_TIME,       /* cumulative posix read time */
    CP_F_POSIX_WRITE_TIME,      /* cumulative posix write time */
    CP_F_POSIX_META_TIME,       /* cumulative posix meta time */
    CP_F_MAX_READ_TIME,
    CP_F_MAX_WRITE_TIME,
    /* Total I/O and meta time consumed by fastest and slowest ranks,
     * reported in either MPI or POSIX time depending on how the file
     * was accessed.
     */
    CP_F_FASTEST_RANK_TIME,
    CP_F_SLOWEST_RANK_TIME,
    CP_F_VARIANCE_RANK_TIME,
    CP_F_VARIANCE_RANK_BYTES,

    CP_F_NUM_INDICES,           /* number of floating point counters; keep last */
};

struct darshan_posix_file
{
162
    darshan_file_id f_id;
163
    int64_t rank;
164 165 166 167
    int64_t counters[CP_NUM_INDICES];
    double fcounters[CP_F_NUM_INDICES];
};

168
struct posix_runtime_file
169 170
{
    struct darshan_posix_file file_record;
171
    UT_hash_handle hlink;
172
};
173

174
struct posix_runtime_file_ref
175
{
176 177 178 179 180 181 182 183
    struct posix_runtime_file* file;
    int fd;
    UT_hash_handle hlink;
};

/* All state owned by the POSIX module. */
struct posix_runtime
{
    struct posix_runtime_file* file_array;   /* preallocated pool of file records */
    int file_array_size;                     /* number of records in file_array */
    int file_count;                          /* records handed out so far (next free slot) */
    struct posix_runtime_file* file_hash;    /* hash of records keyed by file id */
    struct posix_runtime_file_ref* fd_hash;  /* hash of fd references keyed by fd */
};

190
static struct posix_runtime *posix_runtime = NULL;
191
static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
192
static int my_rank = -1;
193
static int darshan_mem_alignment = 1;
194

195 196 197 198 199 200 201 202 203 204 205 206 207 208 209
/* Path prefixes that are never traced.  The list is NULL-terminated so it
 * can be walked without a separate length.
 */
static char *exclusions[] =
{
    "/etc/",
    "/dev/",
    "/usr/",
    "/bin/",
    "/boot/",
    "/lib/",
    "/opt/",
    "/sbin/",
    "/sys/",
    "/proc/",
    NULL
};

210
/* prototypes of the underlying open/close that the wrappers forward to */
DARSHAN_FORWARD_DECL(open, int, (const char *path, int flags, ...));
DARSHAN_FORWARD_DECL(close, int, (int fd));
212

213 214
/* one-time module setup; a no-op once posix_runtime has been allocated */
static void posix_runtime_initialize(void);

/* file record and fd reference bookkeeping */
static struct posix_runtime_file* posix_file_by_name(const char *name);
static struct posix_runtime_file* posix_file_by_name_setfd(const char* name, int fd);
static void posix_file_close_fd(int fd);

/* callbacks handed to darshan core at registration time */
static void posix_prepare_for_shutdown(void);
static void posix_get_output_data(void **buffer, int size);

222 223
/* Acquire/release posix_runtime_mutex; the wrappers bracket every access to
 * the module state with this pair.
 */
#define POSIX_LOCK() pthread_mutex_lock(&posix_runtime_mutex)
#define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex)
224

225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
/*
 * Counter access helpers.  __file is a pointer to an object with a
 * file_record member holding counters[] (integer) and fcounters[] (floating
 * point).  Every argument is parenthesized in the expansion so that
 * expressions such as `x & 3` or `a + b` can be passed safely.  Arguments
 * may still be evaluated more than once, so they must be free of side
 * effects.
 */

/* counters[__counter] = __value */
#define POSIX_SET(__file, __counter, __value) do {\
    (__file)->file_record.counters[(__counter)] = (__value); \
} while(0)

/* fcounters[__counter] = __value */
#define POSIX_F_SET(__file, __counter, __value) do {\
    (__file)->file_record.fcounters[(__counter)] = (__value); \
} while(0)

/* counters[__counter] += __value */
#define POSIX_INC(__file, __counter, __value) do {\
    (__file)->file_record.counters[(__counter)] += (__value); \
} while(0)

/* fcounters[__counter] += __value */
#define POSIX_F_INC(__file, __counter, __value) do {\
    (__file)->file_record.fcounters[(__counter)] += (__value); \
} while(0)

/* Adds the interval [__tm1, __tm2] to fcounters[__counter], counting only
 * the part after __last when the interval starts at or before __last, and
 * then advances __last (which must be an lvalue) to __tm2 if that is later.
 */
#define POSIX_F_INC_NO_OVERLAP(__file, __tm1, __tm2, __last, __counter) do { \
    if((__tm1) > (__last)) \
    { \
        POSIX_F_INC(__file, __counter, ((__tm2) - (__tm1))); \
    } \
    else \
    { \
        POSIX_F_INC(__file, __counter, ((__tm2) - (__last))); \
    } \
    if((__tm2) > (__last)) \
    { \
        (__last) = (__tm2); \
    } \
} while(0)

/* current value of counters[__counter] */
#define POSIX_VALUE(__file, __counter) \
    ((__file)->file_record.counters[(__counter)])

/* current value of fcounters[__counter] */
#define POSIX_F_VALUE(__file, __counter) \
    ((__file)->file_record.fcounters[(__counter)])

/* raises counters[__counter] to __value if it is currently smaller */
#define POSIX_MAX(__file, __counter, __value) do {\
    if((__file)->file_record.counters[(__counter)] < (__value)) \
    { \
        (__file)->file_record.counters[(__counter)] = (__value); \
    } \
} while(0)

263
/*
 * Records a completed open in the module state.
 *   __ret          result of the open call; negative values are ignored
 *                  and also used as the fd to associate with the record
 *   __path         path that was opened; skipped if it starts with one of
 *                  the prefixes in exclusions[]
 *   __mode         stored in CP_MODE when non-zero
 *   __stream_flag  non-zero counts the open in CP_POSIX_FOPENS, zero in
 *                  CP_POSIX_OPENS
 *   __tm1          stored as CP_F_OPEN_TIMESTAMP if that counter is still 0
 *   __tm2          currently unused
 * `break` leaves the enclosing do/while(0), i.e. abandons the recording.
 * The macro declares locals named file, exclude and tmp_index, so the
 * arguments must not be expressions using identifiers with those names.
 */
#define POSIX_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2) do { \
    struct posix_runtime_file* file; \
    char* exclude; \
    int tmp_index = 0; \
    if((__ret) < 0) break; \
    while((exclude = exclusions[tmp_index])) { \
        if(!(strncmp(exclude, (__path), strlen(exclude)))) \
            break; \
        tmp_index++; \
    } \
    if(exclude) break; \
    file = posix_file_by_name_setfd((__path), (__ret)); \
    if(!file) break; \
    file->file_record.rank = my_rank; \
    if(__mode) \
    { \
        POSIX_SET(file, CP_MODE, (__mode)); \
    } \
    if(__stream_flag) \
    { \
        POSIX_INC(file, CP_POSIX_FOPENS, 1); \
    } \
    else \
    { \
        POSIX_INC(file, CP_POSIX_OPENS, 1); \
    } \
    if(POSIX_F_VALUE(file, CP_F_OPEN_TIMESTAMP) == 0) \
    { \
        POSIX_F_SET(file, CP_F_OPEN_TIMESTAMP, (__tm1)); \
    } \
} while(0)
286

287
int DARSHAN_DECL(open)(const char *path, int flags, ...)
288 289 290 291 292
{
    int mode = 0;
    int ret;
    double tm1, tm2;

293 294
    MAP_OR_FAIL(open);

295
    if(flags & O_CREAT) 
296 297 298 299 300 301
    {
        va_list arg;
        va_start(arg, flags);
        mode = va_arg(arg, int);
        va_end(arg);

302
        tm1 = darshan_core_wtime();
303
        ret = __real_open(path, flags, mode);
304
        tm2 = darshan_core_wtime();
305 306 307
    }
    else
    {
308
        tm1 = darshan_core_wtime();
309
        ret = __real_open(path, flags);
310
        tm2 = darshan_core_wtime();
311 312
    }

313 314 315
    POSIX_LOCK();
    posix_runtime_initialize();

316
    POSIX_RECORD_OPEN(ret, path, mode, 0, tm1, tm2);
317

318
    POSIX_UNLOCK();
319 320 321 322

    return(ret);
}

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337
/*
 * Instrumented close(): forwards to the real close, then drops the fd
 * reference kept for this descriptor (if any).
 *
 * Returns whatever the real close returned.  The reference is removed
 * regardless of that result.  errno is captured right after the real call
 * and restored before returning so the bookkeeping cannot alter it.
 */
int DARSHAN_DECL(close)(int fd)
{
    double tm1, tm2;
    int ret;
    int saved_errno;

    MAP_OR_FAIL(close);

    /* the timestamps are taken but not recorded anywhere yet */
    tm1 = darshan_core_wtime();
    ret = __real_close(fd);
    saved_errno = errno;
    tm2 = darshan_core_wtime();
    (void)tm1;
    (void)tm2;

    POSIX_LOCK();
    posix_runtime_initialize();

    posix_file_close_fd(fd);

    POSIX_UNLOCK();

    errno = saved_errno;
    return(ret);
}

/* ***************************************************** */
347

348
static void posix_runtime_initialize()
349
{
350 351 352 353 354 355 356 357 358
    char *alignstr;
    int tmpval;
    int ret;
    int mem_limit;
    struct darshan_module_funcs posix_mod_fns =
    {
        .prepare_for_shutdown = &posix_prepare_for_shutdown,
        .get_output_data = &posix_get_output_data,
    };
359

360
    if(posix_runtime)
361
        return;
362

363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
    /* register the posix module with darshan core */
    darshan_core_register_module(
        POSIX_MOD_NAME,
        &posix_mod_fns,
        &mem_limit);

    /* return if no memory assigned by darshan core */
    if(mem_limit == 0)
        return;

    posix_runtime = malloc(sizeof(*posix_runtime));
    if(!posix_runtime)
        return;
    memset(posix_runtime, 0, sizeof(*posix_runtime));

    /* set maximum number of file records according to max memory limit */
    /* NOTE: maximum number of records is based on the size of a posix file record */
    posix_runtime->file_array_size = mem_limit / sizeof(struct darshan_posix_file);

    /* allocate array of runtime file records */
    posix_runtime->file_array = malloc(sizeof(struct posix_runtime_file) *
                                       posix_runtime->file_array_size);
    if(!posix_runtime->file_array)
    {
        posix_runtime->file_array_size = 0;
        return;
    }
    memset(posix_runtime->file_array, 0, sizeof(struct posix_runtime_file) *
           posix_runtime->file_array_size);

393 394
    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);

395
#if 0
396 397 398 399
    /* set the memory alignment according to config or environment variables */
    #if (__CP_MEM_ALIGNMENT < 1)
        #error Darshan must be configured with a positive value for --with-mem-align
    #endif
400
    alignstr = getenv(CP_MEM_ALIGNMENT_OVERRIDE);
401
    if(alignstr)
402 403 404 405 406 407 408 409
    {
        ret = sscanf(alignstr, "%d", &tmpval);
        /* silently ignore if the env variable is set poorly */
        if(ret == 1 && tmpval > 0)
        {
            darshan_mem_alignment = tmpval;
        }
    }
410
    else
411 412 413
    {
        darshan_mem_alignment = __CP_MEM_ALIGNMENT;
    }
414

415
    /* avoid floating point errors on faulty input */
416
    if(darshan_mem_alignment < 1)
417 418 419
    {
        darshan_mem_alignment = 1;
    }
420
#endif
421

422 423
    return;
}
424

425
static struct posix_runtime_file* posix_file_by_name(const char *name)
426
{
427
    struct posix_runtime_file *file = NULL;
428
    char *newname = NULL;
429
    darshan_file_id file_id;
430

431
    if(!posix_runtime)
432
        return(NULL);
433

434
    newname = darshan_clean_file_path(name);
435
    if(!newname)
436
        newname = (char*)name;
437

438 439 440 441 442
    /* get a unique id for this file from darshan core */
    darshan_core_lookup_id(
        (void*)newname,
        strlen(newname),
        1,
443
        &file_id);
444

445
    /* search the hash table for this file record, and return if found */
446 447
    HASH_FIND(hlink, posix_runtime->file_hash, &file_id, sizeof(darshan_file_id), file);
    if(file)
448
    {
449
        if(newname != name)
450
            free(newname);
451
        return(file);
452 453 454
    }

    /* no existing record, assign a new file record from the global array */
455 456
    file = &posix_runtime->file_array[posix_runtime->file_count];
    file->file_record.f_id = file_id;
457

458
    /* add new record to file hash table */
459
    HASH_ADD(hlink, posix_runtime->file_hash, file_record.f_id, sizeof(darshan_file_id), file);
460

461
    posix_runtime->file_count++;
462 463

    if(newname != name)
464
        free(newname);
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
    return(file);
}

/*
 * Looks up (or creates) the record for `name` and associates `fd` with it.
 *
 * An existing reference for fd is redirected to the record; otherwise a new
 * zeroed reference is allocated and inserted into posix_runtime->fd_hash.
 * Returns the record, or NULL if the module is not initialized, no record
 * could be obtained, or the reference could not be allocated.
 */
static struct posix_runtime_file* posix_file_by_name_setfd(const char* name, int fd)
{
    struct posix_runtime_file_ref* fd_ref = NULL;
    struct posix_runtime_file* record;

    if(!posix_runtime)
        return(NULL);

    /* the record itself is resolved by name */
    record = posix_file_by_name(name);
    if(!record)
        return(NULL);

    HASH_FIND(hlink, posix_runtime->fd_hash, &fd, sizeof(int), fd_ref);
    if(!fd_ref)
    {
        /* first time this fd is seen: create and register a reference */
        fd_ref = calloc(1, sizeof(*fd_ref));
        if(!fd_ref)
            return(NULL);
        fd_ref->fd = fd;
        fd_ref->file = record;

        HASH_ADD(hlink, posix_runtime->fd_hash, fd, sizeof(int), fd_ref);
        return(record);
    }

    /* fd already known: make sure it points at the right record */
    fd_ref->file = record;
    return(record);
}

/*
 * Forgets the association between `fd` and its file record, if one exists.
 * The file record itself is left untouched; only the fd reference is
 * removed from posix_runtime->fd_hash and freed.
 */
static void posix_file_close_fd(int fd)
{
    struct posix_runtime_file_ref *ref;

    if(!posix_runtime)
        return;

    /* search hash table for this fd */
    HASH_FIND(hlink, posix_runtime->fd_hash, &fd, sizeof(int), ref);
    if(ref)
    {
        /* we have a reference, delete it */
        HASH_DELETE(hlink, posix_runtime->fd_hash, ref);
        free(ref);
    }

    return;
}

527 528
/* ***************************************************** */

529
/* Shutdown callback registered with darshan core.  Currently a stub that
 * does nothing.
 */
static void posix_prepare_for_shutdown(void)
{
    return;
}

537
/* Output callback registered with darshan core.  Currently a stub: neither
 * *buffer nor size is read or modified.
 */
static void posix_get_output_data(void **buffer, int size)
{
    (void)buffer;
    (void)size;

    return;
}

543 544 545 546 547 548 549 550
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */