darshan-posix.c 12.1 KB
Newer Older
1 2 3 4 5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#include "darshan-runtime-config.h"
7 8 9 10 11 12 13 14 15 16 17 18 19 20
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <search.h>
#include <assert.h>
21
#include <libgen.h>
22
#include <limits.h>
23
#include <aio.h>
24 25
#define __USE_GNU
#include <pthread.h>
26 27

#include "darshan.h"
28
#include "uthash.h"
29

30
/* Fallback for platforms whose headers do not provide off64_t
 * (HAVE_OFF64_T is expected to come from the build configuration).
 */
#ifndef HAVE_OFF64_T
typedef int64_t off64_t;
#endif

/* Fallback for platforms without a distinct aiocb64 structure: alias it to
 * the plain aiocb.
 */
#ifndef HAVE_AIOCB64
#define aiocb64 aiocb
#endif
36

37 38
/* TODO: where should these definitions live? */

39 40 41 42 43
/* Declare the external symbol __real_<name> with return type `ret` and the
 * parenthesized parameter list `args`.
 * The expansion already ends in ';', so an invocation followed by ';' leaves
 * an extra empty declaration at file scope.
 * NOTE(review): the __real_/__wrap_ prefixes presumably pair with the
 * linker's symbol-wrapping option -- confirm against the build flags.
 */
#define DARSHAN_FORWARD_DECL(name,ret,args) \
  extern ret __real_ ## name args;

/* Produce the wrapper symbol name __wrap_<name> for the function `__name`. */
#define DARSHAN_DECL(__name) __wrap_ ## __name

44 45
/* Expands to nothing in this file, so MAP_OR_FAIL(x); is an empty statement.
 * NOTE(review): presumably a placeholder for a build mode in which the real
 * symbol must be looked up at run time -- confirm.
 */
#define MAP_OR_FAIL(func)

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
/* TODO: where should these file record structs live? (some are needed by darshan-util) */
/* TODO: settle on a naming prefix for these identifiers: DARSHAN_* or CP_* */

/* Module name passed to darshan_core_register_module() when this module
 * registers itself.
 */
#define POSIX_MOD_NAME "POSIX"

/* Indices into the integer counter array of a POSIX file record
 * (struct darshan_posix_file.counters).  The enumerators are numbered
 * consecutively from zero, so their order defines the array layout;
 * CP_NUM_INDICES must stay last because it doubles as the array length.
 */
enum darshan_posix_indices
{
    /* ---- per-operation counts ---- */
    CP_POSIX_READS = 0,           /* number of posix reads */
    CP_POSIX_WRITES,              /* number of posix writes */
    CP_POSIX_OPENS,               /* number of posix opens */
    CP_POSIX_SEEKS,               /* number of posix seeks */
    CP_POSIX_STATS,               /* number of posix stat/lstat/fstat calls */
    CP_POSIX_MMAPS,               /* number of posix mmaps */
    /* The next six carry no description in the original source; presumably
     * they count the stream (f*) and sync variants -- confirm.
     */
    CP_POSIX_FREADS,
    CP_POSIX_FWRITES,
    CP_POSIX_FOPENS,
    CP_POSIX_FSEEKS,
    CP_POSIX_FSYNCS,
    CP_POSIX_FDSYNCS,

    /* ---- volume and access-pattern counters ---- */
    CP_MODE,                      /* file mode */
    CP_BYTES_READ,                /* total number of bytes read */
    CP_BYTES_WRITTEN,             /* total number of bytes written */
    CP_MAX_BYTE_READ,             /* highest byte offset read */
    CP_MAX_BYTE_WRITTEN,          /* highest byte offset written */
    CP_CONSEC_READS,              /* number of consecutive reads */
    CP_CONSEC_WRITES,             /* number of consecutive writes */
    CP_SEQ_READS,                 /* number of sequential reads */
    CP_SEQ_WRITES,                /* number of sequential writes */
    CP_RW_SWITCHES,               /* how often access switched between read and write */
    CP_MEM_NOT_ALIGNED,           /* number of accesses that were not memory aligned */
    CP_MEM_ALIGNMENT,             /* memory alignment, in bytes */
    CP_FILE_NOT_ALIGNED,          /* number of accesses that were not file aligned */
    CP_FILE_ALIGNMENT,            /* file alignment, in bytes */
    CP_MAX_READ_TIME_SIZE,        /* undocumented in the original source */
    CP_MAX_WRITE_TIME_SIZE,       /* undocumented in the original source */

    /* ---- read size histogram: one counter per size range ---- */
    CP_SIZE_READ_0_100,
    CP_SIZE_READ_100_1K,
    CP_SIZE_READ_1K_10K,
    CP_SIZE_READ_10K_100K,
    CP_SIZE_READ_100K_1M,
    CP_SIZE_READ_1M_4M,
    CP_SIZE_READ_4M_10M,
    CP_SIZE_READ_10M_100M,
    CP_SIZE_READ_100M_1G,
    CP_SIZE_READ_1G_PLUS,

    /* ---- write size histogram: one counter per size range ---- */
    CP_SIZE_WRITE_0_100,
    CP_SIZE_WRITE_100_1K,
    CP_SIZE_WRITE_1K_10K,
    CP_SIZE_WRITE_10K_100K,
    CP_SIZE_WRITE_100K_1M,
    CP_SIZE_WRITE_1M_4M,
    CP_SIZE_WRITE_4M_10M,
    CP_SIZE_WRITE_10M_100M,
    CP_SIZE_WRITE_100M_1G,
    CP_SIZE_WRITE_1G_PLUS,

    /* ---- the four most frequently appearing strides ---- */
    CP_STRIDE1_STRIDE,
    CP_STRIDE2_STRIDE,
    CP_STRIDE3_STRIDE,
    CP_STRIDE4_STRIDE,
    /* ---- how often each of those strides appeared ---- */
    CP_STRIDE1_COUNT,
    CP_STRIDE2_COUNT,
    CP_STRIDE3_COUNT,
    CP_STRIDE4_COUNT,

    /* ---- the four most frequently appearing access sizes ---- */
    CP_ACCESS1_ACCESS,
    CP_ACCESS2_ACCESS,
    CP_ACCESS3_ACCESS,
    CP_ACCESS4_ACCESS,
    /* ---- how often each of those access sizes appeared ---- */
    CP_ACCESS1_COUNT,
    CP_ACCESS2_COUNT,
    CP_ACCESS3_COUNT,
    CP_ACCESS4_COUNT,

    CP_DEVICE,                    /* device id as reported by stat */
    /* The remaining counters carry no description in the original source. */
    CP_SIZE_AT_OPEN,
    CP_FASTEST_RANK,
    CP_FASTEST_RANK_BYTES,
    CP_SLOWEST_RANK,
    CP_SLOWEST_RANK_BYTES,

    CP_NUM_INDICES,               /* number of counters; keep last */
};
129

130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
/* Indices into the floating point statistics array of a POSIX file record
 * (struct darshan_posix_file.fcounters).  The enumerators are numbered
 * consecutively from zero; CP_F_NUM_INDICES must stay last because it
 * doubles as the array length.
 */
enum f_darshan_posix_indices
{
    /* ---- timestamps ----
     * NOTE: if any TIMESTAMP entry is added or modified here, the
     * cp_normalize_timestamps() function must be adjusted to match.
     */
    CP_F_OPEN_TIMESTAMP = 0,      /* time of the first open */
    CP_F_READ_START_TIMESTAMP,    /* time of the first read */
    CP_F_WRITE_START_TIMESTAMP,   /* time of the first write */
    CP_F_CLOSE_TIMESTAMP,         /* time of the last close */
    CP_F_READ_END_TIMESTAMP,      /* time of the last read */
    CP_F_WRITE_END_TIMESTAMP,     /* time of the last write */

    /* ---- accumulated durations ---- */
    CP_F_POSIX_READ_TIME,         /* cumulative time spent in posix reads */
    CP_F_POSIX_WRITE_TIME,        /* cumulative time spent in posix writes */
    CP_F_POSIX_META_TIME,         /* cumulative time spent in posix metadata calls */
    CP_F_MAX_READ_TIME,           /* undocumented in the original source */
    CP_F_MAX_WRITE_TIME,          /* undocumented in the original source */

    /* ---- per-rank extremes ----
     * Total I/O plus meta time consumed by the fastest and the slowest
     * rank; reported as MPI or POSIX time depending on how the file was
     * accessed.
     */
    CP_F_FASTEST_RANK_TIME,
    CP_F_SLOWEST_RANK_TIME,
    CP_F_VARIANCE_RANK_TIME,      /* undocumented in the original source */
    CP_F_VARIANCE_RANK_BYTES,     /* undocumented in the original source */

    CP_F_NUM_INDICES,             /* number of statistics; keep last */
};

struct darshan_posix_file
{
161
    darshan_file_id f_id;
162 163 164 165 166 167 168
    int64_t counters[CP_NUM_INDICES];
    double fcounters[CP_F_NUM_INDICES];
};

struct darshan_posix_runtime_file
{
    struct darshan_posix_file file_record;
169
    UT_hash_handle hlink;
170
};
171

172
/* Module-wide runtime state for POSIX instrumentation. */
struct darshan_posix_runtime
{
    struct darshan_posix_runtime_file* file_array;  /* storage for all file records */
    int file_array_size;                            /* number of records file_array can hold */
    int file_count;                                 /* number of records handed out so far */
    struct darshan_posix_runtime_file* file_hash;   /* head of the uthash table of records */
};

180 181
/* Recursive mutex taken by POSIX_LOCK()/POSIX_UNLOCK() around accesses to
 * the module state below.
 */
static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
/* Module state; NULL until posix_runtime_initialize() has allocated it. */
static struct darshan_posix_runtime *posix_runtime = NULL;
/* Rank of this process; -1 until it is assigned (no assignment is visible
 * in this file).
 */
static int my_rank = -1;
/* Memory alignment in bytes; defaults to 1. */
static int darshan_mem_alignment = 1;
184

185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
/* Path prefixes that are never traced.  The list is NULL-terminated so it
 * can be walked without a separate length.
 */
static char *exclusions[] = {
    "/etc/",
    "/dev/",
    "/usr/",
    "/bin/",
    "/boot/",
    "/lib/",
    "/opt/",
    "/sbin/",
    "/sys/",
    "/proc/",
    NULL
};

200 201
/* Declarations of the underlying (__real_) functions called by the wrappers
 * in this file.  The close() declaration stays disabled together with its
 * wrapper.
 */
DARSHAN_FORWARD_DECL(open, int, (const char *path, int flags, ...));
//DARSHAN_FORWARD_DECL(close, int, (int fd));

/* Module set-up and tear-down.
 * NOTE(review): posix_runtime_finalize() is declared here but has no
 * definition in this file.
 */
static void posix_runtime_initialize(void);
static void posix_runtime_finalize(void);

/* Callbacks handed to darshan core through struct darshan_module_funcs. */
static void posix_prepare_for_shutdown(void);
static void posix_get_output_data(void **buffer, int size);

209 210
/* Acquire / release the module-wide mutex (posix_runtime_mutex).  The return
 * value of the pthread call is the value of the macro expression; callers in
 * this file do not check it.
 */
#define POSIX_LOCK() pthread_mutex_lock(&posix_runtime_mutex)
#define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex)
211

212 213
/* Record a completed open in the file's record.
 *
 *   __ret          return value of the open call; nothing is recorded if < 0
 *   __path         path that was opened; nothing is recorded if it starts
 *                  with one of the prefixes in exclusions[]
 *   __mode         mode passed to open; stored only when non-zero
 *   __stream_flag  non-zero counts the open as an FOPEN, zero as an OPEN
 *   __tm1, __tm2   timestamps taken before and after the open call
 *
 * Every macro argument is parenthesized at its point of use so that
 * expression arguments expand safely.  The macro declares the locals `file`,
 * `exclude` and `tmp_index`, so arguments must not use those names.
 *
 * NOTE(review): the body still refers to names that are not defined in this
 * file (darshan_file_by_name, DARSHAN_SET, DARSHAN_INC, DARSHAN_F_*) and to
 * members that struct darshan_posix_runtime_file does not have (log_file,
 * offset, last_byte_written, last_byte_read, last_posix_meta_end).  Its only
 * use is currently commented out; it needs porting before being re-enabled.
 */
#define POSIX_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2) do { \
    struct darshan_posix_runtime_file* file; \
    char* exclude; \
    int tmp_index = 0; \
    if((__ret) < 0) break; \
    while((exclude = exclusions[tmp_index])) { \
        if(!(strncmp(exclude, (__path), strlen(exclude)))) \
            break; \
        tmp_index++; \
    } \
    if(exclude) break; \
    file = darshan_file_by_name((__path), (__ret)); \
    if(!file) break; \
    file->log_file->rank = my_rank; \
    if(__mode) \
        DARSHAN_SET(file, DARSHAN_MODE, (__mode)); \
    file->offset = 0; \
    file->last_byte_written = 0; \
    file->last_byte_read = 0; \
    if(__stream_flag)\
        DARSHAN_INC(file, DARSHAN_POSIX_FOPENS, 1); \
    else \
        DARSHAN_INC(file, DARSHAN_POSIX_OPENS, 1); \
    if(DARSHAN_F_VALUE(file, DARSHAN_F_OPEN_TIMESTAMP) == 0) \
        DARSHAN_F_SET(file, DARSHAN_F_OPEN_TIMESTAMP, (__tm1)); \
    DARSHAN_F_INC_NO_OVERLAP(file, (__tm1), (__tm2), file->last_posix_meta_end, DARSHAN_F_POSIX_META_TIME); \
} while (0)

240
int DARSHAN_DECL(open)(const char *path, int flags, ...)
241 242 243 244 245
{
    int mode = 0;
    int ret;
    double tm1, tm2;

246 247
    MAP_OR_FAIL(open);

248
    if(flags & O_CREAT) 
249 250 251 252 253 254
    {
        va_list arg;
        va_start(arg, flags);
        mode = va_arg(arg, int);
        va_end(arg);

255
        tm1 = darshan_core_wtime();
256
        ret = __real_open(path, flags, mode);
257
        tm2 = darshan_core_wtime();
258 259 260
    }
    else
    {
261
        tm1 = darshan_core_wtime();
262
        ret = __real_open(path, flags);
263
        tm2 = darshan_core_wtime();
264 265
    }

266 267 268
    POSIX_LOCK();
    posix_runtime_initialize();

269 270
    //POSIX_RECORD_OPEN(ret, path, mode, 0, tm1, tm2);

271
    POSIX_UNLOCK();
272 273 274 275

    return(ret);
}

276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
/* Disabled wrapper for close().
 * NOTE(review): the body refers to names that are not defined in this file
 * (struct darshan_file_runtime, darshan_file_by_fd, darshan_file_close_fd,
 * posix_wtime, DARSHAN_F_*); it needs porting before the #if 0 is removed.
 */
#if 0
int DARSHAN_DECL(close)(int fd)
{
    struct darshan_file_runtime* file;
    int tmp_fd = fd;
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(close);

    tm1 = darshan_core_wtime();
    ret = __real_close(fd);
    tm2 = darshan_core_wtime();

    POSIX_LOCK();
    posix_runtime_initialize();

    file = darshan_file_by_fd(tmp_fd);
    if(file)
    {
        file->last_byte_written = 0;
        file->last_byte_read = 0;
        DARSHAN_F_SET(file, DARSHAN_F_CLOSE_TIMESTAMP, posix_wtime());
        DARSHAN_F_INC_NO_OVERLAP(file, tm1, tm2, file->last_posix_meta_end, DARSHAN_F_POSIX_META_TIME);
        darshan_file_close_fd(tmp_fd);
    }

    POSIX_UNLOCK();    

    return(ret);
}
#endif

/* ***************************************************** */

/* ***************************************************** */
310

311
static void posix_runtime_initialize()
312
{
313 314 315 316 317 318 319 320 321
    char *alignstr;
    int tmpval;
    int ret;
    int mem_limit;
    struct darshan_module_funcs posix_mod_fns =
    {
        .prepare_for_shutdown = &posix_prepare_for_shutdown,
        .get_output_data = &posix_get_output_data,
    };
322

323
    if(posix_runtime)
324
        return;
325

326
#if 0
327 328 329 330 331
    /* set the memory alignment according to config or environment variables */
    #if (__CP_MEM_ALIGNMENT < 1)
        #error Darshan must be configured with a positive value for --with-mem-align
    #endif
    alignstr = getenv("DARSHAN_MEMALIGN");
332
    if(alignstr)
333 334 335 336 337 338 339 340
    {
        ret = sscanf(alignstr, "%d", &tmpval);
        /* silently ignore if the env variable is set poorly */
        if(ret == 1 && tmpval > 0)
        {
            darshan_mem_alignment = tmpval;
        }
    }
341
    else
342 343 344
    {
        darshan_mem_alignment = __CP_MEM_ALIGNMENT;
    }
345

346
    /* avoid floating point errors on faulty input */
347
    if(darshan_mem_alignment < 1)
348 349 350
    {
        darshan_mem_alignment = 1;
    }
351
#endif
352

353
    posix_runtime = malloc(sizeof(*posix_runtime));
354
    if(!posix_runtime)
355
        return;
356
    memset(posix_runtime, 0, sizeof(*posix_runtime));
357

358 359 360 361 362 363
    /* register the posix module with darshan core */
    darshan_core_register_module(
        POSIX_MOD_NAME,
        &posix_mod_fns,
        &mem_limit);

364
    /* set maximum number of file records according to max memory limit */
365 366
    posix_runtime->file_array_size = mem_limit / sizeof(struct darshan_posix_runtime_file);

367
    /* allocate array of runtime file records */
368
    posix_runtime->file_array = malloc(sizeof(struct darshan_posix_runtime_file) *
369 370
                                       posix_runtime->file_array_size);
    if(!posix_runtime->file_array)
371
    {
372
        posix_runtime->file_array_size = 0;
373 374 375
        return;
    }
    memset(posix_runtime->file_array, 0, sizeof(struct darshan_posix_runtime_file) *
376
           posix_runtime->file_array_size);
377

378
    return;
379 380
}

381
static struct darshan_posix_runtime_file* posix_file_by_name(const char *name)
382
{
383
    struct darshan_posix_runtime_file *tmp_file = NULL;
384
    char *newname = NULL;
385
    darshan_file_id tmp_id;
386

387
    if(!posix_runtime)
388
        return(NULL);
389

390
    newname = darshan_clean_file_path(name);
391
    if(!newname)
392
        newname = (char*)name;
393

394 395 396 397 398 399 400
    /* get a unique id for this file from darshan core */
    darshan_core_lookup_id(
        (void*)newname,
        strlen(newname),
        1,
        &tmp_id);

401 402 403
    /* search the hash table for this file record, and return if found */
    HASH_FIND(hlink, posix_runtime->file_hash, &tmp_id, sizeof(darshan_file_id), tmp_file);
    if (tmp_file)
404
    {
405 406 407
        if (newname != name)
            free(newname);
        return(tmp_file);
408 409 410 411 412 413
    }

    /* no existing record, assign a new file record from the global array */
    tmp_file = &posix_runtime->file_array[posix_runtime->file_count];
    tmp_file->file_record.f_id = tmp_id;

414 415
    /* add new record to file hash table */
    HASH_ADD(hlink, posix_runtime->file_hash, file_record.f_id, sizeof(darshan_file_id), tmp_file);
416

417
    posix_runtime->file_count++;
418 419

    if(newname != name)
420 421
        free(newname);
    return(tmp_file);
422 423
}

424 425
/* ***************************************************** */

426
/* Shutdown callback registered with darshan core through
 * struct darshan_module_funcs.  Currently a stub that does nothing.
 */
static void posix_prepare_for_shutdown(void)
{
    return;
}


432
/* Output callback registered with darshan core through
 * struct darshan_module_funcs.  Currently a stub: it leaves *buffer
 * untouched and ignores size.
 */
static void posix_get_output_data(void **buffer, int size)
{
    return;
}


438 439 440 441 442 443 444 445
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */