darshan-posix.c 11.4 KB
Newer Older
1
2
3
4
5
/*
 *  (C) 2009 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

6
#include "darshan-runtime-config.h"
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdarg.h>
#include <string.h>
#include <time.h>
#include <stdlib.h>
#include <errno.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <search.h>
#include <assert.h>
21
#include <libgen.h>
22
#include <limits.h>
Philip Carns's avatar
Philip Carns committed
23
#include <aio.h>
24
25
#define __USE_GNU
#include <pthread.h>
26
27
28

#include "darshan.h"

29
/* Fallback definition of off64_t as a 64-bit signed integer, used only when
 * HAVE_OFF64_T is not defined.
 * NOTE(review): HAVE_OFF64_T is presumably provided by the build
 * configuration header -- confirm.
 */
#ifndef HAVE_OFF64_T
typedef int64_t off64_t;
#endif
32
33
34
/* When HAVE_AIOCB64 is not defined, make the name aiocb64 an alias of aiocb. */
#ifndef HAVE_AIOCB64
#define aiocb64 aiocb
#endif
35

36
37
38
39
40
/* Declare an extern prototype named __real_<name> with return type `ret` and
 * parameter list `args` (the parameter list must include its parentheses).
 * NOTE(review): the expansion already ends in ';', so a call site that adds
 * its own ';' yields an extra empty declaration.
 */
#define DARSHAN_FORWARD_DECL(name,ret,args) \
  extern ret __real_ ## name args;

/* Produce the symbol name __wrap_<name>, used to define the wrapper functions.
 * NOTE(review): the __wrap_/__real_ naming presumably follows the linker's
 * symbol wrapping convention -- confirm against the build flags.
 */
#define DARSHAN_DECL(__name) __wrap_ ## __name

41
42
/* Expands to nothing in this file; the wrappers invoke it before calling
 * __real_<func>.
 * NOTE(review): presumably a placeholder for a symbol lookup step used in
 * other build modes -- confirm.
 */
#define MAP_OR_FAIL(func)

43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
/* TODO: where do these file record structs go? (some needed for darshan-util) */
/* TODO: DARSHAN_* OR CP_* */

/* module name passed to darshan_core_register_module() */
#define POSIX_MOD_NAME "POSIX"

/* Indices of the integer counters kept for each file record.  The order
 * determines the numeric value of every index; CP_NUM_INDICES is the total
 * number of counters.
 */
enum darshan_posix_indices
{
    /* ---- operation counts ---- */

    /* count of posix reads */
    CP_POSIX_READS = 0,
    /* count of posix writes */
    CP_POSIX_WRITES,
    /* count of posix opens */
    CP_POSIX_OPENS,
    /* count of posix seeks */
    CP_POSIX_SEEKS,
    /* count of posix stat/lstat/fstats */
    CP_POSIX_STATS,
    /* count of posix mmaps */
    CP_POSIX_MMAPS,
    CP_POSIX_FREADS,
    CP_POSIX_FWRITES,
    CP_POSIX_FOPENS,
    CP_POSIX_FSEEKS,
    CP_POSIX_FSYNCS,
    CP_POSIX_FDSYNCS,

    /* ---- per-file properties and access pattern statistics ---- */

    /* mode of file */
    CP_MODE,
    /* total bytes read */
    CP_BYTES_READ,
    /* total bytes written */
    CP_BYTES_WRITTEN,
    /* highest offset byte read */
    CP_MAX_BYTE_READ,
    /* highest offset byte written */
    CP_MAX_BYTE_WRITTEN,
    /* count of consecutive reads */
    CP_CONSEC_READS,
    /* count of consecutive writes */
    CP_CONSEC_WRITES,
    /* count of sequential reads */
    CP_SEQ_READS,
    /* count of sequential writes */
    CP_SEQ_WRITES,
    /* number of times switched between read and write */
    CP_RW_SWITCHES,
    /* count of accesses not mem aligned */
    CP_MEM_NOT_ALIGNED,
    /* mem alignment in bytes */
    CP_MEM_ALIGNMENT,
    /* count of accesses not file aligned */
    CP_FILE_NOT_ALIGNED,
    /* file alignment in bytes */
    CP_FILE_ALIGNMENT,
    CP_MAX_READ_TIME_SIZE,
    CP_MAX_WRITE_TIME_SIZE,

    /* ---- buckets: count of posix read size ranges ---- */
    CP_SIZE_READ_0_100,
    CP_SIZE_READ_100_1K,
    CP_SIZE_READ_1K_10K,
    CP_SIZE_READ_10K_100K,
    CP_SIZE_READ_100K_1M,
    CP_SIZE_READ_1M_4M,
    CP_SIZE_READ_4M_10M,
    CP_SIZE_READ_10M_100M,
    CP_SIZE_READ_100M_1G,
    CP_SIZE_READ_1G_PLUS,

    /* ---- buckets: count of posix write size ranges ---- */
    CP_SIZE_WRITE_0_100,
    CP_SIZE_WRITE_100_1K,
    CP_SIZE_WRITE_1K_10K,
    CP_SIZE_WRITE_10K_100K,
    CP_SIZE_WRITE_100K_1M,
    CP_SIZE_WRITE_1M_4M,
    CP_SIZE_WRITE_4M_10M,
    CP_SIZE_WRITE_10M_100M,
    CP_SIZE_WRITE_100M_1G,
    CP_SIZE_WRITE_1G_PLUS,

    /* ---- counters: the four most frequently appearing strides ---- */
    CP_STRIDE1_STRIDE,
    CP_STRIDE2_STRIDE,
    CP_STRIDE3_STRIDE,
    CP_STRIDE4_STRIDE,
    /* count of each of the most frequent strides */
    CP_STRIDE1_COUNT,
    CP_STRIDE2_COUNT,
    CP_STRIDE3_COUNT,
    CP_STRIDE4_COUNT,

    /* ---- the four most frequently appearing access sizes ---- */
    CP_ACCESS1_ACCESS,
    CP_ACCESS2_ACCESS,
    CP_ACCESS3_ACCESS,
    CP_ACCESS4_ACCESS,
    /* count of each of the most frequent access sizes */
    CP_ACCESS1_COUNT,
    CP_ACCESS2_COUNT,
    CP_ACCESS3_COUNT,
    CP_ACCESS4_COUNT,

    /* device id reported by stat */
    CP_DEVICE,
    CP_SIZE_AT_OPEN,
    CP_FASTEST_RANK,
    CP_FASTEST_RANK_BYTES,
    CP_SLOWEST_RANK,
    CP_SLOWEST_RANK_BYTES,

    /* number of integer counters; must stay last */
    CP_NUM_INDICES,
};
126

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
/* Indices of the floating point counters kept for each file record.
 *
 * NOTE: adjust cp_normalize_timestamps() function if any TIMESTAMPS are
 * added or modified in this list
 */
enum f_darshan_posix_indices
{
    /* ---- timestamps ---- */

    /* timestamp of first open */
    CP_F_OPEN_TIMESTAMP = 0,
    /* timestamp of first read */
    CP_F_READ_START_TIMESTAMP,
    /* timestamp of first write */
    CP_F_WRITE_START_TIMESTAMP,
    /* timestamp of last close */
    CP_F_CLOSE_TIMESTAMP,
    /* timestamp of last read */
    CP_F_READ_END_TIMESTAMP,
    /* timestamp of last write */
    CP_F_WRITE_END_TIMESTAMP,

    /* ---- cumulative and maximum times ---- */

    /* cumulative posix read time */
    CP_F_POSIX_READ_TIME,
    /* cumulative posix write time */
    CP_F_POSIX_WRITE_TIME,
    /* cumulative posix meta time */
    CP_F_POSIX_META_TIME,
    CP_F_MAX_READ_TIME,
    CP_F_MAX_WRITE_TIME,

    /* ---- per-rank summary values ---- */

    /* Total I/O and meta time consumed by fastest and slowest ranks,
     * reported in either MPI or POSIX time depending on how the file
     * was accessed.
     */
    CP_F_FASTEST_RANK_TIME,
    CP_F_SLOWEST_RANK_TIME,
    CP_F_VARIANCE_RANK_TIME,
    CP_F_VARIANCE_RANK_BYTES,

    /* number of floating point counters; must stay last */
    CP_F_NUM_INDICES,
};

/* Per-file POSIX record: one integer counter for every index in
 * enum darshan_posix_indices and one floating point counter for every index
 * in enum f_darshan_posix_indices.
 */
struct darshan_posix_file
{
    int64_t counters[CP_NUM_INDICES];     /* indexed by enum darshan_posix_indices */
    double fcounters[CP_F_NUM_INDICES];   /* indexed by enum f_darshan_posix_indices */
};

/* Runtime wrapper around a file record; at present it holds only the record
 * itself.
 */
struct darshan_posix_runtime_file
{
    struct darshan_posix_file file_record;   /* the counters for this file */
};
166

167
/* Module-wide runtime state for the POSIX instrumentation. */
struct darshan_posix_runtime
{
    /* array of runtime file records; the element type is the
     * struct darshan_posix_runtime_file defined in this file (the former
     * spelling "darshan_posix_file_runtime" named a type that is never
     * defined)
     */
    struct darshan_posix_runtime_file *file_array;
    /* number of elements allocated in file_array */
    int file_array_sz;
};

173
174
175
/* recursive mutex taken through POSIX_LOCK()/POSIX_UNLOCK() */
static pthread_mutex_t posix_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
/* module state; NULL until posix_runtime_initialize() allocates it */
static struct darshan_posix_runtime *posix_runtime = NULL;

176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
/* Path prefixes that are not traced.  The list is terminated by a NULL
 * entry so it can be walked without a separate length.
 */
static char* exclusions[] = {
    "/etc/",  "/dev/", "/usr/",
    "/bin/",  "/boot/", "/lib/",
    "/opt/",  "/sbin/", "/sys/",
    "/proc/",
    NULL    /* end-of-list sentinel */
};

191
192
/* memory alignment value; starts at 1 and is updated in
 * posix_runtime_initialize() from DARSHAN_MEMALIGN or __CP_MEM_ALIGNMENT
 */
static int darshan_mem_alignment = 1;

193
194
/* declares __real_open, which the open() wrapper in this file calls */
DARSHAN_FORWARD_DECL(open, int, (const char *path, int flags, ...));
/* the matching declaration for close() is disabled, like its wrapper */
//DARSHAN_FORWARD_DECL(close, int, (int fd));
195

196
/* Forward declarations of the module's internal functions. */

/* allocates the module state and registers with darshan core; returns
 * immediately once the state exists
 */
static void posix_runtime_initialize(void);
/* NOTE(review): declared here, but no definition is visible in this file */
static void posix_runtime_finalize(void);

/* callbacks handed to darshan core through struct darshan_module_funcs */
static void posix_prepare_for_shutdown(void);
static void posix_get_output_data(void **buffer, int size);

202
203
/* acquire / release posix_runtime_mutex; the wrappers bracket their
 * bookkeeping with this pair
 */
#define POSIX_LOCK() pthread_mutex_lock(&posix_runtime_mutex)
#define POSIX_UNLOCK() pthread_mutex_unlock(&posix_runtime_mutex)
204

205
206
207
/* DARSHAN_STAT_FILE(_f, _p, _r)
 *
 *   _f  file record handed to the DARSHAN_VALUE / DARSHAN_SET macros
 *   _p  path of the file (not used by the expansion)
 *   _r  open file descriptor
 *
 * When __DARSHAN_STAT_AT_OPEN is defined and the record shows neither a
 * previous stat nor a previous open, fstat64() the descriptor and store the
 * block size and file size in the record.  Otherwise the macro expands to an
 * empty statement and evaluates none of its arguments.
 *
 * The stat buffer is declared inside the expansion so the macro does not
 * depend on a variable existing at the expansion site.
 * NOTE(review): DARSHAN_VALUE, DARSHAN_SET and the DARSHAN_* counter names
 * are not defined in this file -- confirm they match the CP_* indices.
 */
#ifdef __DARSHAN_STAT_AT_OPEN
#define DARSHAN_STAT_FILE(_f, _p, _r) do { \
    struct stat64 cp_stat_buf; \
    if(!DARSHAN_VALUE((_f), DARSHAN_POSIX_STATS) && !DARSHAN_VALUE((_f), DARSHAN_POSIX_OPENS)){ \
        if(fstat64((_r), &cp_stat_buf) == 0) { \
            DARSHAN_SET((_f), DARSHAN_FILE_ALIGNMENT, cp_stat_buf.st_blksize); \
            DARSHAN_SET((_f), DARSHAN_SIZE_AT_OPEN, cp_stat_buf.st_size); \
        }\
    }\
}while(0)
#else
#define DARSHAN_STAT_FILE(_f, _p, _r) do { }while(0)
#endif

218
219
/* POSIX_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2)
 *
 *   __ret          return value of the open call (file descriptor)
 *   __path         path that was opened
 *   __mode         mode argument of the open call; recorded when non-zero
 *   __stream_flag  non-zero counts the open as FOPENS, zero as OPENS
 *   __tm1, __tm2   timestamps taken before and after the open call
 *
 * Does nothing when the open failed (__ret < 0), when __path starts with one
 * of the prefixes in exclusions[], or when no file record is returned for
 * the path.  Otherwise it resets the record's offsets, bumps the open
 * counter, sets the first-open timestamp if it is still zero and adds the
 * elapsed time to the meta time.
 *
 * Fixed relative to the earlier text: the result of the lookup is tested
 * through `file` (the variable actually declared) and is assigned directly
 * instead of through a run of dereference operators.
 *
 * NOTE(review): darshan_file_by_name_setfd, my_rank, the DARSHAN_* macros and
 * the members log_file, offset, last_byte_written, last_byte_read and
 * last_posix_meta_end are not declared in this file; struct
 * darshan_posix_runtime_file currently has only file_record.  The only use
 * of this macro in this file is commented out.
 */
#define POSIX_RECORD_OPEN(__ret, __path, __mode, __stream_flag, __tm1, __tm2) do { \
    struct darshan_posix_runtime_file* file; \
    char* exclude; \
    int tmp_index = 0; \
    if((__ret) < 0) break; \
    while((exclude = exclusions[tmp_index])) { \
        if(!(strncmp(exclude, (__path), strlen(exclude)))) \
            break; \
        tmp_index++; \
    } \
    if(exclude) break; \
    file = darshan_file_by_name_setfd((__path), (__ret)); \
    if(!file) break; \
    DARSHAN_STAT_FILE(file, (__path), (__ret)); \
    file->log_file->rank = my_rank; \
    if(__mode) \
        DARSHAN_SET(file, DARSHAN_MODE, (__mode)); \
    file->offset = 0; \
    file->last_byte_written = 0; \
    file->last_byte_read = 0; \
    if(__stream_flag)\
        DARSHAN_INC(file, DARSHAN_POSIX_FOPENS, 1); \
    else \
        DARSHAN_INC(file, DARSHAN_POSIX_OPENS, 1); \
    if(DARSHAN_F_VALUE(file, DARSHAN_F_OPEN_TIMESTAMP) == 0) \
        DARSHAN_F_SET(file, DARSHAN_F_OPEN_TIMESTAMP, (__tm1)); \
    DARSHAN_F_INC_NO_OVERLAP(file, (__tm1), (__tm2), file->last_posix_meta_end, DARSHAN_F_POSIX_META_TIME); \
} while (0)

247
#if 0
/* Disabled wrapper for close().
 *
 * Times the real close(), then (under the module lock) looks up the record
 * for the descriptor, resets its last-byte markers, stamps the close time,
 * adds the elapsed time to the meta time and releases the descriptor
 * mapping.  Returns whatever the real close() returned.
 *
 * NOTE(review): before enabling, __real_close needs its forward declaration
 * restored, and struct darshan_file_runtime, darshan_file_by_fd,
 * darshan_file_close_fd and posix_wtime are not declared in this file (the
 * rest of the file times calls with darshan_core_wtime()).
 */
int DARSHAN_DECL(close)(int fd)
{
    struct darshan_file_runtime* file;
    int tmp_fd = fd;
    double tm1, tm2;
    int ret;

    MAP_OR_FAIL(close);

    tm1 = darshan_core_wtime();
    ret = __real_close(fd);
    tm2 = darshan_core_wtime();

    POSIX_LOCK();
    posix_runtime_initialize();

    file = darshan_file_by_fd(tmp_fd);
    if(file)
    {
        file->last_byte_written = 0;
        file->last_byte_read = 0;
        DARSHAN_F_SET(file, DARSHAN_F_CLOSE_TIMESTAMP, posix_wtime());
        DARSHAN_F_INC_NO_OVERLAP(file, tm1, tm2, file->last_posix_meta_end, DARSHAN_F_POSIX_META_TIME);
        darshan_file_close_fd(tmp_fd);
    }

    POSIX_UNLOCK();

    return(ret);
}
#endif
279

280
int DARSHAN_DECL(open)(const char *path, int flags, ...)
281
282
283
284
285
{
    int mode = 0;
    int ret;
    double tm1, tm2;

286
287
    MAP_OR_FAIL(open);

288
289
290
291
292
293
294
    if (flags & O_CREAT) 
    {
        va_list arg;
        va_start(arg, flags);
        mode = va_arg(arg, int);
        va_end(arg);

295
        tm1 = darshan_core_wtime();
296
        ret = __real_open(path, flags, mode);
297
        tm2 = darshan_core_wtime();
298
299
300
    }
    else
    {
301
        tm1 = darshan_core_wtime();
302
        ret = __real_open(path, flags);
303
        tm2 = darshan_core_wtime();
304
305
    }

306
307
308
309
310
    POSIX_LOCK();
    posix_runtime_initialize();

//    POSIX_RECORD_OPEN(ret, path, mode, 0, tm1, tm2);
    POSIX_UNLOCK();
311
312
313
314

    return(ret);
}

315
/* ***************************************************** */
316
317


318
static void posix_runtime_initialize()
319
{
320
321
322
323
324
325
326
327
328
    char *alignstr;
    int tmpval;
    int ret;
    int mem_limit;
    struct darshan_module_funcs posix_mod_fns =
    {
        .prepare_for_shutdown = &posix_prepare_for_shutdown,
        .get_output_data = &posix_get_output_data,
    };
329

330
331
    if (posix_runtime)
        return;
332

333
334
335
336
337
338
339
340
341
342
343
344
345
346
    /* set the memory alignment according to config or environment variables */
    #if (__CP_MEM_ALIGNMENT < 1)
        #error Darshan must be configured with a positive value for --with-mem-align
    #endif
    alignstr = getenv("DARSHAN_MEMALIGN");
    if (alignstr)
    {
        ret = sscanf(alignstr, "%d", &tmpval);
        /* silently ignore if the env variable is set poorly */
        if(ret == 1 && tmpval > 0)
        {
            darshan_mem_alignment = tmpval;
        }
    }
347
    else
348
349
350
    {
        darshan_mem_alignment = __CP_MEM_ALIGNMENT;
    }
351

352
353
354
355
356
    /* avoid floating point errors on faulty input */
    if (darshan_mem_alignment < 1)
    {
        darshan_mem_alignment = 1;
    }
357

358
359
360
    posix_runtime = malloc(sizeof(*posix_runtime));
    if (!posix_runtime)
        return;
361

362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
    /* register the posix module with darshan core */
    darshan_core_register_module(
        POSIX_MOD_NAME,
        &posix_mod_fns,
        &mem_limit);

    /* allocate array of runtime file records no larger than the returned mem_limit */
    posix_runtime->file_array_sz = mem_limit / sizeof(struct darshan_posix_runtime_file);
    posix_runtime->file_array = malloc(sizeof(struct darshan_posix_runtime_file) *
                                       posix_runtime->file_array_sz);
    if (!posix_runtime->file_array)
    {
        posix_runtime->file_array_sz = 0;
        return;
    }
    memset(posix_runtime->file_array, 0, sizeof(struct darshan_posix_runtime_file) *
           posix_runtime->file_array_sz);
379

380
    return;
381
382
}

383
static struct darshan_posix_runtime_file* posix_file_by_name(const char *name)
384
{
385
386
    struct darshan_posix_runtime_file *tmp_file;
    char *newname = NULL;
387

388
389
    if (!posix_runtime)
        return(NULL);
390

391
392
393
    newname = darshan_clean_file_path(name);
    if (!newname)
        newname = (char*)name;
394

395
396
397
    if (newname != name)
        free(newname);
    return(tmp_file);
398
399
}

400
/* Module callback registered as .prepare_for_shutdown in
 * posix_runtime_initialize().  Currently a no-op.
 */
static void posix_prepare_for_shutdown(void)
{
    return;
}

406
/* Module callback registered as .get_output_data in
 * posix_runtime_initialize().  Currently a no-op: neither argument is read
 * and *buffer is left untouched.
 *
 * buffer  output location for the module's data (not written yet)
 * size    size argument supplied by the caller (not used yet)
 */
static void posix_get_output_data(void **buffer, int size)
{
    /* not implemented yet; silence unused-parameter warnings */
    (void)buffer;
    (void)size;

    return;
}

412
413
414
415
416
417
418
419
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */