darshan-lustre.c 6.42 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE

#include "darshan-runtime-config.h"
#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <time.h>
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>

#include "uthash.h"

#include "darshan.h"
#include "darshan-dynamic.h"

25
26
27
28
29
30
31
/* TODO: once mmap merge is complete, we can just use an array
 * to store record data rather than a hash table -- in that
 * branch, register_record() returns whether the record
 * already exists, at which point we won't need to instrument
 * more data, since the Lustre record data is immutable. Records
 * could just be appended to the array if there is no need to
 * search for a specific record.
 */
33
34
35
36
37
38
/* Hash-table entry that ties a Darshan record id to its Lustre record. */
struct lustre_record_ref
{
    /* record this entry refers to; record->rec_id is used as the hash key */
    struct darshan_lustre_record *record;
    /* uthash handle that links this entry into lustre_runtime->record_hash */
    UT_hash_handle hlink;
};

39
40
/* Module-wide runtime state for the Lustre instrumentation module. */
struct lustre_runtime
{
    /* hash reference entries; entry i is paired with record_array[i] */
    struct lustre_record_ref *ref_array;
    /* backing storage for the Lustre records themselves */
    struct darshan_lustre_record *record_array;
    /* number of slots allocated in both ref_array and record_array */
    int record_array_size;
    /* index of the next unused slot in both arrays */
    int record_array_ndx;
    /* uthash head used to look up a reference entry by record id */
    struct lustre_record_ref *record_hash;
};

/* module runtime state; NULL until lustre_runtime_initialize() allocates it
 * and again after lustre_shutdown()
 */
static struct lustre_runtime *lustre_runtime = NULL;
/* recursive mutex taken through LUSTRE_LOCK()/LUSTRE_UNLOCK() */
static pthread_mutex_t lustre_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
/* set to 1 by lustre_begin_shutdown(); checked by lustre_runtime_initialize() */
static int instrumentation_disabled = 0;
/* passed to darshan_core_register_module() to be filled in; copied into the
 * rank field of every new record
 */
static int my_rank = -1;

53
54
/* module setup: registers with darshan-core and allocates runtime state */
static void lustre_runtime_initialize(void);

55
56
57
58
59
60
61
62
/* callbacks handed to darshan-core through struct darshan_module_funcs */
static void lustre_begin_shutdown(void);
static void lustre_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs,
    int shared_rec_count, void **lustre_buf, int *lustre_buf_sz);
static void lustre_shutdown(void);

/* acquire/release the module-wide mutex (lustre_runtime_mutex) */
#define LUSTRE_LOCK() pthread_mutex_lock(&lustre_runtime_mutex)
#define LUSTRE_UNLOCK() pthread_mutex_unlock(&lustre_runtime_mutex)

63
64
65
66
67
68
69
/* TODO: is there any way we can further compact Lustre data to save space?
 * e.g., are all files in the same directory guaranteed same striping parameters?
 * if so, can we store stripe parameters on per-directory basis and the OST
 * list on a per-file basis? maybe the storage savings are small enough this isn't
 * worth it, but nice to keep in mind
 */

70
71
void darshan_instrument_lustre_file(char *filepath)
{
72
73
74
75
76
    struct lustre_record_ref *lustre_ref;
    darshan_record_id rec_id;
    int limit_flag;

    LUSTRE_LOCK();
77
78
    /* make sure the lustre module is already initialized */
    lustre_runtime_initialize();
79

80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
    limit_flag = (lustre_runtime->record_array_ndx >= lustre_runtime->record_array_size);

    /* register a Lustre file record with Darshan */
    darshan_core_register_record(
        (void *)filepath,
        strlen(filepath),
        DARSHAN_LUSTRE_MOD,
        1,
        limit_flag,
        &rec_id,
        NULL);

    /* if record id is 0, darshan has no more memory for instrumenting */
    if(rec_id == 0)
        return;

    HASH_FIND(hlink, lustre_runtime->record_hash, &rec_id,
        sizeof(darshan_record_id), lustre_ref);
    if(!lustre_ref)
    {
        /* no existing record, allocate a new one and add it to the hash */
        lustre_ref = &(lustre_runtime->ref_array[lustre_runtime->record_array_ndx]);
        lustre_ref->record = &(lustre_runtime->record_array[lustre_runtime->record_array_ndx]);
        lustre_ref->record->rec_id = rec_id;
        lustre_ref->record->rank = my_rank;

        /* TODO: gather lustre data, store in record hash */
        /* counters in lustre_ref->record->counters */
108

109
110
111
112
113
114
        HASH_ADD(hlink, lustre_runtime->record_hash, record->rec_id,
            sizeof(darshan_record_id), lustre_ref);
        lustre_runtime->record_array_ndx++;
    }

    LUSTRE_UNLOCK();
115
116
117
    return;
}

118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
/*
 * lustre_runtime_initialize()
 *
 * Registers the Lustre module with darshan-core and allocates the module's
 * runtime state, sizing the record arrays from the memory limit that
 * darshan-core reports.
 *
 * Does nothing if the module is already initialized or instrumentation is
 * disabled. On return lustre_runtime is either NULL (no memory assigned, or
 * the runtime struct could not be allocated) or points at a zeroed runtime
 * whose record_array_size is the number of usable record slots; that count
 * is 0 if the record arrays could not be allocated.
 */
static void lustre_runtime_initialize(void)
{
    int mem_limit = 0;
    size_t rec_count;
    struct darshan_module_funcs lustre_mod_fns =
    {
        .begin_shutdown = &lustre_begin_shutdown,
        .get_output_data = &lustre_get_output_data,
        .shutdown = &lustre_shutdown
    };

    /* don't do anything if already initialized or instrumentation is disabled */
    if(lustre_runtime || instrumentation_disabled)
        return;

    /* register the lustre module with darshan-core */
    darshan_core_register_module(
        DARSHAN_LUSTRE_MOD,
        &lustre_mod_fns,
        &my_rank,
        &mem_limit,
        NULL);

    /* return if no memory assigned by darshan core; a negative limit is
     * treated the same way so it cannot turn into a huge unsigned count
     */
    if(mem_limit <= 0)
        return;

    /* calloc hands back zeroed memory, so no separate memset is needed */
    lustre_runtime = calloc(1, sizeof(*lustre_runtime));
    if(!lustre_runtime)
        return;

    /* allocate arrays of Lustre records according to the amount of memory
     * assigned by Darshan
     */
    rec_count = (size_t)mem_limit / sizeof(struct darshan_lustre_record);
    if(rec_count == 0)
        return; /* not even one record fits; leave the arrays empty */

    lustre_runtime->ref_array =
        calloc(rec_count, sizeof(*lustre_runtime->ref_array));
    lustre_runtime->record_array =
        calloc(rec_count, sizeof(*lustre_runtime->record_array));
    if(!lustre_runtime->ref_array || !lustre_runtime->record_array)
    {
        /* release whichever half succeeded so nothing leaks and no
         * dangling pointer is left behind; record_array_size stays 0
         */
        free(lustre_runtime->ref_array);
        free(lustre_runtime->record_array);
        lustre_runtime->ref_array = NULL;
        lustre_runtime->record_array = NULL;
        lustre_runtime->record_array_size = 0;
        return;
    }

    /* rec_count <= mem_limit, so it always fits in an int */
    lustre_runtime->record_array_size = (int)rec_count;

    return;
}

171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/**************************************************************************
 * Functions exported by Lustre module for coordinating with darshan-core *
 **************************************************************************/

/*
 * lustre_begin_shutdown()
 *
 * Registered with darshan-core as the module's begin_shutdown callback.
 * Raises the instrumentation_disabled flag while holding the module mutex.
 * The module must already be initialized.
 */
static void lustre_begin_shutdown(void)
{
    assert(lustre_runtime);

    LUSTRE_LOCK();

    /* from here on, no further instrumentation while Darshan shuts down */
    instrumentation_disabled = 1;

    LUSTRE_UNLOCK();
}

/*
 * lustre_get_output_data()
 *
 * Registered with darshan-core as the module's get_output_data callback.
 * Currently a stub: it only checks that the module is initialized and does
 * not touch any of its arguments, so *lustre_buf and *lustre_buf_sz are left
 * exactly as the caller passed them.
 *
 * mod_comm:         communicator supplied by darshan-core (unused)
 * shared_recs:      ids of shared records (unused)
 * shared_rec_count: number of entries in shared_recs (unused)
 * lustre_buf:       out-parameter for the module's output buffer (not set)
 * lustre_buf_sz:    out-parameter for the output buffer size (not set)
 */
static void lustre_get_output_data(
    MPI_Comm mod_comm,
    darshan_record_id *shared_recs,
    int shared_rec_count,
    void **lustre_buf,
    int *lustre_buf_sz)
{
    /* nothing is consumed until the TODO below is implemented */
    (void)mod_comm;
    (void)shared_recs;
    (void)shared_rec_count;
    (void)lustre_buf;
    (void)lustre_buf_sz;

    assert(lustre_runtime);

    /* TODO: determine lustre record shared across all processes,
     * and have only rank 0 write these records out. No shared
     * reductions should be necessary as the Lustre data for a
     * given file should be the same on each process
     */

    return;
}

static void lustre_shutdown(void)
{
    assert(lustre_runtime);

    /* TODO: free data structures */

    lustre_runtime = NULL;

    return;
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */