darshan-bgq.c 9.02 KB
Newer Older
Kevin Harms's avatar
Kevin Harms committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE

#include "darshan-runtime-config.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <pthread.h>
#include <assert.h>

#include "uthash.h"
#include "darshan.h"
#include "darshan-bgq-log-format.h"
#include "darshan-dynamic.h"

#include <mpix.h>
#include <spi/include/kernel/location.h>
#include <spi/include/kernel/process.h>
#include <firmware/include/personality.h>

/*
 * Simple module which captures BG/Q hardware specific information about 
 * the job.
 * 
 * This module does not intercept any system calls. It just pulls data
 * from the personality struct at initialization.
 */


/*
 * Global runtime struct for tracking data needed at runtime
 */
struct bgq_runtime
{
    /* the BG/Q record filled in by capture() and handed to darshan-core */
    struct darshan_bgq_record record;

    /* TODO: we don't need the mmap and regular buffer, both */
    struct darshan_bgq_record *mmap_buf;
};

/* module runtime state; allocated by bgq_runtime_initialize(), NULL otherwise */
static struct bgq_runtime *bgq_runtime = NULL;
/* recursive mutex taken by BGQ_LOCK()/BGQ_UNLOCK() */
static pthread_mutex_t bgq_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

/* the instrumentation_disabled flag is used to toggle functions on/off */
static int instrumentation_disabled = 0;

/* my_rank indicates the MPI rank of this process */
static int my_rank = -1;
/* alignment value; its address is handed to darshan_core_register_module() */
static int darshan_mem_alignment = 1;
Kevin Harms's avatar
Kevin Harms committed
56
57

/* internal helper functions for the "NULL" module */
Kevin Harms's avatar
Kevin Harms committed
58
void bgq_runtime_initialize(void);
Kevin Harms's avatar
Kevin Harms committed
59
60
61

/* forward declaration for module functions needed to interface with darshan-core */
static void bgq_begin_shutdown(void);
62
static void bgq_get_output_data(MPI_Comm mod_comm, darshan_record_id *shared_recs, int shared_rec_count, void **buffer, int *size);
Kevin Harms's avatar
Kevin Harms committed
63
64
65
66
67
68
69
70
71
72
73
74
static void bgq_shutdown(void);

/* macros for obtaining/releasing the "NULL" module lock */
#define BGQ_LOCK() pthread_mutex_lock(&bgq_runtime_mutex)
#define BGQ_UNLOCK() pthread_mutex_unlock(&bgq_runtime_mutex)

/*
 * Function which updates all the counter data
 */
static void capture(struct darshan_bgq_record *rec)
{
    Personality_t person;
Kevin Harms's avatar
Kevin Harms committed
75
    int r;
Kevin Harms's avatar
Kevin Harms committed
76
77
78

    rec->counters[BGQ_CSJOBID] = Kernel_GetJobID();
    rec->counters[BGQ_RANKSPERNODE] = Kernel_ProcessCount();
Kevin Harms's avatar
Kevin Harms committed
79
    rec->counters[BGQ_INODES] = MPIX_IO_node_id();
Kevin Harms's avatar
Kevin Harms committed
80

Kevin Harms's avatar
Kevin Harms committed
81
82
83
84
85
86
87
88
89
90
    r = Kernel_GetPersonality(&person, sizeof(person));
    if (r == 0)
    {
        rec->counters[BGQ_NNODES] = ND_TORUS_SIZE(person.Network_Config);
        rec->counters[BGQ_ANODES] = person.Network_Config.Anodes;
        rec->counters[BGQ_BNODES] = person.Network_Config.Bnodes;
        rec->counters[BGQ_CNODES] = person.Network_Config.Cnodes;
        rec->counters[BGQ_DNODES] = person.Network_Config.Dnodes;
        rec->counters[BGQ_ENODES] = person.Network_Config.Enodes;
        rec->counters[BGQ_TORUSENABLED] =
91
92
93
94
95
            (((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_A) == ND_ENABLE_TORUS_DIM_A) << 0) |
            (((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_B) == ND_ENABLE_TORUS_DIM_B) << 1) |
            (((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_C) == ND_ENABLE_TORUS_DIM_C) << 2) |
            (((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_D) == ND_ENABLE_TORUS_DIM_D) << 3) |
            (((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_E) == ND_ENABLE_TORUS_DIM_E) << 4);
Kevin Harms's avatar
Kevin Harms committed
96
97
98
99
100
101
102
103
104
105
106
107
108
109

        rec->counters[BGQ_DDRPERNODE] = person.DDR_Config.DDRSizeMB;
    }

    rec->rank = my_rank;
    rec->fcounters[BGQ_F_TIMESTAMP] = darshan_core_wtime();

    return;
}

/**********************************************************
 * Internal functions for manipulating BGQ module state *
 **********************************************************/

Kevin Harms's avatar
Kevin Harms committed
110
void bgq_runtime_initialize()
Kevin Harms's avatar
Kevin Harms committed
111
112
113
114
115
116
117
118
119
{
    /* struct of function pointers for interfacing with darshan-core */
    struct darshan_module_funcs bgq_mod_fns =
    {
        .begin_shutdown = bgq_begin_shutdown,
        .get_output_data = bgq_get_output_data,
        .shutdown = bgq_shutdown
    };
    int mem_limit;
120
121
    void *mmap_buf;
    int mmap_buf_size;
Kevin Harms's avatar
Kevin Harms committed
122
123
124
125
126
127
128
129
    char *recname = "darshan-internal-bgq";

    BGQ_LOCK();

    /* don't do anything if already initialized or instrumenation is disabled */
    if(bgq_runtime || instrumentation_disabled)
        return;

130
    /* register the BG/Q module with the darshan-core component */
Kevin Harms's avatar
Kevin Harms committed
131
132
133
    darshan_core_register_module(
        DARSHAN_BGQ_MOD,
        &bgq_mod_fns,
134
        &my_rank,
Kevin Harms's avatar
Kevin Harms committed
135
        &mem_limit,
136
137
        &mmap_buf,
        &mmap_buf_size,
138
        &darshan_mem_alignment);
Kevin Harms's avatar
Kevin Harms committed
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166

    /* return if no memory assigned by darshan-core */
    if(mem_limit == 0)
    {
        instrumentation_disabled = 1;
        return;
    }

    /* no enough memory to fit bgq module */
    if (mem_limit < sizeof(*bgq_runtime))
    {
        instrumentation_disabled = 1;
        return;
    }

    /* initialize module's global state */
    bgq_runtime = malloc(sizeof(*bgq_runtime));
    if(!bgq_runtime)
    {
        instrumentation_disabled = 1;
        return;
    }
    memset(bgq_runtime, 0, sizeof(*bgq_runtime));

    darshan_core_register_record(
        recname,
        strlen(recname),
        1,
167
        DARSHAN_BGQ_MOD,
Kevin Harms's avatar
Kevin Harms committed
168
169
170
        &bgq_runtime->record.f_id,
        &bgq_runtime->record.alignment);

171
172
173
174
175
176
177
178
179
180
181
182
    /* if record is set to 0, darshan-core is out of space and will not
     * track this record, so we should avoid tracking it, too
     */
    if(bgq_runtime->record.f_id == 0)
    {
        instrumentation_disabled = 1;
        free(bgq_runtime);
        bgq_runtime = NULL;
        BGQ_UNLOCK();
        return;
    }

Kevin Harms's avatar
Kevin Harms committed
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
    capture(&bgq_runtime->record);

    BGQ_UNLOCK();

    return;
}

/* Perform any necessary steps prior to shutting down for the BG/Q module. */
static void bgq_begin_shutdown()
{
    /* Turn instrumentation off under the module lock. In general, we want
     * all wrappers disabled while Darshan shuts down, so that nothing can
     * modify module state (and race with the shutdown) in the meantime.
     */
    BGQ_LOCK();
    instrumentation_disabled = 1;
    BGQ_UNLOCK();
}

207
208
/*
 * qsort()-style comparator for two uint64_t values, ascending order.
 *
 * p1, p2: pointers to the uint64_t values to compare
 * returns: -1 if *p1 < *p2, 0 if equal, 1 if *p1 > *p2
 */
static int cmpr(const void *p1, const void *p2)
{
    /* keep the const qualifier through the cast */
    const uint64_t lhs = *(const uint64_t *)p1;
    const uint64_t rhs = *(const uint64_t *)p2;

    if (lhs < rhs)
        return -1;

    return (lhs > rhs) ? 1 : 0;
}

Kevin Harms's avatar
Kevin Harms committed
214
215
/* Pass output data for the "BGQ" module back to darshan-core to log to file. */
static void bgq_get_output_data(
216
217
218
    MPI_Comm mod_comm,
    darshan_record_id *shared_recs,
    int shared_rec_count,
Kevin Harms's avatar
Kevin Harms committed
219
220
221
222
223
224
225
226
    void **buffer,
    int *size)
{

    /* Just set the output buffer to point at the array of the "BGQ" module's
     * I/O records, and set the output size according to the number of records
     * currently being tracked.
     */
227
228
229
230
231
232
    int nprocs;
    int result;
    uint64_t *ion_ids;

    if (my_rank == 0)
    {
Shane Snyder's avatar
Shane Snyder committed
233
        DARSHAN_MPI_CALL(PMPI_Comm_size)(mod_comm, &nprocs);
234
235
236
        ion_ids = malloc(sizeof(*ion_ids)*nprocs);
        result = (ion_ids != NULL); 
    }
Shane Snyder's avatar
Shane Snyder committed
237
    DARSHAN_MPI_CALL(PMPI_Bcast)(&result, 1, MPI_INT, 0, mod_comm);
238
239
240
241
242
243

    if (bgq_runtime && result)
    {
        int i, found;
        uint64_t val;

Shane Snyder's avatar
Shane Snyder committed
244
245
246
247
248
249
250
251
        DARSHAN_MPI_CALL(PMPI_Gather)(&bgq_runtime->record.counters[BGQ_INODES],
                                      1,
                                      MPI_LONG_LONG_INT,
                                      ion_ids,
                                      1,
                                      MPI_LONG_LONG_INT,
                                      0,
                                      mod_comm);
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
        if (my_rank == 0)
        {
            qsort(ion_ids, nprocs, sizeof(*ion_ids), cmpr);
            for (i = 1, val = ion_ids[0], found = 1; i < nprocs; i++)
            {
                if (val != ion_ids[i])
                {
                    val = ion_ids[i];
                    found += 1;
                }
            }
            bgq_runtime->record.counters[BGQ_INODES] = found;
        }
    }

Kevin Harms's avatar
Kevin Harms committed
267
    if ((bgq_runtime) && (my_rank == 0))
Kevin Harms's avatar
Kevin Harms committed
268
    {
Kevin Harms's avatar
Kevin Harms committed
269
        *buffer = &bgq_runtime->record;
Kevin Harms's avatar
Kevin Harms committed
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
        *size = sizeof(struct darshan_bgq_record);
    }
    else
    {
        *buffer = NULL;
        *size   = 0;
    }

    return;
}

/* Shutdown the "BGQ" module by releasing its runtime state, if any. */
static void bgq_shutdown()
{
    /* free(NULL) is a no-op, so no guard is needed */
    free(bgq_runtime);
    bgq_runtime = NULL;
}

Shane Snyder's avatar
Shane Snyder committed
293
#if 0
Kevin Harms's avatar
Kevin Harms committed
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
/*
 * Compare two arrays of BG/Q records counter by counter and report every
 * mismatch on stderr. Nothing is written to either array.
 *
 * infile_v:    first array of struct darshan_bgq_record
 * inoutfile_v: second array of struct darshan_bgq_record
 * len:         pointer to the number of records in each array
 * datatype:    unused
 */
static void bgq_record_reduction_op(
    void* infile_v,
    void* inoutfile_v,
    int* len,
    MPI_Datatype *datatype)
{
    struct darshan_bgq_record *in_rec = infile_v;
    struct darshan_bgq_record *inout_rec = inoutfile_v;
    int rec_idx;
    int ctr_idx;

    for (rec_idx = 0; rec_idx < *len; rec_idx++, in_rec++, inout_rec++)
    {
        for (ctr_idx = 0; ctr_idx < BGQ_NUM_INDICES; ctr_idx++)
        {
            if (in_rec->counters[ctr_idx] == inout_rec->counters[ctr_idx])
                continue;

            /* counters are expected to agree, so this is unexpected */
            fprintf(stderr,
                    "%lu counter mismatch: %d [%lu] [%lu]\n",
                    in_rec->f_id,
                    ctr_idx,
                    in_rec->counters[ctr_idx],
                    inout_rec->counters[ctr_idx]);
        }
    }
}
Shane Snyder's avatar
Shane Snyder committed
326
#endif
Kevin Harms's avatar
Kevin Harms committed
327

Kevin Harms's avatar
Kevin Harms committed
328
329
330
331
332
333
334
335
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */