darshan-bgq.c 7.77 KB
Newer Older
Kevin Harms's avatar
Kevin Harms committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#define _XOPEN_SOURCE 500
#define _GNU_SOURCE

#include "darshan-runtime-config.h"
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <assert.h>

#include "uthash.h"
#include "darshan.h"
#include "darshan-bgq-log-format.h"

#ifdef __bgq__
#include <spi/include/kernel/location.h>
#include <spi/include/kernel/process.h>
#include <firmware/include/personality.h>
#endif

/*
 * Simple module which captures BG/Q hardware specific information about 
 * the job.
 * 
 * This module does not intercept any system calls. It just pulls data
 * from the personality struct at initialization.
 */


/*
 * Global runtime struct for tracking data needed at runtime
 */
struct bgq_runtime
{
    struct darshan_bgq_record record;
};

static struct bgq_runtime *bgq_runtime = NULL;
static pthread_mutex_t bgq_runtime_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;

/* the instrumentation_disabled flag is used to toggle functions on/off */
static int instrumentation_disabled = 0;

/* my_rank indicates the MPI rank of this process */
static int my_rank = -1;

/* internal helper functions for the "NULL" module */
Kevin Harms's avatar
Kevin Harms committed
54
void bgq_runtime_initialize(void);
Kevin Harms's avatar
Kevin Harms committed
55
56
57
58
59

/* forward declaration for module functions needed to interface with darshan-core */
static void bgq_begin_shutdown(void);
static void bgq_get_output_data(void **buffer, int *size);
static void bgq_shutdown(void);
Kevin Harms's avatar
Kevin Harms committed
60
61
static void bgq_setup_reduction(darshan_record_id *shared_recs,int *shared_rec_count,void **send_buf,void **recv_buf,int *rec_size);
static void bgq_record_reduction_op(void* infile_v,void* inoutfile_v,int *len,MPI_Datatype *datatype);
Kevin Harms's avatar
Kevin Harms committed
62
63
64
65
66
67
68
69
70
71
72
73

/* macros for obtaining/releasing the "NULL" module lock */
#define BGQ_LOCK() pthread_mutex_lock(&bgq_runtime_mutex)
#define BGQ_UNLOCK() pthread_mutex_unlock(&bgq_runtime_mutex)

/*
 * Function which updates all the counter data
 */
static void capture(struct darshan_bgq_record *rec)
{
#ifdef __bgq__
    Personality_t person;
Kevin Harms's avatar
Kevin Harms committed
74
    int r;
Kevin Harms's avatar
Kevin Harms committed
75
76
77
78

    rec->counters[BGQ_CSJOBID] = Kernel_GetJobID();
    rec->counters[BGQ_RANKSPERNODE] = Kernel_ProcessCount();

Kevin Harms's avatar
Kevin Harms committed
79
80
    rec->counters[BGQ_INODES] = MPIX_IO_node();

Kevin Harms's avatar
Kevin Harms committed
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    r = Kernel_GetPersonality(&person, sizeof(person));
    if (r == 0)
    {
        rec->counters[BGQ_NNODES] = ND_TORUS_SIZE(person.Network_Config);
        rec->counters[BGQ_ANODES] = person.Network_Config.Anodes;
        rec->counters[BGQ_BNODES] = person.Network_Config.Bnodes;
        rec->counters[BGQ_CNODES] = person.Network_Config.Cnodes;
        rec->counters[BGQ_DNODES] = person.Network_Config.Dnodes;
        rec->counters[BGQ_ENODES] = person.Network_Config.Enodes;
        rec->counters[BGQ_TORUSENABLED] =
            ((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_A) << 0) |
            ((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_B) << 1) |
            ((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_C) << 2) |
            ((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_D) << 3) |
            ((person.Network_Config.NetFlags & ND_ENABLE_TORUS_DIM_E) << 4);

        rec->counters[BGQ_DDRPERNODE] = person.DDR_Config.DDRSizeMB;
    }
#endif

    rec->rank = my_rank;
    rec->fcounters[BGQ_F_TIMESTAMP] = darshan_core_wtime();

    return;
}

/**********************************************************
 * Internal functions for manipulating BGQ module state *
 **********************************************************/

Kevin Harms's avatar
Kevin Harms committed
111
void bgq_runtime_initialize()
Kevin Harms's avatar
Kevin Harms committed
112
113
114
115
116
{
    /* struct of function pointers for interfacing with darshan-core */
    struct darshan_module_funcs bgq_mod_fns =
    {
        .begin_shutdown = bgq_begin_shutdown,
Kevin Harms's avatar
Kevin Harms committed
117
118
        .setup_reduction = bgq_setup_reduction, 
        .record_reduction_op = bgq_record_reduction_op, 
Kevin Harms's avatar
Kevin Harms committed
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
        .get_output_data = bgq_get_output_data,
        .shutdown = bgq_shutdown
    };
    int mem_limit;
    char *recname = "darshan-internal-bgq";

    BGQ_LOCK();

    /* don't do anything if already initialized or instrumenation is disabled */
    if(bgq_runtime || instrumentation_disabled)
        return;

    /* register the "NULL" module with the darshan-core component */
    darshan_core_register_module(
        DARSHAN_BGQ_MOD,
        &bgq_mod_fns,
        &mem_limit,
        NULL);

    /* return if no memory assigned by darshan-core */
    if(mem_limit == 0)
    {
        instrumentation_disabled = 1;
        return;
    }

    /* no enough memory to fit bgq module */
    if (mem_limit < sizeof(*bgq_runtime))
    {
        instrumentation_disabled = 1;
        return;
    }

    /* initialize module's global state */
    bgq_runtime = malloc(sizeof(*bgq_runtime));
    if(!bgq_runtime)
    {
        instrumentation_disabled = 1;
        return;
    }
    memset(bgq_runtime, 0, sizeof(*bgq_runtime));

    darshan_core_register_record(
        recname,
        strlen(recname),
        1,
        DARSHAN_POSIX_MOD,
        &bgq_runtime->record.f_id,
        &bgq_runtime->record.alignment);

    DARSHAN_MPI_CALL(PMPI_Comm_rank)(MPI_COMM_WORLD, &my_rank);

    capture(&bgq_runtime->record);

    BGQ_UNLOCK();

    return;
}

/* Perform any necessary steps prior to shutting down for the "NULL" module. */
static void bgq_begin_shutdown()
{
    BGQ_LOCK();

    /* In general, we want to disable all wrappers while Darshan shuts down. 
     * This is to avoid race conditions and ensure data consistency, as
     * executing wrappers could potentially modify module state while Darshan
     * is in the process of shutting down. 
     */
    instrumentation_disabled = 1;

    BGQ_UNLOCK();

    return;
}

/* Pass output data for the "BGQ" module back to darshan-core to log to file. */
static void bgq_get_output_data(
    void **buffer,
    int *size)
{

    /* Just set the output buffer to point at the array of the "BGQ" module's
     * I/O records, and set the output size according to the number of records
     * currently being tracked.
     */
Kevin Harms's avatar
Kevin Harms committed
205
    if ((bgq_runtime) && (my_rank == 0))
Kevin Harms's avatar
Kevin Harms committed
206
    {
Kevin Harms's avatar
Kevin Harms committed
207
        *buffer = &bgq_runtime->record;
Kevin Harms's avatar
Kevin Harms committed
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
        *size = sizeof(struct darshan_bgq_record);
    }
    else
    {
        *buffer = NULL;
        *size   = 0;
    }

    return;
}

/* Shutdown the "BGQ" module by freeing up all data structures. */
static void bgq_shutdown()
{
    if (bgq_runtime)
    {
        free(bgq_runtime);
        bgq_runtime = NULL;
    }

    return;
}

Kevin Harms's avatar
Kevin Harms committed
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
static void bgq_setup_reduction(
    darshan_record_id *shared_recs,
    int *shared_rec_count,
    void **send_buf,
    void **recv_buf,
    int *rec_size)
{
    int i;
    int found;

    for (i = 0; i < *shared_rec_count; i++)
    {
        if (shared_recs[i] == bgq_runtime->record.f_id)
        {
            found = 1;
            break;
        }
    }

    if (found)
    {
        printf("found bgq shared record\n");
        *rec_size = sizeof(struct darshan_bgq_record);
        *shared_rec_count = 1;
        *send_buf = &bgq_runtime->record;
        *recv_buf = &bgq_runtime->record;
    }

    return;
}

static void bgq_record_reduction_op(
    void* infile_v,
    void* inoutfile_v,
    int* len,
    MPI_Datatype *datatype)
{
    int i;
    int j;
    struct darshan_bgq_record *infile = infile_v;
    struct darshan_bgq_record *inoutfile = inoutfile_v;

    for (i = 0; i<*len; i++)
    {
        for (j = 0; j < BGQ_NUM_INDICES; j++)
        {
            if (infile->counters[j] != inoutfile->counters[j])
            {
                // unexpected
                fprintf(stderr,
                        "%lu counter mismatch: %d [%lu] [%lu]\n",
                        infile->f_id,
                        j,
                        infile->counters[j],
                        inoutfile->counters[j]);
            }
        }
        infile++;
        inoutfile++;
    }

    return;
}

Kevin Harms's avatar
Kevin Harms committed
295
296
297
298
299
300
301
302
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */