darshan-bgq-logutils.c 9.92 KB
Newer Older
Kevin Harms's avatar
Kevin Harms committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#define _GNU_SOURCE
#include "darshan-util-config.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

20
#include "darshan-logutils.h"
Kevin Harms's avatar
Kevin Harms committed
21

22
/* counter name strings for the BGQ module */
23
#define X(a) #a,
24
25
26
27
28
29
30
31
32
char *bgq_counter_names[] = {
    BGQ_COUNTERS
};

char *bgq_f_counter_names[] = {
    BGQ_F_COUNTERS
};
#undef X

Shane Snyder's avatar
Shane Snyder committed
33
34
35
/* NOTE: size in bytes of a version 1 BGQ record as stored in a log file.
 * darshan_log_get_bgq_rec() reads exactly this many bytes when the log's
 * BGQ module version is 1 and then up-converts the data to the current
 * struct darshan_bgq_record layout.
 * NOTE(review): the "+ 8" presumably accounts for an int field plus padding
 * that sat between the base record and the counters in the old layout --
 * confirm against the version 1 record definition.
 */
#define DARSHAN_BGQ_FILE_SIZE_1 (112 + 8)
36

37
/* forward declarations of the BGQ handlers registered in bgq_logutils */
static int darshan_log_get_bgq_rec(
    darshan_fd fd,
    void **bgq_buf_p);
static int darshan_log_put_bgq_rec(
    darshan_fd fd,
    void *bgq_buf,
    int ver);
static void darshan_log_print_bgq_rec(
    void *file_rec,
    char *file_name,
    char *mnt_pt,
    char *fs_type,
    int ver);
static void darshan_log_print_bgq_description(void);
static void darshan_log_print_bgq_rec_diff(
    void *file_rec1,
    char *file_name1,
    void *file_rec2,
    char *file_name2);
static void darshan_log_agg_bgq_recs(
    void *rec,
    void *agg_rec,
    int init_flag);
45
46
47

struct darshan_mod_logutil_funcs bgq_logutils =
{
48
49
50
    .log_get_record = &darshan_log_get_bgq_rec,
    .log_put_record = &darshan_log_put_bgq_rec,
    .log_print_record = &darshan_log_print_bgq_rec,
51
    .log_print_description = &darshan_log_print_bgq_description,
52
    .log_print_diff = &darshan_log_print_bgq_rec_diff,
Shane Snyder's avatar
Shane Snyder committed
53
    .log_agg_records = &darshan_log_agg_bgq_recs
54
55
};

56
/*
 * Read the next BGQ record from the log referenced by fd.
 *
 * fd        - open darshan log handle
 * bgq_buf_p - in/out: if *bgq_buf_p is non-NULL the record is read into that
 *             caller-provided buffer; if it is NULL a record is allocated
 *             with malloc() and stored in *bgq_buf_p, but only when a
 *             complete record was read (otherwise it is freed here and
 *             *bgq_buf_p stays NULL)
 *
 * Version 1 records are read in their on-disk layout and up-converted to
 * the current struct darshan_bgq_record layout; version 2 records are read
 * directly. Counters are byte-swapped when fd->swap_flag is set.
 *
 * Returns 1 if a record was read, 0 if there is no (complete) record left
 * to read, and -1 on error (allocation failure, read failure, or an
 * unsupported BGQ module version).
 */
static int darshan_log_get_bgq_rec(darshan_fd fd, void** bgq_buf_p)
{
    struct darshan_bgq_record *rec = *((struct darshan_bgq_record **)bgq_buf_p);
    int rec_len = 0;
    char *buffer, *p;
    int i;
    int ret = -1;

    /* nothing to do if the log holds no BGQ data at all */
    if(fd->mod_map[DARSHAN_BGQ_MOD].len == 0)
        return(0);

    /* only format versions 1 and 2 are understood below; reject anything
     * else before allocating, so that rec_len is never used unset
     */
    if(fd->mod_ver[DARSHAN_BGQ_MOD] != 1 && fd->mod_ver[DARSHAN_BGQ_MOD] != 2)
    {
        fprintf(stderr, "Error: Invalid BGQ module version number (got %d)\n",
            (int)fd->mod_ver[DARSHAN_BGQ_MOD]);
        return(-1);
    }

    if(*bgq_buf_p == NULL)
    {
        rec = malloc(sizeof(*rec));
        if(!rec)
            return(-1);
    }

    /* read the BGQ record from file, checking the version first so we
     * can read it correctly
     */
    if(fd->mod_ver[DARSHAN_BGQ_MOD] == 1)
    {
        rec_len = DARSHAN_BGQ_FILE_SIZE_1;
        buffer = malloc(rec_len);
        if(!buffer)
        {
            if(*bgq_buf_p == NULL)
                free(rec);
            return(-1);
        }

        ret = darshan_log_get_mod(fd, DARSHAN_BGQ_MOD, buffer, rec_len);
        /* only convert complete records: a short read would leave part of
         * buffer uninitialized
         */
        if(ret == rec_len)
        {
            /* up-convert old BGQ format to new format */
            p = buffer;
            memcpy(&(rec->base_rec), p, sizeof(struct darshan_base_record));
            /* skip however long int+padding is: the counters occupy the
             * tail of the old record, so jump to where they start
             */
            p += (rec_len - (BGQ_NUM_INDICES * sizeof(int64_t)) -
                (BGQ_F_NUM_INDICES * sizeof(double)));
            memcpy(&(rec->counters[0]), p, BGQ_NUM_INDICES * sizeof(int64_t));
            p += (BGQ_NUM_INDICES * sizeof(int64_t));
            memcpy(&(rec->fcounters[0]), p, BGQ_F_NUM_INDICES * sizeof(double));
        }
        free(buffer);
    }
    else
    {
        /* version 2: on-disk layout matches the in-memory struct */
        rec_len = sizeof(struct darshan_bgq_record);
        ret = darshan_log_get_mod(fd, DARSHAN_BGQ_MOD, rec, rec_len);
    }

    /* hand a record we allocated to the caller only on a complete read */
    if(*bgq_buf_p == NULL)
    {
        if(ret == rec_len)
            *bgq_buf_p = rec;
        else
            free(rec);
    }

    if(ret < 0)
        return(-1);
    else if(ret < rec_len)
        return(0);
    else
    {
        if(fd->swap_flag)
        {
            /* swap bytes if necessary */
            DARSHAN_BSWAP64(&(rec->base_rec.id));
            DARSHAN_BSWAP64(&(rec->base_rec.rank));
            for(i=0; i<BGQ_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&rec->counters[i]);
            for(i=0; i<BGQ_F_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&rec->fcounters[i]);
        }

        return(1);
    }
}
Kevin Harms's avatar
Kevin Harms committed
138

139
/*
 * Write one BGQ record to the log referenced by fd.
 *
 * fd      - open darshan log handle
 * bgq_buf - pointer to the struct darshan_bgq_record to write
 * ver     - not consulted: the record is always written with the current
 *           format version, DARSHAN_BGQ_VER
 *
 * Returns 0 on success and -1 if darshan_log_put_mod() reports an error.
 */
static int darshan_log_put_bgq_rec(darshan_fd fd, void* bgq_buf, int ver)
{
    struct darshan_bgq_record *bgq_rec = (struct darshan_bgq_record *)bgq_buf;
    int status = darshan_log_put_mod(fd, DARSHAN_BGQ_MOD, bgq_rec,
        sizeof(struct darshan_bgq_record), DARSHAN_BGQ_VER);

    return((status < 0) ? -1 : 0);
}

152
static void darshan_log_print_bgq_rec(void *file_rec, char *file_name,
153
    char *mnt_pt, char *fs_type, int ver)
154
155
156
157
158
{
    int i;
    struct darshan_bgq_record *bgq_file_rec =
        (struct darshan_bgq_record *)file_rec;

Kevin Harms's avatar
Kevin Harms committed
159
    for(i=0; i<BGQ_NUM_INDICES; i++)
160
    {
Kevin Harms's avatar
Kevin Harms committed
161
        DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
162
163
164
            bgq_file_rec->base_rec.rank, bgq_file_rec->base_rec.id,
            bgq_counter_names[i], bgq_file_rec->counters[i],
            file_name, mnt_pt, fs_type);
165
    }
Kevin Harms's avatar
Kevin Harms committed
166

Kevin Harms's avatar
Kevin Harms committed
167
    for(i=0; i<BGQ_F_NUM_INDICES; i++)
Kevin Harms's avatar
Kevin Harms committed
168
    {
Kevin Harms's avatar
Kevin Harms committed
169
        DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
170
171
172
            bgq_file_rec->base_rec.rank, bgq_file_rec->base_rec.id,
            bgq_f_counter_names[i], bgq_file_rec->fcounters[i],
            file_name, mnt_pt, fs_type);
Kevin Harms's avatar
Kevin Harms committed
173
174
    }

175
    return;
Kevin Harms's avatar
Kevin Harms committed
176
177
}

178
179
/*
 * Print a human-readable description of the BGQ counters to stdout, as a
 * block of '#'-prefixed comment lines.
 */
static void darshan_log_print_bgq_description(void)
{
    /* emitted verbatim, in order */
    static const char * const description[] = {
        "\n# description of BGQ counters:\n",
        "#   BGQ_CSJOBID: BGQ control system job ID.\n",
        "#   BGQ_NNODES: number of BGQ compute nodes for this job.\n",
        "#   BGQ_RANKSPERNODE: number of MPI ranks per compute node.\n",
        "#   BGQ_DDRPERNODE: size in MB of DDR3 per compute node.\n",
        "#   BGQ_INODES: number of BGQ I/O nodes for this job.\n",
        "#   BGQ_*NODES: dimension of A, B, C, D, & E dimensions of torus.\n",
        "#   BGQ_TORUSENABLED: which dimensions of the torus are enabled.\n",
        "#   BGQ_F_TIMESTAMP: timestamp when the BGQ data was collected.\n"
    };
    size_t line;

    for(line = 0; line < sizeof(description) / sizeof(description[0]); line++)
        printf("%s", description[line]);
}

193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1,
    void *file_rec2, char *file_name2)
{
    struct darshan_bgq_record *file1 = (struct darshan_bgq_record *)file_rec1;
    struct darshan_bgq_record *file2 = (struct darshan_bgq_record *)file_rec2;
    int i;

    /* NOTE: we assume that both input records are the same module format version */

    for(i=0; i<BGQ_NUM_INDICES; i++)
    {
        if(!file2)
        {
            printf("- ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
208
                file1->base_rec.rank, file1->base_rec.id, bgq_counter_names[i],
209
210
211
212
213
214
215
                file1->counters[i], file_name1, "", "");

        }
        else if(!file1)
        {
            printf("+ ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
216
                file2->base_rec.rank, file2->base_rec.id, bgq_counter_names[i],
217
218
219
220
221
222
                file2->counters[i], file_name2, "", "");
        }
        else if(file1->counters[i] != file2->counters[i])
        {
            printf("- ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
223
                file1->base_rec.rank, file1->base_rec.id, bgq_counter_names[i],
224
225
226
                file1->counters[i], file_name1, "", "");
            printf("+ ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
227
                file2->base_rec.rank, file2->base_rec.id, bgq_counter_names[i],
228
229
230
231
232
233
234
235
236
237
                file2->counters[i], file_name2, "", "");
        }
    }

    for(i=0; i<BGQ_F_NUM_INDICES; i++)
    {
        if(!file2)
        {
            printf("- ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
238
                file1->base_rec.rank, file1->base_rec.id, bgq_f_counter_names[i],
239
240
241
242
243
244
245
                file1->fcounters[i], file_name1, "", "");

        }
        else if(!file1)
        {
            printf("+ ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
246
                file2->base_rec.rank, file2->base_rec.id, bgq_f_counter_names[i],
247
248
249
250
251
252
                file2->fcounters[i], file_name2, "", "");
        }
        else if(file1->fcounters[i] != file2->fcounters[i])
        {
            printf("- ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
253
                file1->base_rec.rank, file1->base_rec.id, bgq_f_counter_names[i],
254
255
256
                file1->fcounters[i], file_name1, "", "");
            printf("+ ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
Shane Snyder's avatar
Shane Snyder committed
257
                file2->base_rec.rank, file2->base_rec.id, bgq_f_counter_names[i],
258
259
260
261
262
263
264
265
                file2->fcounters[i], file_name2, "", "");
        }
    }

    return;
}


Shane Snyder's avatar
Shane Snyder committed
266
267
/*
 * Fold one BGQ record into an aggregate record.
 *
 * rec       - the struct darshan_bgq_record to add
 * agg_rec   - the aggregate struct darshan_bgq_record being built
 * init_flag - non-zero for the first record: rec is copied into agg_rec;
 *             zero for later records: rec is only checked against agg_rec
 */
static void darshan_log_agg_bgq_recs(void *rec, void *agg_rec, int init_flag)
{
    struct darshan_bgq_record *in = (struct darshan_bgq_record *)rec;
    struct darshan_bgq_record *agg = (struct darshan_bgq_record *)agg_rec;
    int idx;

    if(!init_flag)
    {
        /* for remaining records, just sanity check the records are identical */
        for(idx = 0; idx < BGQ_NUM_INDICES; idx++)
        {
            /* TODO: BGQ_INODES is skipped since it might be different in
             * each record (see the TODO on the initialization path below)
             */
            if(idx != BGQ_INODES)
            {
                assert(in->counters[idx] == agg->counters[idx]);
            }
        }

        /* NOTE: ignore BGQ_F_TIMESTAMP counter -- just use the value from the
         * record that we initialized with
         */
        return;
    }

    /* when initializing, just copy over the first record */
    memcpy(agg, in, sizeof(struct darshan_bgq_record));

    /* TODO: each record stores the ID of the corresponding rank's BG/Q
     * inode. Currently, this log aggregation interface assumes we can
     * aggregate logs one at a time, without having to know the value of
     * a counter on all processes. What we need here is a way to determine
     * the inode IDs used for every process, filter out duplicates, then
     * count the total number to set this counter. Will have to think
     * more about how we can calculate this value using this interface
     */
    agg->counters[BGQ_INODES] = -1;
}

Kevin Harms's avatar
Kevin Harms committed
308
309
310
311
312
313
314
315
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */