/*
 * Copyright (C) 2015 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#define _GNU_SOURCE
#include "darshan-util-config.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>

20
#include "darshan-logutils.h"
Kevin Harms's avatar
Kevin Harms committed
21

22
/* counter name strings for the BGQ module */
23
#define X(a) #a,
24 25 26 27 28 29 30 31 32
char *bgq_counter_names[] = {
    BGQ_COUNTERS
};

char *bgq_f_counter_names[] = {
    BGQ_F_COUNTERS
};
#undef X

33 34 35
/* NOTE: size in bytes of a BGQ record as stored by module format version 1.
 * darshan_log_get_bgq_rec() reads version 1 records into a scratch buffer
 * of this size and then copies the fields into the current
 * struct darshan_bgq_record layout.
 * NOTE(review): the (112 + 8) breakdown is not explained anywhere in this
 * file -- TODO confirm against the version 1 record definition.
 */
#define DARSHAN_BGQ_FILE_SIZE_1 (112 + 8)
36

37
/* file-local BGQ handlers; they are published to the rest of the utilities
 * only through the bgq_logutils function table
 */
static int darshan_log_get_bgq_rec(darshan_fd fd, void **bgq_buf_p);
static int darshan_log_put_bgq_rec(darshan_fd fd, void *bgq_buf);
static void darshan_log_print_bgq_rec(void *file_rec, char *file_name,
    char *mnt_pt, char *fs_type);
static void darshan_log_print_bgq_description(int ver);
static void darshan_log_print_bgq_rec_diff(void *file_rec1,
    char *file_name1, void *file_rec2, char *file_name2);
static void darshan_log_agg_bgq_recs(void *rec, void *agg_rec,
    int init_flag);
45 46 47

struct darshan_mod_logutil_funcs bgq_logutils =
{
48 49 50
    .log_get_record = &darshan_log_get_bgq_rec,
    .log_put_record = &darshan_log_put_bgq_rec,
    .log_print_record = &darshan_log_print_bgq_rec,
51
    .log_print_description = &darshan_log_print_bgq_description,
52
    .log_print_diff = &darshan_log_print_bgq_rec_diff,
53
    .log_agg_records = &darshan_log_agg_bgq_recs
54 55
};

56
/*
 * darshan_log_get_bgq_rec()
 *
 * Read one BGQ module record from the log referenced by fd.
 *
 * fd:        open darshan log handle.
 * bgq_buf_p: in/out record buffer pointer. When *bgq_buf_p is NULL a record
 *            is allocated here with malloc() and stored in *bgq_buf_p only
 *            if a complete record was read; in every other case the
 *            temporary allocation is freed. When *bgq_buf_p is non-NULL the
 *            caller's buffer is filled in place.
 *
 * Returns 1 when a complete record was read (byte-swapped if fd->swap_flag
 * is set), 0 when the BGQ module holds no data or fewer bytes than a full
 * record were returned, and -1 on allocation failure, read error, or an
 * unrecognized BGQ module format version.
 */
static int darshan_log_get_bgq_rec(darshan_fd fd, void** bgq_buf_p)
{
    struct darshan_bgq_record *rec = *((struct darshan_bgq_record **)bgq_buf_p);
    /* start at 0 so the length comparisons below stay well defined when
     * the module version matches none of the formats handled here
     */
    int rec_len = 0;
    char *buffer, *p;
    int i;
    int ret = -1;

    if(fd->mod_map[DARSHAN_BGQ_MOD].len == 0)
        return(0);

    if(*bgq_buf_p == NULL)
    {
        rec = malloc(sizeof(*rec));
        if(!rec)
            return(-1);
    }

    /* read the BGQ record from file, checking the version first so we
     * can read it correctly
     */
    if(fd->mod_ver[DARSHAN_BGQ_MOD] == 1)
    {
        rec_len = DARSHAN_BGQ_FILE_SIZE_1;
        buffer = malloc(rec_len);
        if(!buffer)
        {
            if(*bgq_buf_p == NULL)
                free(rec);
            return(-1);
        }

        ret = darshan_log_get_mod(fd, DARSHAN_BGQ_MOD, buffer, rec_len);
        /* only convert a complete record: after a short read the tail of
         * the scratch buffer is uninitialized, and the short count must
         * reach the checks below unchanged
         */
        if(ret == rec_len)
        {
            /* up-convert old BGQ format to new format */
            p = buffer;
            memcpy(&(rec->base_rec), p, sizeof(struct darshan_base_record));
            /* skip however long int+padding is */
            p += (rec_len - (BGQ_NUM_INDICES * sizeof(int64_t)) -
                (BGQ_F_NUM_INDICES * sizeof(double)));
            memcpy(&(rec->counters[0]), p, BGQ_NUM_INDICES * sizeof(int64_t));
            p += (BGQ_NUM_INDICES * sizeof(int64_t));
            memcpy(&(rec->fcounters[0]), p, BGQ_F_NUM_INDICES * sizeof(double));
        }
        free(buffer);
    }
    else if(fd->mod_ver[DARSHAN_BGQ_MOD] == 2)
    {
        rec_len = sizeof(struct darshan_bgq_record);
        ret = darshan_log_get_mod(fd, DARSHAN_BGQ_MOD, rec, rec_len);
    }
    /* any other version: ret stays -1 and is reported as an error below */

    if(*bgq_buf_p == NULL)
    {
        /* hand the allocation to the caller only for a complete record */
        if(ret == rec_len)
            *bgq_buf_p = rec;
        else
            free(rec);
    }

    if(ret < 0)
        return(-1);
    else if(ret < rec_len)
        return(0);
    else
    {
        if(fd->swap_flag)
        {
            /* swap bytes if necessary */
            DARSHAN_BSWAP64(&(rec->base_rec.id));
            DARSHAN_BSWAP64(&(rec->base_rec.rank));
            for(i=0; i<BGQ_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&rec->counters[i]);
            for(i=0; i<BGQ_F_NUM_INDICES; i++)
                DARSHAN_BSWAP64(&rec->fcounters[i]);
        }

        return(1);
    }
}
Kevin Harms's avatar
Kevin Harms committed
138

139
/*
 * darshan_log_put_bgq_rec()
 *
 * Write one BGQ record to the log referenced by fd using
 * darshan_log_put_mod(), passing DARSHAN_BGQ_VER as the module version.
 *
 * bgq_buf: pointer to a struct darshan_bgq_record.
 *
 * Returns 0 on success, -1 if darshan_log_put_mod() reported an error.
 */
static int darshan_log_put_bgq_rec(darshan_fd fd, void* bgq_buf)
{
    struct darshan_bgq_record *bgq_rec = bgq_buf;

    if(darshan_log_put_mod(fd, DARSHAN_BGQ_MOD, bgq_rec,
        sizeof(*bgq_rec), DARSHAN_BGQ_VER) < 0)
    {
        return(-1);
    }

    return(0);
}

152
static void darshan_log_print_bgq_rec(void *file_rec, char *file_name,
153
    char *mnt_pt, char *fs_type)
154 155 156 157 158
{
    int i;
    struct darshan_bgq_record *bgq_file_rec =
        (struct darshan_bgq_record *)file_rec;

Kevin Harms's avatar
Kevin Harms committed
159
    for(i=0; i<BGQ_NUM_INDICES; i++)
160
    {
Kevin Harms's avatar
Kevin Harms committed
161
        DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
162 163 164
            bgq_file_rec->base_rec.rank, bgq_file_rec->base_rec.id,
            bgq_counter_names[i], bgq_file_rec->counters[i],
            file_name, mnt_pt, fs_type);
165
    }
Kevin Harms's avatar
Kevin Harms committed
166

Kevin Harms's avatar
Kevin Harms committed
167
    for(i=0; i<BGQ_F_NUM_INDICES; i++)
Kevin Harms's avatar
Kevin Harms committed
168
    {
Kevin Harms's avatar
Kevin Harms committed
169
        DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
170 171 172
            bgq_file_rec->base_rec.rank, bgq_file_rec->base_rec.id,
            bgq_f_counter_names[i], bgq_file_rec->fcounters[i],
            file_name, mnt_pt, fs_type);
Kevin Harms's avatar
Kevin Harms committed
173 174
    }

175
    return;
Kevin Harms's avatar
Kevin Harms committed
176 177
}

178
/*
 * darshan_log_print_bgq_description()
 *
 * Print the commented ("#"-prefixed) description of the BGQ counters to
 * stdout.
 *
 * ver: module format version; it is not consulted, the same text is
 *      printed for every version.
 */
static void darshan_log_print_bgq_description(int ver)
{
    static const char * const desc_lines[] = {
        "\n# description of BGQ counters:\n",
        "#   BGQ_CSJOBID: BGQ control system job ID.\n",
        "#   BGQ_NNODES: number of BGQ compute nodes for this job.\n",
        "#   BGQ_RANKSPERNODE: number of MPI ranks per compute node.\n",
        "#   BGQ_DDRPERNODE: size in MB of DDR3 per compute node.\n",
        "#   BGQ_INODES: number of BGQ I/O nodes for this job.\n",
        "#   BGQ_*NODES: dimension of A, B, C, D, & E dimensions of torus.\n",
        "#   BGQ_TORUSENABLED: which dimensions of the torus are enabled.\n",
        "#   BGQ_F_TIMESTAMP: timestamp when the BGQ data was collected.\n"
    };
    const size_t line_count = sizeof(desc_lines) / sizeof(desc_lines[0]);
    size_t k;

    (void)ver;

    for(k = 0; k < line_count; k++)
        printf("%s", desc_lines[k]);

    return;
}

193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
static void darshan_log_print_bgq_rec_diff(void *file_rec1, char *file_name1,
    void *file_rec2, char *file_name2)
{
    struct darshan_bgq_record *file1 = (struct darshan_bgq_record *)file_rec1;
    struct darshan_bgq_record *file2 = (struct darshan_bgq_record *)file_rec2;
    int i;

    /* NOTE: we assume that both input records are the same module format version */

    for(i=0; i<BGQ_NUM_INDICES; i++)
    {
        if(!file2)
        {
            printf("- ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
208
                file1->base_rec.rank, file1->base_rec.id, bgq_counter_names[i],
209 210 211 212 213 214 215
                file1->counters[i], file_name1, "", "");

        }
        else if(!file1)
        {
            printf("+ ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
216
                file2->base_rec.rank, file2->base_rec.id, bgq_counter_names[i],
217 218 219 220 221 222
                file2->counters[i], file_name2, "", "");
        }
        else if(file1->counters[i] != file2->counters[i])
        {
            printf("- ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
223
                file1->base_rec.rank, file1->base_rec.id, bgq_counter_names[i],
224 225 226
                file1->counters[i], file_name1, "", "");
            printf("+ ");
            DARSHAN_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
227
                file2->base_rec.rank, file2->base_rec.id, bgq_counter_names[i],
228 229 230 231 232 233 234 235 236 237
                file2->counters[i], file_name2, "", "");
        }
    }

    for(i=0; i<BGQ_F_NUM_INDICES; i++)
    {
        if(!file2)
        {
            printf("- ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
238
                file1->base_rec.rank, file1->base_rec.id, bgq_f_counter_names[i],
239 240 241 242 243 244 245
                file1->fcounters[i], file_name1, "", "");

        }
        else if(!file1)
        {
            printf("+ ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
246
                file2->base_rec.rank, file2->base_rec.id, bgq_f_counter_names[i],
247 248 249 250 251 252
                file2->fcounters[i], file_name2, "", "");
        }
        else if(file1->fcounters[i] != file2->fcounters[i])
        {
            printf("- ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
253
                file1->base_rec.rank, file1->base_rec.id, bgq_f_counter_names[i],
254 255 256
                file1->fcounters[i], file_name1, "", "");
            printf("+ ");
            DARSHAN_F_COUNTER_PRINT(darshan_module_names[DARSHAN_BGQ_MOD],
257
                file2->base_rec.rank, file2->base_rec.id, bgq_f_counter_names[i],
258 259 260 261 262 263 264 265
                file2->fcounters[i], file_name2, "", "");
        }
    }

    return;
}


266 267
/*
 * darshan_log_agg_bgq_recs()
 *
 * Fold one BGQ record into an aggregate record.
 *
 * rec:       pointer to the struct darshan_bgq_record to add.
 * agg_rec:   pointer to the aggregate struct darshan_bgq_record.
 * init_flag: non-zero for the first record, which is copied into agg_rec;
 *            zero for later records, which are only checked (via assert)
 *            against the aggregate.
 */
static void darshan_log_agg_bgq_recs(void *rec, void *agg_rec, int init_flag)
{
    struct darshan_bgq_record *src = rec;
    struct darshan_bgq_record *agg = agg_rec;
    int idx;

    if(!init_flag)
    {
        /* later records: sanity check that the integer counters match the
         * aggregate.
         *
         * TODO: BGQ_INODES is skipped since it might be different in each
         * record (see the TODO on the initialization path below).
         *
         * NOTE: BGQ_F_TIMESTAMP is ignored as well -- the aggregate keeps
         * the value from the record it was initialized with.
         */
        for(idx = 0; idx < BGQ_NUM_INDICES; idx++)
        {
            if(idx != BGQ_INODES)
            {
                assert(src->counters[idx] == agg->counters[idx]);
            }
        }

        return;
    }

    /* first record: start the aggregate as a plain copy of it */
    memcpy(agg, src, sizeof(*agg));

    /* TODO: each record stores the ID of the corresponding rank's BG/Q
     * inode. Currently, this log aggregation interface assumes we can
     * aggregate logs one at a time, without having to know the value of
     * a counter on all processes. What we need here is a way to determine
     * the inode IDs used for every process, filter out duplicates, then
     * count the total number to set this counter. Will have to think
     * more about how we can calculate this value using this interface
     */
    agg->counters[BGQ_INODES] = -1;

    return;
}

Kevin Harms's avatar
Kevin Harms committed
308 309 310 311 312 313 314 315
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ts=8 sts=4 sw=4 expandtab
 */