codes-workload.c 11.4 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
#include <assert.h>
8 9 10 11 12

#include "ross.h"
#include "codes/codes-workload.h"
#include "codes-workload-method.h"

13
/* List of available methods.  These are statically compiled for now, but we
 * could make generators optional via autoconf tests etc. if needed.
 */
16
/* Workload generator back ends, each defined in its own translation unit.
 * The dumpi, darshan and recorder generators are only declared when the
 * matching USE_* macro is defined at compile time.
 */
extern struct codes_workload_method test_workload_method;
extern struct codes_workload_method iolang_workload_method;
#ifdef USE_DUMPI
extern struct codes_workload_method dumpi_trace_workload_method;
#endif
#ifdef USE_DARSHAN
extern struct codes_workload_method darshan_io_workload_method;
#endif
#ifdef USE_RECORDER
extern struct codes_workload_method recorder_io_workload_method;
#endif
extern struct codes_workload_method checkpoint_workload_method;
28

29
static struct codes_workload_method *method_array[] =
30 31
{
    &test_workload_method,
32
    &iolang_workload_method,
33 34 35
#ifdef USE_DUMPI
    &dumpi_trace_workload_method,
#endif
36 37
#ifdef USE_DARSHAN
    &darshan_io_workload_method,
38 39 40
#endif
#ifdef USE_RECORDER
    &recorder_io_workload_method,
41
#endif
42
    &checkpoint_workload_method,
43
    NULL};
44

Philip Carns's avatar
Philip Carns committed
45 46 47 48 49
/* This shim layer is responsible for queueing up reversed operations and
 * re-issuing them so that the underlying workload generator method doesn't
 * have to worry about reverse events.
 *
 * NOTE: we could make this faster with a smarter data structure.  For now
 * we just have a linked list of rank_queue structs, one per (app, rank) pair
 * that has opened the workload.  We then have a linked list off of each of
 * those to hold a lifo queue of operations that have been reversed for that
 * rank.
 */

/* Holds one operation that has been reversed and is waiting to be handed out
 * again.  Nodes are allocated in codes_workload_get_next_rc() and freed in
 * codes_workload_get_next() once the stored operation has been re-issued.
 */
struct rc_op
{
    struct codes_workload_op op; /* by-value copy of the reversed operation */
    struct rc_op* next;          /* next (older) entry on the same LIFO stack */
};

/* Tracks the LIFO queue of reversed operations for one (app, rank) pair. */
struct rank_queue
{
    int app;                 /* application id this queue belongs to */
    int rank;                /* rank within that application */
    struct rc_op *lifo;      /* top of the reversed-operation stack, or NULL */
    struct rank_queue *next; /* next queue in the global list */
};

/* Head of the singly linked list of all rank queues.  Entries are pushed at
 * the front by codes_workload_load(); the list starts out empty.
 */
static struct rank_queue *ranks = NULL;

73 74
/* Reads the workload "type" key from a configuration section and asks the
 * matching generator to parse its own parameters.
 *
 * handle       - configuration handle to read from
 * section_name - section holding the workload description
 * annotation   - annotation passed through to the configuration lookup
 * num_ranks    - forwarded unchanged to the generator's config reader
 *
 * Returns a struct whose `type` is the matched generator's method_name and
 * whose `params` is whatever the generator's config reader returned (NULL if
 * the generator has no config reader).  Both fields are NULL when the "type"
 * lookup returns a non-positive value or when no generator has that name.
 * The result is released with codes_workload_free_config_return().
 */
codes_workload_config_return codes_workload_read_config(
        ConfigHandle * handle,
        char const * section_name,
        char const * annotation,
        int num_ranks)
{
    char type[MAX_NAME_LENGTH_WKLD];
    codes_workload_config_return r;
    r.type = NULL;
    r.params = NULL;

    int rc = configuration_get_value(handle, section_name, "type",
            annotation, type, MAX_NAME_LENGTH_WKLD);
    if (rc <= 0)
        return r;

    for (int i = 0; method_array[i] != NULL; i++) {
        struct codes_workload_method const * m = method_array[i];

        if (strcmp(m->method_name, type) != 0)
            continue;

        r.type = m->method_name;
        if (m->codes_workload_read_config != NULL)
            r.params = m->codes_workload_read_config(handle, section_name,
                    annotation, num_ranks);

        /* First match wins, as in codes_workload_load().  Continuing the
         * scan could overwrite (and leak) params if a later entry shared
         * the same name.
         */
        break;
    }

    return r;
}

/* Releases the parameter blob held by a config-return struct and resets both
 * of its fields to NULL so the struct can be safely reused or discarded.
 * `c` itself is not freed and must not be NULL.
 */
void codes_workload_free_config_return(codes_workload_config_return *c)
{
    /* only params is freed here; type is merely reset */
    free(c->params);
    c->params = NULL;
    c->type = NULL;
}

111 112 113 114 115
int codes_workload_load(
        const char* type,
        const char* params,
        int app_id,
        int rank)
116
{
117 118
    int i;
    int ret;
119
    struct rank_queue *tmp;
120 121 122 123 124

    for(i=0; method_array[i] != NULL; i++)
    {
        if(strcmp(method_array[i]->method_name, type) == 0)
        {
125
            /* load appropriate workload generator */
126
            ret = method_array[i]->codes_workload_load(params, app_id, rank);
127 128 129 130
            if(ret < 0)
            {
                return(-1);
            }
131 132 133 134 135

            /* are we tracking information for this rank yet? */
            tmp = ranks;
            while(tmp)
            {
136
                if(tmp->rank == rank && tmp->app == app_id)
137 138 139 140 141
                    break;
                tmp = tmp->next;
            }
            if(tmp == NULL)
            {
142
                tmp = (struct rank_queue*)malloc(sizeof(*tmp));
143
                assert(tmp);
144
                tmp->app  = app_id;
145 146 147 148 149 150
                tmp->rank = rank;
                tmp->lifo = NULL;
                tmp->next = ranks;
                ranks = tmp;
            }

151 152 153 154 155
            return(i);
        }
    }

    fprintf(stderr, "Error: failed to find workload generator %s\n", type);
156 157 158
    return(-1);
}

159 160 161 162 163
void codes_workload_get_next(
        int wkld_id,
        int app_id,
        int rank,
        struct codes_workload_op *op)
164
{
165 166 167 168 169 170 171 172 173
    struct rank_queue *tmp;
    struct rc_op *tmp_op;

    /* first look to see if we have a reversed operation that we can
     * re-issue
     */
    tmp = ranks;
    while(tmp)
    {
174
        if(tmp->rank == rank && tmp->app == app_id)
175 176 177
            break;
        tmp = tmp->next;
    }
yangxuserene's avatar
yangxuserene committed
178 179
    if(tmp==NULL)
        printf("tmp is NULL, rank=%d, app_id = %d", rank, app_id);
180 181 182 183
    assert(tmp);
    if(tmp->lifo)
    {
        tmp_op = tmp->lifo;
184
        tmp->lifo = tmp_op->next;
185 186 187 188 189 190 191

        *op = tmp_op->op;
        free(tmp_op);
        return;
    }

    /* ask generator for the next operation */
192
    method_array[wkld_id]->codes_workload_get_next(app_id, rank, op);
193

194 195 196
    return;
}

197 198 199 200 201
void codes_workload_get_next_rc(
        int wkld_id,
        int app_id,
        int rank,
        const struct codes_workload_op *op)
202
{
203 204 205 206 207 208
    struct rank_queue *tmp;
    struct rc_op *tmp_op;

    tmp = ranks;
    while(tmp)
    {
209
        if(tmp->rank == rank && tmp->app == app_id)
210 211 212 213 214
            break;
        tmp = tmp->next;
    }
    assert(tmp);

Elsa Gonsiorowski (Uranus)'s avatar
Elsa Gonsiorowski (Uranus) committed
215
    tmp_op = (struct rc_op*)malloc(sizeof(*tmp_op));
216 217 218 219
    assert(tmp_op);
    tmp_op->op = *op;
    tmp_op->next = tmp->lifo;
    tmp->lifo = tmp_op;
220

221 222 223
    return;
}

224 225 226 227
int codes_workload_get_rank_cnt(
        const char* type,
        const char* params,
        int app_id)
228
{
229 230 231 232 233 234 235
    int i;
    int rank_cnt;

    for(i=0; method_array[i] != NULL; i++)
    {
        if(strcmp(method_array[i]->method_name, type) == 0)
        {
236 237
            rank_cnt =
                method_array[i]->codes_workload_get_rank_cnt(params, app_id);
238 239 240 241 242 243 244
            assert(rank_cnt > 0);
            return(rank_cnt);
        }
    }

    fprintf(stderr, "Error: failed to find workload generator %s\n", type);
    return(-1);
245 246
}

247 248 249 250 251 252
void codes_workload_print_op(
        FILE *f,
        struct codes_workload_op *op,
        int app_id,
        int rank)
{
253 254
    switch(op->op_type){
        case CODES_WK_END:
255
            fprintf(f, "op: app:%d rank:%d type:end\n", app_id, rank);
256 257
            break;
        case CODES_WK_DELAY:
258
            fprintf(f, "op: app:%d rank:%d type:delay seconds:%lf\n",
259
                    app_id, rank, op->u.delay.seconds);
260 261
            break;
        case CODES_WK_BARRIER:
262
            fprintf(f, "op: app:%d rank:%d type:barrier count:%d root:%d\n",
263
                    app_id, rank, op->u.barrier.count, op->u.barrier.root);
264 265
            break;
        case CODES_WK_OPEN:
266
            fprintf(f, "op: app:%d rank:%d type:open file_id:%lu flag:%d\n",
267
                    app_id, rank, op->u.open.file_id, op->u.open.create_flag);
268 269
            break;
        case CODES_WK_CLOSE:
270
            fprintf(f, "op: app:%d rank:%d type:close file_id:%lu\n",
271
                    app_id, rank, op->u.close.file_id);
272 273
            break;
        case CODES_WK_WRITE:
274
            fprintf(f, "op: app:%d rank:%d type:write "
275
                       "file_id:%lu off:%lu size:%lu\n",
276
                    app_id, rank, op->u.write.file_id, op->u.write.offset,
277 278 279
                    op->u.write.size);
            break;
        case CODES_WK_READ:
280
            fprintf(f, "op: app:%d rank:%d type:read "
281
                       "file_id:%lu off:%lu size:%lu\n",
282
                    app_id, rank, op->u.read.file_id, op->u.read.offset,
283 284
                    op->u.read.size);
            break;
285
        case CODES_WK_SEND:
286
            fprintf(f, "op: app:%d rank:%d type:send "
287
                    "src:%d dst:%d bytes:%d type:%d count:%d tag:%d "
288
                    "start:%.5e end:%.5e\n",
289
                    app_id, rank,
290 291
                    op->u.send.source_rank, op->u.send.dest_rank,
                    op->u.send.num_bytes, op->u.send.data_type,
292 293
                    op->u.send.count, op->u.send.tag,
                    op->start_time, op->end_time);
294 295
            break;
        case CODES_WK_RECV:
296
            fprintf(f, "op: app:%d rank:%d type:recv "
Jonathan Jenkins's avatar
Jonathan Jenkins committed
297
                    "src:%d dst:%d bytes:%d type:%d count:%d tag:%d "
298
                    "start:%.5e end:%.5e\n",
299
                    app_id, rank,
300 301
                    op->u.recv.source_rank, op->u.recv.dest_rank,
                    op->u.recv.num_bytes, op->u.recv.data_type,
302 303
                    op->u.recv.count, op->u.recv.tag,
                    op->start_time, op->end_time);
304 305
            break;
        case CODES_WK_ISEND:
306
            fprintf(f, "op: app:%d rank:%d type:isend "
Jonathan Jenkins's avatar
Jonathan Jenkins committed
307
                    "src:%d dst:%d bytes:%d type:%d count:%d tag:%d "
308
                    "start:%.5e end:%.5e\n",
309
                    app_id, rank,
310 311
                    op->u.send.source_rank, op->u.send.dest_rank,
                    op->u.send.num_bytes, op->u.send.data_type,
312 313
                    op->u.send.count, op->u.send.tag,
                    op->start_time, op->end_time);
314 315
            break;
        case CODES_WK_IRECV:
316
            fprintf(f, "op: app:%d rank:%d type:irecv "
Jonathan Jenkins's avatar
Jonathan Jenkins committed
317
                    "src:%d dst:%d bytes:%d type:%d count:%d tag:%d "
318
                    "start:%.5e end:%.5e\n",
319
                    app_id, rank,
320 321
                    op->u.recv.source_rank, op->u.recv.dest_rank,
                    op->u.recv.num_bytes, op->u.recv.data_type,
322 323
                    op->u.recv.count, op->u.recv.tag,
                    op->start_time, op->end_time);
324
            break;
325 326
#define PRINT_COL(_type_str) \
            fprintf(f, "op: app:%d rank:%d type:%s" \
327
                    " bytes:%d, start:%.5e, end:%.5e\n", app_id, rank, \
328 329
                    _type_str, op->u.collective.num_bytes, op->start_time, \
                    op->end_time)
330
        case CODES_WK_BCAST:
331
            PRINT_COL("bcast");
332 333
            break;
        case CODES_WK_ALLGATHER:
334
            PRINT_COL("allgather");
335 336
            break;
        case CODES_WK_ALLGATHERV:
337
            PRINT_COL("allgatherv");
338 339
            break;
        case CODES_WK_ALLTOALL:
340
            PRINT_COL("alltoall");
341 342
            break;
        case CODES_WK_ALLTOALLV:
343
            PRINT_COL("alltoallv");
344 345
            break;
        case CODES_WK_REDUCE:
346
            PRINT_COL("reduce");
347 348
            break;
        case CODES_WK_ALLREDUCE:
349
            PRINT_COL("allreduce");
350 351
            break;
        case CODES_WK_COL:
352
            PRINT_COL("collective");
353
            break;
354 355 356
#undef PRINT_COL
#define PRINT_WAIT(_type_str, _ct) \
            fprintf(f, "op: app:%d rank:%d type:%s" \
357
                    "num reqs:%d, start:%.5e, end:%.5e\n", \
358
                    app_id, rank, _type_str, _ct, op->start_time, op->end_time)
359
        case CODES_WK_WAITALL:
360
            PRINT_WAIT("waitall", op->u.waits.count);
361 362
            break;
        case CODES_WK_WAIT:
363
            PRINT_WAIT("wait", 1);
364 365
            break;
        case CODES_WK_WAITSOME:
366
            PRINT_WAIT("waitsome", op->u.waits.count);
367 368
            break;
        case CODES_WK_WAITANY:
369
            PRINT_WAIT("waitany", op->u.waits.count);
370
            break;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
371 372
        case CODES_WK_IGNORE:
            break;
373
        default:
Jonathan Jenkins's avatar
Jonathan Jenkins committed
374 375 376
            fprintf(stderr,
                    "%s:%d: codes_workload_print_op: unrecognized workload type "
                    "(op code %d)\n", __FILE__, __LINE__, op->op_type);
377 378 379
    }
}

380 381 382 383
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 *  indent-tabs-mode: nil
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */