model-net-lp.c 18.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11
/*
 * Copyright (C) 2014 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <stddef.h>
#include <assert.h>
#include "codes/model-net.h"
#include "codes/model-net-method.h"
#include "codes/model-net-lp.h"
12
#include "codes/model-net-sched.h"
13 14 15 16 17 18 19 20 21 22 23 24 25
#include "codes/codes_mapping.h"
#include "codes/jenkins-hash.h"

#define MN_NAME "model_net_base"

/**** BEGIN SIMULATION DATA STRUCTURES ****/

// magic number stamped into the header of every event created in this file
// and asserted on by the base LP event handlers. Computed in
// model_net_base_configure from a hash of MN_NAME.
int model_net_base_magic;

// message-type specific offsets - don't want to get bitten later by alignment
// issues...
// Indexed by network id; each entry is the byte offset of that network's
// message within model_net_wrap_msg (filled in by model_net_base_configure).
static int msg_offsets[MAX_NETS];

26 27
/* Base model-net parameters read from the PARAMS configuration section; one
 * instance is kept per LP annotation (see base_read_config). */
typedef struct model_net_base_params_s {
    // scheduler type plus any scheduler-specific settings
    model_net_sched_cfg_params sched_params;
    // packet size copied into every new request handed to the scheduler
    uint64_t packet_size;
    // NOTE(review): neither read nor written in this part of the file -
    // presumably selects whether a receive-side queue is used; confirm
    int use_recv_queue;
} model_net_base_params;

/* annotation-specific parameters. For i < num_params, annos[i] and
 * all_params[i] describe the same annotation. The unannotated case is stored
 * as a NULL entry in annos, at whatever index it was first encountered while
 * scanning the LP configuration (not necessarily the last one). */
static int                       num_params = 0;
static const char              * annos[CONFIGURATION_MAX_ANNOS];
static model_net_base_params     all_params[CONFIGURATION_MAX_ANNOS];

38 39
typedef struct model_net_base_state {
    int net_id;
40
    // whether scheduler loop is running
41
    int in_sched_send_loop, in_sched_recv_loop;
42 43 44
    // unique message id counter. This doesn't get decremented on RC to prevent
    // optimistic orderings using "stale" ids
    uint64_t msg_id;
45 46
    // model-net schedulers
    model_net_sched *sched_send, *sched_recv;
47 48
    // parameters
    const model_net_base_params * params;
49 50 51 52 53 54
    // lp type and state of underlying model net method - cache here so we
    // don't have to constantly look up
    const tw_lptype *sub_type;
    void *sub_state;
} model_net_base_state;

55

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/**** END SIMULATION DATA STRUCTURES ****/

/**** BEGIN LP, EVENT PROCESSING FUNCTION DECLS ****/

/* ROSS LP processing functions */  
// LP init: resolves parameters and network id, creates the send/recv
// schedulers and initializes the wrapped network method's LP state
static void model_net_base_lp_init(
        model_net_base_state * ns,
        tw_lp * lp);
// forward event handler: dispatches on the event type in the message header
static void model_net_base_event(
        model_net_base_state * ns,
        tw_bf * b,
        model_net_wrap_msg * m,
        tw_lp * lp);
// reverse event handler: mirrors model_net_base_event for rollback
static void model_net_base_event_rc(
        model_net_base_state * ns,
        tw_bf * b,
        model_net_wrap_msg * m,
        tw_lp * lp);
// LP finalize: finalizes the wrapped network method and releases its state
static void model_net_base_finalize(
        model_net_base_state * ns,
        tw_lp * lp);

/* event type handlers */
// MN_BASE_NEW_MSG: hands a new request to the send or recv scheduler
static void handle_new_msg(
        model_net_base_state * ns,
        tw_bf *b,
        model_net_wrap_msg * m,
        tw_lp * lp);
// MN_BASE_SCHED_NEXT: runs one iteration of the scheduler loop
static void handle_sched_next(
        model_net_base_state * ns,
        tw_bf *b,
        model_net_wrap_msg * m,
        tw_lp * lp);
// reverse handler for handle_new_msg
static void handle_new_msg_rc(
        model_net_base_state * ns,
        tw_bf *b,
        model_net_wrap_msg * m,
        tw_lp * lp);
// reverse handler for handle_sched_next
static void handle_sched_next_rc(
        model_net_base_state * ns,
        tw_bf *b,
        model_net_wrap_msg * m,
        tw_lp * lp);

/* ROSS function pointer table for this LP */
tw_lptype model_net_base_lp = {
102 103 104 105 106 107 108
    (init_f) model_net_base_lp_init,
    (pre_run_f) NULL,
    (event_f) model_net_base_event,
    (revent_f) model_net_base_event_rc,
    (final_f)  model_net_base_finalize, 
    (map_f) codes_mapping,
    sizeof(model_net_base_state),
109 110 111 112 113 114
};

/**** END LP, EVENT PROCESSING FUNCTION DECLS ****/

/**** BEGIN IMPLEMENTATIONS ****/

115 116 117 118
void model_net_base_register(int *do_config_nets){
    // here, we initialize ALL lp types to use the base type
    for (int i = 0; i < MAX_NETS; i++){
        if (do_config_nets[i]){
119 120 121 122 123 124 125
            // some model-net lps need custom registration hooks (dragonfly).
            // Those that don't NULL out the reg. function
            if (method_array[i]->mn_register == NULL)
                lp_type_register(model_net_lp_config_names[i],
                        &model_net_base_lp);
            else
                method_array[i]->mn_register(&model_net_base_lp);
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
        }
    }
}

static void base_read_config(const char * anno, model_net_base_params *p){
    char sched[MAX_NAME_LENGTH];
    long int packet_size_l = 0;
    uint64_t packet_size;
    int ret;

    ret = configuration_get_value(&config, "PARAMS", "modelnet_scheduler",
            anno, sched, MAX_NAME_LENGTH);
    configuration_get_value_longint(&config, "PARAMS", "packet_size", anno,
            &packet_size_l);
    packet_size = packet_size_l;

    if (ret > 0){
        int i;
        for (i = 0; i < MAX_SCHEDS; i++){
            if (strcmp(sched_names[i], sched) == 0){
146
                p->sched_params.type = i;
147 148 149 150 151
                break;
            }
        }
        if (i == MAX_SCHEDS){
            tw_error(TW_LOC,"Unknown value for PARAMS:modelnet-scheduler : "
152
                    "%s", sched); 
153 154 155 156
        }
    }
    else{
        // default: FCFS
157
        p->sched_params.type = MN_SCHED_FCFS;
158 159
    }

160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
    // get scheduler-specific parameters
    if (p->sched_params.type == MN_SCHED_PRIO){
        // prio scheduler uses default parameters 
        int             * num_prios = &p->sched_params.u.prio.num_prios;
        enum sched_type * sub_stype = &p->sched_params.u.prio.sub_stype;
        // number of priorities to allocate
        ret = configuration_get_value_int(&config, "PARAMS",
                "prio-sched-num-prios", anno, num_prios);
        if (ret != 0)
            *num_prios = 10;

        ret = configuration_get_value(&config, "PARAMS",
                "prio-sched-sub-sched", anno, sched, MAX_NAME_LENGTH);
        if (ret == 0)
            *sub_stype = MN_SCHED_FCFS;
        else{
            int i;
            for (i = 0; i < MAX_SCHEDS; i++){
                if (strcmp(sched_names[i], sched) == 0){
                    *sub_stype = i;
                    break;
                }
            }
            if (i == MAX_SCHEDS){
                tw_error(TW_LOC, "Unknown value for "
                        "PARAMS:prio-sched-sub-sched %s", sched);
            }
            else if (i == MN_SCHED_PRIO){
                tw_error(TW_LOC, "priority scheduler cannot be used as a "
                        "priority scheduler's sub sched "
                        "(PARAMS:prio-sched-sub-sched)");
            }
        }
193 194
    }

195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
    if (p->sched_params.type == MN_SCHED_FCFS_FULL ||
            (p->sched_params.type == MN_SCHED_PRIO &&
             p->sched_params.u.prio.sub_stype == MN_SCHED_FCFS_FULL)){
        // override packet size to something huge (leave a bit in the unlikely
        // case that an op using packet size causes overflow)
        packet_size = 1ull << 62;
    }
    else if (!packet_size &&
            (p->sched_params.type != MN_SCHED_FCFS_FULL ||
             (p->sched_params.type == MN_SCHED_PRIO &&
              p->sched_params.u.prio.sub_stype != MN_SCHED_FCFS_FULL))){
        packet_size = 512;
        fprintf(stderr, "WARNING, no packet size specified, setting packet "
                "size to %llu\n", packet_size);
    }


212 213 214 215
    p->packet_size = packet_size;
}

void model_net_base_configure(){
216 217 218 219 220
    uint32_t h1=0, h2=0;

    bj_hashlittle2(MN_NAME, strlen(MN_NAME), &h1, &h2);
    model_net_base_magic = h1+h2;

221 222 223
    // set up offsets - doesn't matter if they are actually used or not
    msg_offsets[SIMPLENET] =
        offsetof(model_net_wrap_msg, msg.m_snet);
224 225
    msg_offsets[SIMPLEP2P] =
        offsetof(model_net_wrap_msg, msg.m_sp2p);
226 227 228 229 230 231
    msg_offsets[TORUS] =
        offsetof(model_net_wrap_msg, msg.m_torus);
    msg_offsets[DRAGONFLY] =
        offsetof(model_net_wrap_msg, msg.m_dfly);
    msg_offsets[LOGGP] =
        offsetof(model_net_wrap_msg, msg.m_loggp);
232

233 234 235 236 237 238 239 240 241 242
    // perform the configuration(s)
    // This part is tricky, as we basically have to look up all annotations that
    // have LP names of the form modelnet_*. For each of those, we need to read
    // the base parameters
    // - the init is a little easier as we can use the LP-id to look up the
    // annotation

    // first grab all of the annotations and store locally
    for (int c = 0; c < lpconf.lpannos_count; c++){
        const config_anno_map_t *amap = &lpconf.lpannos[c];
243
        if (strncmp("modelnet_", amap->lp_name.ptr, 9) == 0){
244 245 246
            for (int n = 0; n < amap->num_annos; n++){
                int a;
                for (a = 0; a < num_params; a++){
247 248
                    if (annos[a] != NULL && amap->annotations[n].ptr != NULL &&
                            strcmp(amap->annotations[n].ptr, annos[a]) == 0){
249 250 251 252 253
                        break;
                    }
                }
                if (a == num_params){
                    // found a new annotation
254
                    annos[num_params++] = amap->annotations[n].ptr;
255 256 257 258 259 260 261 262 263 264 265 266 267
                }
            }
            if (amap->has_unanno_lp){
                int a;
                for (a = 0; a < num_params; a++){
                    if (annos[a] == NULL)
                        break;
                }
                if (a == num_params){
                    // found a new (empty) annotation
                    annos[num_params++] = NULL;
                }
            }
268 269
        }
    }
270 271 272 273 274 275

    // now that we have all of the annos for all of the networks, loop through
    // and read the configs
    for (int i = 0; i < num_params; i++){
        base_read_config(annos[i], &all_params[i]);
    }
276 277 278 279 280 281
}

/* LP init function of the base model-net LP.
 *
 * Resolves the annotation-specific parameters and the network id of this LP,
 * allocates and initializes the send and receive schedulers, then allocates
 * (zeroed) state for the underlying network method and runs its init.
 *
 * ns - base LP state to initialize
 * lp - the LP being initialized
 *
 * Calls tw_error if no parameters or network id match this LP, or if an
 * allocation fails.
 */
void model_net_base_lp_init(
        model_net_base_state * ns,
        tw_lp * lp){
    // obtain the underlying lp type through codes-mapping
    char lp_type_name[MAX_NAME_LENGTH], anno[MAX_NAME_LENGTH];
    int dummy;

    codes_mapping_get_lp_info(lp->gid, NULL, &dummy,
            lp_type_name, &dummy, anno, &dummy, &dummy);

    ns->msg_id = 0;
    ns->in_sched_send_loop = 0;
    ns->in_sched_recv_loop = 0;

    // get annotation-specific parameters
    ns->params = NULL;
    for (int i = 0; i < num_params; i++){
        // a NULL entry stands for the unannotated case and must never be
        // handed to strcmp
        int is_match = (annos[i] == NULL)
            ? (anno[0] == '\0')
            : (strcmp(anno, annos[i]) == 0);
        if (is_match){
            ns->params = &all_params[i];
            break;
        }
    }
    if (ns->params == NULL){
        tw_error(TW_LOC, "no model-net base parameters found for LP type "
                "\"%s\", annotation \"%s\"", lp_type_name, anno);
    }

    // find the corresponding method name / index
    int net_id = -1;
    for (int i = 0; i < MAX_NETS; i++){
        if (strcmp(model_net_lp_config_names[i], lp_type_name) == 0){
            net_id = i;
            break;
        }
    }
    if (net_id < 0){
        tw_error(TW_LOC, "LP type \"%s\" is not a known model-net LP type",
                lp_type_name);
    }
    ns->net_id = net_id;

    ns->sched_send = malloc(sizeof(*ns->sched_send));
    ns->sched_recv = malloc(sizeof(*ns->sched_recv));
    if (ns->sched_send == NULL || ns->sched_recv == NULL){
        tw_error(TW_LOC, "unable to allocate model-net schedulers");
    }
    // init both the sender queue and the 'receiver' queue
    model_net_sched_init(&ns->params->sched_params, 0, method_array[ns->net_id],
            ns->sched_send);
    model_net_sched_init(&ns->params->sched_params, 1, method_array[ns->net_id],
            ns->sched_recv);

    ns->sub_type = model_net_get_lp_type(ns->net_id);
    // NOTE: some models actually expect LP state to be 0 initialized...
    // *cough anything that uses mn_stats_array cough*
    ns->sub_state = calloc(1, ns->sub_type->state_sz);
    if (ns->sub_state == NULL){
        tw_error(TW_LOC, "unable to allocate state for model-net LP type "
                "\"%s\"", lp_type_name);
    }

    // initialize the model-net method
    ns->sub_type->init(ns->sub_state, lp);
}

/* Forward event handler of the base LP.
 *
 * Checks the magic number in the message header, then dispatches on the
 * event type: new messages and scheduler iterations are handled here, while
 * MN_BASE_PASS events are forwarded to the underlying network method with a
 * pointer to the method-specific part of the message.
 */
void model_net_base_event(
        model_net_base_state * ns,
        tw_bf * b,
        model_net_wrap_msg * m,
        tw_lp * lp){
    assert(m->h.magic == model_net_base_magic);

    switch (m->h.event_type){
        case MN_BASE_NEW_MSG:
            handle_new_msg(ns, b, m, lp);
            break;
        case MN_BASE_SCHED_NEXT:
            handle_sched_next(ns, b, m, lp);
            break;
        case MN_BASE_PASS: {
            // the method's message lives at a per-network offset in the wrapper
            void * sub_msg = ((char*)m)+msg_offsets[ns->net_id];
            ns->sub_type->event(ns->sub_state, b, sub_msg, lp);
            break;
        }
        /* ... */
        default:
            assert(!"model_net_base event type not known");
            break;
    }
}

/* Reverse event handler of the base LP.
 *
 * Mirrors model_net_base_event: checks the magic number, dispatches to the
 * matching reverse handler (or the underlying method's reverse handler for
 * MN_BASE_PASS), and finally zeroes the event bitfield.
 */
void model_net_base_event_rc(
        model_net_base_state * ns,
        tw_bf * b,
        model_net_wrap_msg * m,
        tw_lp * lp){
    assert(m->h.magic == model_net_base_magic);

    switch (m->h.event_type){
        case MN_BASE_NEW_MSG:
            handle_new_msg_rc(ns, b, m, lp);
            break;
        case MN_BASE_SCHED_NEXT:
            handle_sched_next_rc(ns, b, m, lp);
            break;
        case MN_BASE_PASS: {
            // the method's message lives at a per-network offset in the wrapper
            void * sub_msg = ((char*)m)+msg_offsets[ns->net_id];
            ns->sub_type->revent(ns->sub_state, b, sub_msg, lp);
            break;
        }
        /* ... */
        default:
            assert(!"model_net_base event type not known");
            break;
    }

    // reset the bitfield flags set by the forward handler
    *(int*)b = 0;
}

/* LP finalize function of the base LP.
 *
 * Runs the underlying network method's finalize function, then releases the
 * memory allocated by model_net_base_lp_init (method state and the two
 * scheduler structs).
 */
void model_net_base_finalize(
        model_net_base_state * ns,
        tw_lp * lp){
    ns->sub_type->final(ns->sub_state, lp);
    free(ns->sub_state);
    ns->sub_state = NULL;

    // NOTE(review): only the scheduler structs themselves are released here;
    // confirm whether model-net-sched needs a teardown call for anything it
    // allocated internally
    free(ns->sched_send);
    free(ns->sched_recv);
    ns->sched_send = NULL;
    ns->sched_recv = NULL;
}

383
/// bitfields used:
384
/// c31 - we initiated a sched_next event
385 386 387 388 389
void handle_new_msg(
        model_net_base_state * ns,
        tw_bf *b,
        model_net_wrap_msg * m,
        tw_lp * lp){
390
    // simply pass down to the scheduler
391
    model_net_request *r = &m->msg.m_base.req;
392 393
    // don't forget to set packet size, now that we're responsible for it!
    r->packet_size = ns->params->packet_size;
394
    r->msg_id = ns->msg_id++;
395 396
    void * m_data = m+1;
    void *remote = NULL, *local = NULL;
397
    if (r->remote_event_size > 0){
398 399
        remote = m_data;
        m_data = (char*)m_data + r->remote_event_size;
400 401
    }
    if (r->self_event_size > 0){
402
        local = m_data;
403
    }
404
    
405
    // set message-specific params
406 407 408 409 410 411
    int is_from_remote = m->msg.m_base.is_from_remote;
    model_net_sched *ss = is_from_remote ? ns->sched_recv : ns->sched_send;
    int *in_sched_loop = is_from_remote  ? 
        &ns->in_sched_recv_loop : &ns->in_sched_send_loop;
    model_net_sched_add(r, &m->msg.m_base.sched_params, r->remote_event_size,
            remote, r->self_event_size, local, ss, &m->msg.m_base.rc, lp);
412
    
413
    if (*in_sched_loop == 0){
414 415
        b->c31 = 1;
        /* No need to issue an extra sched-next event if we're currently idle */
416
        *in_sched_loop = 1;
417 418 419 420 421
        /* NOTE: we can do this because the sched rc struct in the event is
         * *very* lightly used (there's harmless overlap in usage for the
         * priority scheduler) */
        handle_sched_next(ns, b, m, lp);
        assert(*in_sched_loop); // we shouldn't have fallen out of the loop
422 423
    }
}
424 425 426

void handle_new_msg_rc(
        model_net_base_state *ns,
427
        tw_bf *b,
428 429
        model_net_wrap_msg *m,
        tw_lp *lp){
430 431 432 433 434
    int is_from_remote = m->msg.m_base.is_from_remote;
    model_net_sched *ss = is_from_remote ? ns->sched_recv : ns->sched_send;
    int *in_sched_loop = is_from_remote  ? 
        &ns->in_sched_recv_loop : &ns->in_sched_send_loop;

435 436
    if (b->c31) {
        handle_sched_next_rc(ns, b, m, lp);
437
        *in_sched_loop = 0;
438
    }
439
    model_net_sched_add_rc(ss, &m->msg.m_base.rc, lp);
440
}
441 442 443 444

/// bitfields used
/// c0 - scheduler loop is finished
void handle_sched_next(
445 446 447 448
        model_net_base_state * ns,
        tw_bf *b,
        model_net_wrap_msg * m,
        tw_lp * lp){
449
    tw_stime poffset;
450 451 452 453 454
    int is_from_remote = m->msg.m_base.is_from_remote;
    model_net_sched * ss = is_from_remote ? ns->sched_recv : ns->sched_send;
    int *in_sched_loop = is_from_remote ?
        &ns->in_sched_recv_loop : &ns->in_sched_send_loop;
    int ret = model_net_sched_next(&poffset, ss, m+1, &m->msg.m_base.rc, lp);
455 456 457 458
    // we only need to know whether scheduling is finished or not - if not,
    // go to the 'next iteration' of the loop
    if (ret == -1){
        b->c0 = 1;
459
        *in_sched_loop = 0;
460
    }
461 462 463
    // Currently, only a subset of the network implementations use the
    // callback-based scheduling loop (model_net_method_idle_event).
    // For all others, we need to schedule the next packet
464
    // immediately
465
    else if (ns->net_id == SIMPLEP2P || ns->net_id == TORUS){
466
        tw_event *e = tw_event_new(lp->gid, 
467
                poffset+codes_local_latency(lp), lp);
468
        model_net_wrap_msg *m_wrap = tw_event_data(e);
469
        msg_set_header(model_net_base_magic, MN_BASE_SCHED_NEXT, lp->gid,
470 471
                &m_wrap->h);
        m_wrap->msg.m_base.is_from_remote = is_from_remote;
472 473
        // no need to set m_base here
        tw_event_send(e);
474 475
    }
}
476

477 478 479 480 481
/// Reverse handler for handle_sched_next: reverses the scheduler step, then
/// either marks the loop as running again (c0 was set by the forward handler)
/// or, for SIMPLEP2P and TORUS, reverses the local-latency call made when the
/// follow-up event was created.
void handle_sched_next_rc(
        model_net_base_state * ns,
        tw_bf *b,
        model_net_wrap_msg * m,
        tw_lp * lp){
    int is_from_remote = m->msg.m_base.is_from_remote;
    model_net_sched * ss = is_from_remote ? ns->sched_recv : ns->sched_send;
    int *in_sched_loop = is_from_remote ?
        &ns->in_sched_recv_loop : &ns->in_sched_send_loop;

    model_net_sched_next_rc(ss, m+1, &m->msg.m_base.rc, lp);
    if (b->c0){
        *in_sched_loop = 1;
    }
    else if (ns->net_id == SIMPLEP2P || ns->net_id == TORUS){
        codes_local_latency_reverse(lp);
    }
}

/**** END IMPLEMENTATIONS ****/

/* Creates (but does not send) an MN_BASE_PASS event for a network method.
 *
 * dest_gid   - destination LP
 * offset_ts  - time offset of the event
 * sender     - LP creating the event
 * net_id     - network id, selects the method-specific message offset
 * msg_data   - output: pointer to the method-specific message in the event
 * extra_data - optional output (may be NULL): pointer to the space directly
 *              after the wrapped message
 *
 * Returns the new event; the caller is expected to send it.
 */
tw_event * model_net_method_event_new(
        tw_lpid dest_gid,
        tw_stime offset_ts,
        tw_lp *sender,
        int net_id,
        void **msg_data,
        void **extra_data){
    tw_event *e = tw_event_new(dest_gid, offset_ts, sender);
    model_net_wrap_msg *m_wrap = tw_event_data(e);
    msg_set_header(model_net_base_magic, MN_BASE_PASS, sender->gid,
            &m_wrap->h);
    *msg_data = ((char*)m_wrap)+msg_offsets[net_id];
    // extra_data is optional
    if (extra_data != NULL){
        *extra_data = m_wrap + 1;
    }
    return e;
}

517 518 519 520 521 522 523 524 525 526 527 528
void model_net_method_send_msg_recv_event(
        tw_lpid final_dest_lp,
        tw_lpid dest_mn_lp,
        tw_lpid src_lp, // the "actual" source (as opposed to the model net lp)
        uint64_t msg_size,
        int is_pull,
        uint64_t pull_size,
        int remote_event_size,
        const mn_sched_params *sched_params,
        const char * category,
        int net_id,
        void * msg,
529
        tw_stime offset,
530 531
        tw_lp *sender){
    tw_event *e = 
532
        tw_event_new(dest_mn_lp, offset+codes_local_latency(sender), sender);
533 534 535 536 537 538 539 540
    model_net_wrap_msg *m = tw_event_data(e);
    msg_set_header(model_net_base_magic, MN_BASE_NEW_MSG, sender->gid, &m->h);

    if (sched_params != NULL)
        m->msg.m_base.sched_params = *sched_params;
    else
        model_net_sched_set_default_params(&m->msg.m_base.sched_params);

541 542 543
    model_net_request *r = &m->msg.m_base.req;
    r->final_dest_lp = final_dest_lp;
    r->src_lp = src_lp;
544
    // for "recv" events, set the "dest" to this LP in the case of a pull event
545 546 547 548 549 550 551 552 553
    r->dest_mn_lp = sender->gid;
    r->pull_size = pull_size;
    r->msg_size = msg_size;
    // TODO: document why we're setting packet_size this way
    r->packet_size = msg_size;
    r->net_id = net_id;
    r->is_pull = is_pull;
    r->remote_event_size = remote_event_size;
    r->self_event_size = 0;
554 555
    m->msg.m_base.is_from_remote = 1;

556 557
    strncpy(r->category, category, CATEGORY_NAME_MAX-1);
    r->category[CATEGORY_NAME_MAX-1] = '\0';
558 559 560 561 562 563 564 565 566

    if (remote_event_size > 0){
        void * m_dat = model_net_method_get_edata(net_id, msg);
        memcpy(m+1, m_dat, remote_event_size);
    }

    tw_event_send(e);
}

Jonathan Jenkins's avatar
Jonathan Jenkins committed
567 568 569 570
/* Reverse counterpart of model_net_method_send_msg_recv_event: undoes the
 * codes_local_latency call made on behalf of sender. */
void model_net_method_send_msg_recv_event_rc(tw_lp *sender)
{
    codes_local_latency_reverse(sender);
}

571 572 573

/* Sends an MN_BASE_SCHED_NEXT event from lp to itself, triggering the next
 * iteration of a scheduler loop.
 *
 * offset_ts     - time offset of the event
 * is_recv_queue - non-zero selects the receive scheduler, zero the send
 *                 scheduler
 * lp            - the model-net LP to notify
 */
void model_net_method_idle_event(tw_stime offset_ts, int is_recv_queue,
        tw_lp * lp){
    tw_event *e = tw_event_new(lp->gid, offset_ts, lp);
    model_net_wrap_msg *m_wrap = tw_event_data(e);
    msg_set_header(model_net_base_magic, MN_BASE_SCHED_NEXT, lp->gid,
            &m_wrap->h);
    m_wrap->msg.m_base.is_from_remote = is_recv_queue;
    tw_event_send(e);
}

582 583 584 585 586 587 588 589 590 591 592 593
/* Given a pointer to a network method's message (as embedded in a
 * model_net_wrap_msg at that network's offset), returns a pointer to the
 * data that directly follows the enclosing wrapper message. */
void * model_net_method_get_edata(int net_id, void *msg){
    // step to where a wrapper starting at msg would end...
    char *past_wrapper = (char*)msg + sizeof(model_net_wrap_msg);
    // ...then back up by the method message's offset inside the wrapper
    return past_wrapper - msg_offsets[net_id];
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */