/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

/* SUMMARY:
 *
 * This is a sample code to demonstrate CODES usage and best practices.  It
 * sets up a number of servers, each of which is paired up with a simplenet LP
 * to serve as the NIC.  Each server exchanges a sequence of requests and acks
 * with one peer and measures the throughput in terms of payload bytes (ack
 * size) moved per second.
 */

#include <string.h>
#include <assert.h>
#include <ross.h>

#include "codes/lp-io.h"
#include "codes/codes.h"
#include "codes/codes_mapping.h"
#include "codes/configuration.h"
#include "codes/model-net.h"
#include "codes/lp-type-lookup.h"
#include "codes/local-storage-model.h"

static int num_reqs = 0;    /* number of requests sent by each server (read from config) */
static int payload_sz = 0; /* size of simulated data payload, bytes (read from config) */

/* model-net ID, can be either simple-net, dragonfly or torus (more may be
 * added) */
static int net_id = 0;
static int num_servers = 0;
static int offset = 2;

/* expected LP group name in the configuration file for this program */
static char *group_name = "SERVERS";
/* expected parameter group name for rounds of communication */
static char *param_group_nm = "server_pings";
static char *num_reqs_key = "num_reqs";
static char *payload_sz_key = "payload_sz";
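
/* For reference, a minimal sketch of what the relevant stanzas of a codes
 * configuration file might look like. Aside from SERVERS, server_pings,
 * num_reqs, and payload_sz (which this program looks up by name), the LP
 * names, counts, and values below are hypothetical and shown only for
 * illustration:
 *
 *   LPGROUPS
 *   {
 *      SERVERS
 *      {
 *         repetitions="4";
 *         server="1";
 *         modelnet_simplenet="1";
 *      }
 *   }
 *   server_pings
 *   {
 *      num_reqs="5";
 *      payload_sz="4096";
 *   }
 */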

typedef struct svr_msg svr_msg;
typedef struct svr_state svr_state;

/* types of events that will constitute server activities */
enum svr_event
{
    KICKOFF,    /* initial event */
    REQ,        /* request event */
    ACK,        /* ack event */
    LOCAL      /* local event */
};

/* this struct serves as the ***persistent*** state of the LP representing the
 * server in question. This struct is set up when the LP initialization function
 * pointer is called */
struct svr_state
{
    int msg_sent_count;   /* requests sent */
    int msg_recvd_count;  /* requests recvd */
    int local_recvd_count; /* number of local messages received */
    tw_stime start_ts;    /* time that we started sending requests */
    tw_stime end_ts;      /* time that last request finished */
};

/* this struct serves as the ***temporary*** event data, which can be thought
 * of as a message between two LPs. */
struct svr_msg
{
    enum svr_event svr_event_type;
    tw_lpid src;          /* source of this request or ack */

    int incremented_flag; /* helper for reverse computation */
};

/* ROSS expects four functions per LP:
 * - an LP initialization function, called for each LP
 * - an event processing function
 * - a *reverse* event processing function (rollback), and
 * - a finalization/cleanup function when the simulation ends
 */
static void svr_init(
    svr_state * ns,
    tw_lp * lp);
static void svr_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void svr_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void svr_finalize(
    svr_state * ns,
    tw_lp * lp);

/* set up the function pointers for ROSS, as well as the size of the LP state
 * structure (NOTE: ROSS is in charge of event and state (de-)allocation) */
tw_lptype svr_lp = {
    (init_f) svr_init,
    (pre_run_f) NULL,
    (event_f) svr_event,
    (revent_f) svr_rev_event,
    (final_f)  svr_finalize, 
    (map_f) codes_mapping,
    sizeof(svr_state),
};

extern const tw_lptype* svr_get_lp_type();
static void svr_add_lp_type();
static tw_stime ns_to_s(tw_stime ns);
static tw_stime s_to_ns(tw_stime s);

/* as we only have a single event processing entry point and multiple event
 * types, for clarity we define "handlers" for each (reverse) event type */
static void handle_kickoff_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void handle_ack_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void handle_req_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void handle_local_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void handle_local_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void handle_kickoff_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void handle_ack_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);
static void handle_req_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp);

/* for this simulation, each server contacts the next server in LP id order.
 * This function shows how to use the codes_mapping API to calculate IDs when
 * having to contend with multiple LP types and counts. Note that in this simple
 * example codes_mapping is overkill. */
static tw_lpid get_next_server(tw_lpid sender_id);

/* arguments to be handled by ROSS - strings passed in are expected to be
 * pre-allocated */
static char conf_file_name[256] = {0};
/* this struct contains default parameters used by ROSS, as well as
 * user-specific arguments to be handled by the ROSS config sys. Pass it in
 * prior to calling tw_init */
const tw_optdef app_opt [] =
{
    TWOPT_GROUP("Model net test case"),
    TWOPT_CHAR("codes-config", conf_file_name, "name of codes configuration file"),
    TWOPT_END()
};
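
/* A hypothetical invocation of the resulting binary (file names and process
 * counts are illustrative only; --sync is ROSS's synchronization mode, e.g.
 * --sync=1 for sequential or --sync=3 for optimistic parallel execution):
 *
 *   mpirun -np 2 ./example --sync=3 --codes-config=example.conf
 */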

int main(
    int argc,
    char **argv)
{
    int nprocs;
    int rank;
    int num_nets, *net_ids;

    /* ROSS stops processing events once simulated time reaches g_tw_ts_end, so
     * set this cutoff far enough in the future that it is never the limiting
     * factor for this example */
    g_tw_ts_end = s_to_ns(60*60*24*365); /* one year, in nsecs */

    /* ROSS initialization function calls */
    tw_opt_add(app_opt); /* add user-defined args */
    /* initialize ROSS and parse args. NOTE: tw_init calls MPI_Init */
    tw_init(&argc, &argv); 

    if (!conf_file_name[0]) 
    {
        fprintf(stderr, "Expected \"codes-config\" option, please see --help.\n");
        MPI_Finalize();
        return 1;
    }

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  
    /* loading the config file into the codes-mapping utility, giving us the
     * parsed config object in return. 
     * "config" is a global var defined by codes-mapping */
    if (configuration_load(conf_file_name, MPI_COMM_WORLD, &config)){
        fprintf(stderr, "Error loading config file %s.\n", conf_file_name);
        MPI_Finalize();
        return 1;
    }

    /* register model-net LPs with ROSS */
    model_net_register();

    /* register the server LP type with ROSS */
    svr_add_lp_type();

    /* Setup takes the global config object, the registered LPs, and
     * generates/places the LPs as specified in the configuration file.
     * This should only be called after ALL LP types have been registered in 
     * codes */
    codes_mapping_setup();

    /* Set up the model-net parameters specified in the global config object.
     * The identifier(s) for the configured network type(s) are returned; in
     * this example we only expect one. */
    net_ids = model_net_configure(&num_nets);
    assert(num_nets==1);
    net_id = *net_ids;
    free(net_ids);
    /* in this example, we are using simplenet, which simulates point-to-point
     * communication between any two entities (other networks are trickier to
     * set up). Hence: */
    if(net_id != SIMPLENET)
    {
        printf("This test only works with a simple-net configuration!\n");
        MPI_Finalize();
        return 0;
    }
    
    /* calculate the number of servers in this simulation,
     * ignoring annotations */
    num_servers = codes_mapping_get_lp_count(group_name, 0, "server", NULL, 1);

    /* for this example, we read from a separate configuration group for
     * server message parameters. Since they are constant for all LPs,
     * go ahead and read them prior to running */
    configuration_get_value_int(&config, param_group_nm, num_reqs_key, NULL, &num_reqs);
    configuration_get_value_int(&config, param_group_nm, payload_sz_key, NULL, &payload_sz);
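    /* note: for brevity this example assumes both keys are present in the
     * config file; a more defensive version would check the return values of
     * configuration_get_value_int and verify num_reqs > 0 and payload_sz > 0 */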

    /* begin simulation */ 
    tw_run();

    /* model-net has the capability of outputting network transmission stats */
    model_net_report_stats(net_id);

    tw_end();
    return 0;
}

const tw_lptype* svr_get_lp_type()
{
    return(&svr_lp);
}

static void svr_add_lp_type()
{
    /* lp_type_register should be called exactly once per process per 
     * LP type */
    lp_type_register("server", svr_get_lp_type());
}

static void svr_init(
    svr_state * ns,
    tw_lp * lp)
{
    tw_event *e;
    svr_msg *m;
    tw_stime kickoff_time;
    
    memset(ns, 0, sizeof(*ns));

    /* each server sends a dummy event to itself that will kick off the real
     * simulation
     */

    /* skew each kickoff event slightly to help avoid event ties later on */
    kickoff_time = g_tw_lookahead + tw_rand_unif(lp->rng); 

    /* first create the event (time arg is an offset, not absolute time) */
    e = codes_event_new(lp->gid, kickoff_time, lp);
    /* after event is created, grab the allocated message and set msg-specific
     * data */ 
    m = tw_event_data(e);
    m->svr_event_type = KICKOFF;
    /* event is ready to be processed, send it off */
    tw_event_send(e);

    return;
}

/* event processing entry point
 * - simply forward the message to the appropriate handler */
static void svr_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
   switch (m->svr_event_type)
    {
        case REQ:
            handle_req_event(ns, b, m, lp);
            break;
        case ACK:
            handle_ack_event(ns, b, m, lp);
            break;
        case KICKOFF:
            handle_kickoff_event(ns, b, m, lp);
            break;
        case LOCAL:
            handle_local_event(ns, b, m, lp);
            break;
        default:
            printf("\n Invalid message type %d ", m->svr_event_type);
            assert(0);
            break;
    }
}

/* reverse event processing entry point
 * - simply forward the message to the appropriate handler */
static void svr_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    switch (m->svr_event_type)
    {
        case REQ:
            handle_req_rev_event(ns, b, m, lp);
            break;
        case ACK:
            handle_ack_rev_event(ns, b, m, lp);
            break;
        case KICKOFF:
            handle_kickoff_rev_event(ns, b, m, lp);
            break;
        case LOCAL:
            handle_local_rev_event(ns, b, m, lp);
            break;
        default:
            assert(0);
            break;
    }

    return;
}

/* once the simulation is over, do some output */
static void svr_finalize(
    svr_state * ns,
    tw_lp * lp)
{
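    /* note: the byte count printed below reflects requests received from this
     * server's upstream neighbor, while the MiB/s figure is computed from the
     * num_reqs requests this server itself sent over [start_ts, end_ts] */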
    printf("server %llu recvd %d bytes in %lf seconds, %lf MiB/s sent_count %d recvd_count %d local_count %d \n", 
            (unsigned long long)(lp->gid/2),
            payload_sz*ns->msg_recvd_count,
            ns_to_s(ns->end_ts-ns->start_ts),
            ((double)(payload_sz*num_reqs)/(double)(1024*1024)/ns_to_s(ns->end_ts-ns->start_ts)),
            ns->msg_sent_count,
            ns->msg_recvd_count,
            ns->local_recvd_count);
    return;
}

/* convert ns to seconds */
static tw_stime ns_to_s(tw_stime ns)
{
    return(ns / (1000.0 * 1000.0 * 1000.0));
}

/* convert seconds to ns */
static tw_stime s_to_ns(tw_stime s)
{
    return(s * (1000.0 * 1000.0 * 1000.0));
}

/* see declaration for more general info */
static tw_lpid get_next_server(tw_lpid sender_id)
{
    tw_lpid rtn_id;
    /* first, get the caller's LP and group info from codes-mapping. Caching
     * this info in the LP struct isn't a bad idea for preventing a huge number
     * of lookups */
    char grp_name[MAX_NAME_LENGTH], lp_type_name[MAX_NAME_LENGTH],
         annotation[MAX_NAME_LENGTH];
    int  lp_type_id, grp_id, grp_rep_id, offset, num_reps;
    int dest_rep_id;
    codes_mapping_get_lp_info(sender_id, grp_name, &grp_id, lp_type_name,
            &lp_type_id, annotation, &grp_rep_id, &offset);
    /* in this example, we assume that, for our group of servers, each 
     * "repetition" consists of a single server/NIC pair. Hence, we grab the 
     * server ID for the next repetition, looping around if necessary */
    num_reps = codes_mapping_get_group_reps(grp_name);
    dest_rep_id = (grp_rep_id+1) % num_reps;
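    /* e.g., with a hypothetical 4-repetition SERVERS group, the sender in
     * repetition 3 wraps around and targets the server in repetition 0 */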
    /* finally, get the server (exactly 1 server per rep -> offset w/in rep = 0) */
    codes_mapping_get_lp_id(grp_name, lp_type_name, NULL, 1, dest_rep_id,
            0, &rtn_id);
    return rtn_id;
}

/* handle initial event */
static void handle_kickoff_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    int dest_id;
    int use_brute_force_map = 0;
    /* normally, when using ROSS, events are allocated as a result of the event
     * creation process. However, since we are now asking model-net to
     * communicate with an entity on our behalf, we need to generate both the
     * message to the recipient and an optional callback message 
     * - thankfully, memory need not persist past the model_net_event call - it
     *   copies the messages */
    svr_msg m_local;
    svr_msg m_remote;

    m_local.svr_event_type = LOCAL;
    m_local.src = lp->gid;
    m_remote.svr_event_type = REQ;
    m_remote.src = lp->gid;

    /* record when transfers started on this server */
    ns->start_ts = tw_now(lp);

    /* each server sends a request to the next highest server 
     * In this simulation, LP determination is simple: LPs are assigned
     * round robin as in serv_1, net_1, serv_2, net_2, etc. 
     * However, that may not always be the case, so we also show a more
     * complicated way to map through codes_mapping */
    if (use_brute_force_map)
        dest_id = (lp->gid + offset)%(num_servers*2);
    else
    {
        dest_id = get_next_server(lp->gid);
    }

    /* model-net needs to know about (1) the higher-level destination LP (a
     * neighboring server in this case), (2) the struct and size of the remote
     * message, and (3) the struct and size of the local message (a local
     * message can be NULL) */
    model_net_event(net_id, "test", dest_id, payload_sz, 0.0, sizeof(svr_msg), 
            (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
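    /* (here "test" is a category label under which model-net groups the
     * transmission statistics it reports, and the 0.0 argument is an
     * additional offset in simulated time before the send) */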
    ns->msg_sent_count++;
}

/* at the moment, no need for local callbacks from model-net, so we maintain a
 * count for debugging purposes */ 
static void handle_local_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    ns->local_recvd_count++;
}

/* handle receiving an ack
 * for this simulation, we repeatedly ping the destination server until num_reqs
 * requests of size payload_sz have been satisfied - we begin the next req when
 * we receive an ACK from the destination server */
static void handle_ack_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    /* the ACK actually doesn't come from the NIC on the other server -
     * model-net "hides" the NIC LP from us so we only see the original
     * destination server */

    /* safety check that this request got to the right server, both with our
     * brute-force lp calculation and our more generic codes-mapping 
     * calculation */
    assert(m->src == (lp->gid + offset)%(num_servers*2) &&
           m->src == get_next_server(lp->gid));

    if(ns->msg_sent_count < num_reqs)
    {
        /* again, allocate our own msgs so model-net can transmit on our behalf */
        svr_msg m_local;
        svr_msg m_remote;

        m_local.svr_event_type = LOCAL;
        m_local.src = lp->gid;
        m_remote.svr_event_type = REQ;
        m_remote.src = lp->gid;

        /* send another request */
        model_net_event(net_id, "test", m->src, payload_sz, 0.0, sizeof(svr_msg),
                (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
        ns->msg_sent_count++;
        m->incremented_flag = 1;
        
    }
    else
    {
        /* threshold count reached, stop sending messages */
        m->incremented_flag = 0;
        ns->end_ts = tw_now(lp);
    }
    return;
}

/* handle receiving request */
static void handle_req_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    svr_msg m_local;
    svr_msg m_remote;

    m_local.svr_event_type = LOCAL;
    m_local.src = lp->gid;
    m_remote.svr_event_type = ACK;
    m_remote.src = lp->gid;

    /* safety check that this request got to the right server */
    
    assert(lp->gid == (m->src + offset)%(num_servers*2) &&
           lp->gid == get_next_server(m->src));
    ns->msg_recvd_count++;

    /* send ack back */
    /* simulated payload of payload_sz bytes */
    /* also trigger a local event for completion of payload msg */
    /* remote host will get an ack event */
   
    model_net_event(net_id, "test", m->src, payload_sz, 0.0, sizeof(svr_msg), 
            (const void*)&m_remote, sizeof(svr_msg), (const void*)&m_local, lp);
    return;
}

/* for us, reverse events are very easy: the only LP state that needs to be
 * rolled back is the set of counters.
 * for more complex simulations, this will not be the case (e.g., state
 * containing queues) */

static void handle_local_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    ns->local_recvd_count--;
}
/* reverse handler for req event */
static void handle_req_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    ns->msg_recvd_count--;
    /* model-net has its own reverse computation support */
    model_net_event_rc(net_id, lp, payload_sz);

    return;
}


/* reverse handler for kickoff */
static void handle_kickoff_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    ns->msg_sent_count--;
    model_net_event_rc(net_id, lp, payload_sz);

    return;
}

/* reverse handler for ack */
static void handle_ack_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    if(m->incremented_flag)
    {
        model_net_event_rc(net_id, lp, payload_sz);
        ns->msg_sent_count--;
    }
    return;
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */