/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

/* SUMMARY:
 *
 * This is a test harness for the modelnet module.  It sets up a number of
 * servers, each of which is paired up with a simplenet LP to serve as the
 * NIC.  Each server exchanges a sequence of requests and acks with one peer
 * and measures the throughput in terms of payload bytes (ack size) moved
 * per second.
 */

#include <string.h>
#include <assert.h>
#include <ross.h>

#include "codes/model-net.h"
#include "codes/lp-io.h"
#include "codes/codes.h"
#include "codes/codes_mapping.h"
#include "codes/configuration.h"
#include "codes/lp-type-lookup.h"

/* Compile-time parameters of the test. */
#define SVR_LP_NM "server"  /* LP type name used for the server LPs */
#define NUM_REQS 2          /* number of requests sent by each server */
#define PAYLOAD_SZ 4096     /* size of simulated data payload, bytes */
30 31

static int net_id = 0;
32
static int num_servers = 0;
33

34
/* whether to pull instead of push */
35
static int do_pull = 0;
36

37 38 39
static int num_servers_per_rep = 0;
static int lps_per_rep = 0;

40 41 42
typedef struct svr_msg svr_msg;
typedef struct svr_state svr_state;

43 44
char router_name[MAX_NAME_LENGTH];

45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
/* Kinds of events a server LP handles (dispatched in svr_event() and
 * svr_rev_event()). */
enum svr_event
{
    KICKOFF = 0, /* initial event, self-scheduled from svr_init() */
    REQ,         /* request event */
    ACK,         /* ack event */
    LOCAL        /* local event */
};

struct svr_state
{
    int msg_sent_count;   /* requests sent */
    int msg_recvd_count;  /* requests recvd */
    int local_recvd_count; /* number of local messages received */
    tw_stime start_ts;    /* time that we started sending requests */
60
    tw_stime end_ts;      /* time that we ended sending requests */
61
    tw_lpid svr_rel_id; /* relative ID of the server */
62 63 64 65 66
};

struct svr_msg
{
    enum svr_event svr_event_type;
67
//    enum net_event net_event_type;
68 69
    tw_lpid src;          /* source of this request or ack */

70 71 72
    // rc for modelnet calls
    model_net_event_return ret;

73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
    int incremented_flag; /* helper for reverse computation */
};

/* LP callbacks referenced by the svr_lp type table. */
static void svr_init(svr_state * ns, tw_lp * lp);
static void svr_event(svr_state * ns, tw_bf * b, svr_msg * m, tw_lp * lp);
static void svr_rev_event(svr_state * ns, tw_bf * b, svr_msg * m, tw_lp * lp);
static void svr_finalize(svr_state * ns, tw_lp * lp);

tw_lptype svr_lp = {
94 95 96 97
    (init_f) svr_init,
    (pre_run_f) NULL,
    (event_f) svr_event,
    (revent_f) svr_rev_event,
98 99
    (commit_f) NULL,
    (final_f)  svr_finalize,
100 101
    (map_f) codes_mapping,
    sizeof(svr_state),
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
};

/* LP type registration helpers */
extern const tw_lptype* svr_get_lp_type();
static void svr_add_lp_type();

/* time unit conversion */
static tw_stime ns_to_s(tw_stime ns);
static tw_stime s_to_ns(tw_stime ns);

/* forward event handlers */
static void handle_kickoff_event(svr_state * ns, svr_msg * m, tw_lp * lp);
static void handle_ack_event(svr_state * ns, svr_msg * m, tw_lp * lp);
static void handle_req_event(svr_state * ns, svr_msg * m, tw_lp * lp);
static void handle_local_event(svr_state * ns);

/* reverse event handlers */
static void handle_local_rev_event(svr_state * ns);
static void handle_kickoff_rev_event(svr_state * ns, svr_msg * m, tw_lp * lp);
static void handle_ack_rev_event(svr_state * ns, svr_msg * m, tw_lp * lp);
static void handle_req_rev_event(svr_state * ns, svr_msg * m, tw_lp * lp);

/* Option table registered with tw_opt_add() in main(); it declares only the
 * option group heading. */
const tw_optdef app_opt[] =
{
    TWOPT_GROUP("Model net test case"),
    TWOPT_END()
};

141 142 143 144 145 146
int main(
    int argc,
    char **argv)
{
    int nprocs;
    int rank;
147 148
    int num_nets;
    int *net_ids;
149 150
    //printf("\n Config count %d ",(int) config.lpgroups_count);
    g_tw_ts_end = s_to_ns(60*60*24*365); /* one year, in nsecs */
151
    lp_io_handle handle;
152 153 154 155

    tw_opt_add(app_opt);
    tw_init(&argc, &argv);

156
    if(argc < 2)
157
    {
158
	    printf("\n Usage: mpirun <args> --sync=2/3 mapping_file_name.conf (optional --nkp) ");
159
	    MPI_Finalize();
160
	    return 0;
161 162 163
    }
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
164

165
    configuration_load(argv[2], MPI_COMM_WORLD, &config);
166 167

    model_net_register();
168
    svr_add_lp_type();
169

170
    codes_mapping_setup();
171

172
    net_ids = model_net_configure(&num_nets);
173
    assert(num_nets>=1);
174 175 176
    net_id = *net_ids;
    free(net_ids);

177
    num_servers = codes_mapping_get_lp_count("MODELNET_GRP", 0, SVR_LP_NM,
178
            NULL, 1);
179

180 181 182 183 184
    if(lp_io_prepare("modelnet-test", LP_IO_UNIQ_SUFFIX, &handle, MPI_COMM_WORLD) < 0)
    {
        return(-1);
    }

185
    tw_run();
186 187
    model_net_report_stats(net_id);

188 189 190 191 192
    if(lp_io_flush(handle, MPI_COMM_WORLD) < 0)
    {
        return(-1);
    }

193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213
    tw_end();
    return 0;
}

/* Returns the address of the server LP type table (svr_lp). */
const tw_lptype* svr_get_lp_type()
{
    const tw_lptype *server_type = &svr_lp;

    return server_type;
}

static void svr_add_lp_type()
{
  lp_type_register("server", svr_get_lp_type());
}

/*
 * LP init callback: zeroes the server state, records the server's relative
 * id and schedules a KICKOFF event to itself.
 *
 * ns: state of this server LP
 * lp: the LP being initialised
 */
static void svr_init(
    svr_state * ns,
    tw_lp * lp)
{
    tw_event *e;
    svr_msg *m;
    tw_stime kickoff_time;

    memset(ns, 0, sizeof(*ns));

    ns->svr_rel_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);

    /* each server sends a dummy event to itself that will kick off the real
     * simulation
     */

    /* skew each kickoff event slightly to help avoid event ties later on */
    kickoff_time = g_tw_lookahead + tw_rand_unif(lp->rng);

    e = tw_event_new(lp->gid, kickoff_time, lp);
    m = tw_event_data(e);
    m->svr_event_type = KICKOFF;
    tw_event_send(e);

    return;
}

/*
 * Forward event callback: dispatches on m->svr_event_type to the matching
 * handler.  An unknown type is reported and then trips assert(0).
 *
 * ns: state of this server LP
 * b:  bitfield supplied by the simulator (unused)
 * m:  the event being processed
 * lp: the LP receiving the event
 */
static void svr_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    (void)b;
    switch (m->svr_event_type)
    {
        case REQ:
            handle_req_event(ns, m, lp);
            break;
        case ACK:
            handle_ack_event(ns, m, lp);
            break;
        case KICKOFF:
            handle_kickoff_event(ns, m, lp);
            break;
        case LOCAL:
            handle_local_event(ns);
            break;
        default:
            printf("\n Invalid message type %d ", m->svr_event_type);
            assert(0);
            break;
    }
}

/*
 * Reverse event callback: dispatches on m->svr_event_type to the reverse
 * handler that undoes the matching forward handler.  An unknown type trips
 * assert(0).
 *
 * ns: state of this server LP
 * b:  bitfield supplied by the simulator (unused)
 * m:  the event being rolled back
 * lp: the LP that processed the event
 */
static void svr_rev_event(
    svr_state * ns,
    tw_bf * b,
    svr_msg * m,
    tw_lp * lp)
{
    (void)b;
    switch (m->svr_event_type)
    {
        case REQ:
            handle_req_rev_event(ns, m, lp);
            break;
        case ACK:
            handle_ack_rev_event(ns, m, lp);
            break;
        case KICKOFF:
            handle_kickoff_rev_event(ns, m, lp);
            break;
        case LOCAL:
            handle_local_rev_event(ns);
            break;
        default:
            assert(0);
            break;
    }

    return;
}

/*
 * LP finalize callback: prints one summary line for this server with bytes
 * received, elapsed time, throughput and the three event counters.
 *
 * The MiB/s figure is computed from the nominal PAYLOAD_SZ*NUM_REQS bytes,
 * not from the observed msg_recvd_count.
 *
 * ns: state of this server LP
 * lp: the LP being finalized
 */
static void svr_finalize(
    svr_state * ns,
    tw_lp * lp)
{
    printf("server %llu recvd %d bytes in %f seconds, %f MiB/s sent_count %d recvd_count %d local_count %d \n",
        (unsigned long long)lp->gid,
        PAYLOAD_SZ*ns->msg_recvd_count,
        ns_to_s(ns->end_ts-ns->start_ts),
        ((double)(PAYLOAD_SZ*NUM_REQS)/(double)(1024*1024)/ns_to_s(ns->end_ts-ns->start_ts)),
        ns->msg_sent_count,
        ns->msg_recvd_count,
        ns->local_recvd_count);
    return;
}

/* Converts a time in nanoseconds to seconds (divides by 1e9). */
static tw_stime ns_to_s(tw_stime ns)
{
    const double ns_per_second = 1000.0 * 1000.0 * 1000.0;

    return ns / ns_per_second;
}

/* Converts a time in seconds to nanoseconds (multiplies by 1e9).  Note that
 * the parameter holds seconds despite being named "ns". */
static tw_stime s_to_ns(tw_stime ns)
{
    const double ns_per_second = 1000.0 * 1000.0 * 1000.0;

    return ns * ns_per_second;
}

/*
 * handle initial event
 *
 * Records the start timestamp and issues the first transfer to the next
 * server, (svr_rel_id + 1) % num_servers: a model-net pull when do_pull is
 * set, otherwise a push with a LOCAL self-event.  The model-net return value
 * is saved in m->ret for the reverse handler.
 *
 * ns: state of this server LP
 * m:  the KICKOFF event; m->ret is written
 * lp: the LP receiving the event
 */
static void handle_kickoff_event(
    svr_state * ns,
    svr_msg * m,
    tw_lp * lp)
{
    /* The event payloads live on the stack: they were previously malloc'd
     * and never freed, leaking two svr_msg per event.  This relies on
     * model-net copying the payload bytes before the call returns. */
    svr_msg m_local;
    svr_msg m_remote;
    int dest_id;

    /* zero first so that ret/incremented_flag are never sent uninitialised */
    memset(&m_local, 0, sizeof(m_local));
    m_local.svr_event_type = LOCAL;
    m_local.src = lp->gid;

    m_remote = m_local;
    m_remote.svr_event_type = (do_pull) ? ACK : REQ;

    /* record when transfers started on this server */
    ns->start_ts = tw_now(lp);

    /* guard the modulo below against an unconfigured server count */
    assert(num_servers > 0);

    /* each server sends a request to the next highest server */
    /* NOTE(review): dest_id is a server-relative index here, whereas
     * handle_ack_event() derives its destination through
     * codes_mapping_get_lpid_from_relative() - confirm which id space the
     * model-net calls expect. */
    dest_id = (ns->svr_rel_id + 1) % num_servers;

    if (do_pull){
        m->ret = model_net_pull_event(net_id, "test", dest_id, PAYLOAD_SZ, 0.0,
                sizeof(svr_msg), (const void*)&m_remote, lp);
    }
    else{
        m->ret = model_net_event(net_id, "test", dest_id, PAYLOAD_SZ, 0.0,
                sizeof(svr_msg), (const void*)&m_remote,
                sizeof(svr_msg), (const void*)&m_local, lp);
    }
    ns->msg_sent_count++;
}

344
static void handle_local_event(svr_state * ns)
345 346 347 348
{
    ns->local_recvd_count++;
}

349
static void handle_local_rev_event(svr_state * ns)
350 351 352 353 354 355 356 357 358 359
{
   ns->local_recvd_count--;
}
/*
 * reverse handler for req event
 *
 * Undoes handle_req_event(): decrements the receive counter and rolls back
 * the model-net call using the return value saved in m->ret.
 */
static void handle_req_rev_event(
    svr_state * ns,
    svr_msg * m,
    tw_lp * lp)
{
    ns->msg_recvd_count--;
    model_net_event_rc2(lp, &m->ret);

    return;
}


/*
 * reverse handler for kickoff
 *
 * Undoes handle_kickoff_event(): decrements the send counter and rolls back
 * the model-net call using the return value saved in m->ret.  The push and
 * pull cases are rolled back by the same call, so no branch on do_pull is
 * needed.
 */
static void handle_kickoff_rev_event(
    svr_state * ns,
    svr_msg * m,
    tw_lp * lp)
{
    ns->msg_sent_count--;
    model_net_event_rc2(lp, &m->ret);

    return;
}

/*
 * reverse handler for ack
 *
 * Undoes handle_ack_event().  m->incremented_flag tells whether the forward
 * handler issued another request; only then is there a model-net call and a
 * send count to roll back.
 */
static void handle_ack_rev_event(
    svr_state * ns,
    svr_msg * m,
    tw_lp * lp)
{
    if(m->incremented_flag)
    {
        model_net_event_rc2(lp, &m->ret);
        ns->msg_sent_count--;
    }
    // don't worry about resetting end_ts - just let the ack
    // event bulldoze it
    return;
}

/*
 * handle recving ack
 *
 * While fewer than NUM_REQS requests have been sent, issues the next one (a
 * pull when do_pull is set, otherwise a push) and sets m->incremented_flag.
 * Otherwise records the end timestamp and clears the flag.
 *
 * ns: state of this server LP
 * m:  the ACK event; m->ret and m->incremented_flag are written
 * lp: the LP receiving the event
 */
static void handle_ack_event(
    svr_state * ns,
    svr_msg * m,
    tw_lp * lp)
{
    /* The event payloads live on the stack: they were previously malloc'd
     * and never freed, leaking two svr_msg per event.  This relies on
     * model-net copying the payload bytes before the call returns. */
    svr_msg m_local;
    svr_msg m_remote;
    tw_lpid dest_id;

    /* zero first so that ret/incremented_flag are never sent uninitialised */
    memset(&m_local, 0, sizeof(m_local));
    m_local.svr_event_type = LOCAL;
    m_local.src = lp->gid;

    m_remote = m_local;
    m_remote.svr_event_type = (do_pull) ? ACK : REQ;

    /* safety check that this request got to the right server */
    dest_id = codes_mapping_get_lpid_from_relative(m->src, NULL, SVR_LP_NM, NULL, 0);

    /* in the "pull" case, src should actually be self */
    if (do_pull){
        assert(m->src == lp->gid);
    }
    else{
        assert(m->src == dest_id);
    }

    if(ns->msg_sent_count < NUM_REQS)
    {
        /* send another request */
        if (do_pull){
            m->ret = model_net_pull_event(net_id, "test", dest_id, PAYLOAD_SZ, 0.0,
                    sizeof(svr_msg), (const void*)&m_remote, lp);
        }
        else{
            m->ret = model_net_event(net_id, "test", dest_id, PAYLOAD_SZ, 0.0,
                    sizeof(svr_msg), (const void*)&m_remote,
                    sizeof(svr_msg), (const void*)&m_local, lp);
        }
        ns->msg_sent_count++;
        m->incremented_flag = 1;
    }
    else
    {
        ns->end_ts = tw_now(lp);
        m->incremented_flag = 0;
    }

    return;
}

451
/* handle receiving request
452 453
 * (note: this should never be called when doing the "pulling" version of
 * the program) */
454 455 456 457 458
static void handle_req_event(
    svr_state * ns,
    svr_msg * m,
    tw_lp * lp)
{
459
    assert(!do_pull);
460 461 462 463 464 465 466 467
    svr_msg * m_local = malloc(sizeof(svr_msg));
    svr_msg * m_remote = malloc(sizeof(svr_msg));

    m_local->svr_event_type = LOCAL;
    m_local->src = lp->gid;

    memcpy(m_remote, m_local, sizeof(svr_msg));
    m_remote->svr_event_type = ACK;
468
    //printf("handle_req_event(), lp %llu src %llu .\n", (unsigned long long)lp->gid, (unsigned long long) m->src);
469 470

    /* safety check that this request got to the right server */
471
//    printf("\n m->src %d lp->gid %d ", m->src, lp->gid);
472 473 474 475 476 477
    ns->msg_recvd_count++;

    /* send ack back */
    /* simulated payload of 1 MiB */
    /* also trigger a local event for completion of payload msg */
    /* remote host will get an ack event */
478 479

   // mm Q: What should be the size of an ack message? may be a few bytes? or larger..?
480
    m->ret = model_net_event(net_id, "test", m->src, PAYLOAD_SZ, 0.0, sizeof(svr_msg), (const void*)m_remote, sizeof(svr_msg), (const void*)m_local, lp);
481 482 483 484 485 486 487 488 489
//    printf("\n Sending ack to LP %d %d ", m->src, m_remote->src);
    return;
}

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */