/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
8
9
#include <string.h>
#include <assert.h>

10
#include "codes/model-net.h"
11
#include "codes/model-net-method.h"
12
#include "codes/model-net-lp.h"
13
#include "codes/model-net-sched.h"
14
#include "codes/codes.h"
15
#include <codes/codes_mapping.h>
16

17
18
19
20
/* NOTE(review): neither constant is referenced in the visible part of this
 * file; confirm whether they are still needed before relying on them. */
#define STR_SIZE 16
#define PROC_TIME 10.0

extern struct model_net_method simplenet_method;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
21
extern struct model_net_method simplep2p_method;
22
extern struct model_net_method torus_method;
23
extern struct model_net_method dragonfly_method;
Philip Carns's avatar
Philip Carns committed
24
extern struct model_net_method loggp_method;
25

26
27
28
29
30
31
32
33
34
35
36
37
/* LP configuration names, one entry per NETWORK_DEF row (second X-macro
 * field). model_net_register() matches these against the LP type names found
 * in the configuration, and model_net_find_local_device_mctx() indexes the
 * table by network id. */
#define X(a,b,c,d) b,
char * model_net_lp_config_names[] = {
    NETWORK_DEF
};
#undef X

/* Network method names, one entry per NETWORK_DEF row (third X-macro field).
 * These are the names compared against PARAMS:modelnet_order values in
 * model_net_configure() and against the argument of model_net_get_id(). */
#define X(a,b,c,d) c,
char * model_net_method_names[] = {
    NETWORK_DEF
};
#undef X

38
/* Global array initialization, terminated with a NULL entry */
39
40
41
42
43
/* Per-network method tables, one entry per NETWORK_DEF row (fourth X-macro
 * field), indexed by network id throughout this file.
 * NOTE(review): model_net_get_id() stops at the first NULL entry, so
 * NETWORK_DEF presumably ends with a NULL row - confirm in its definition. */
#define X(a,b,c,d) d,
struct model_net_method* method_array[] = { 
    NETWORK_DEF
};
#undef X
44

45
// counter and offset for the MN_START_SEQ / MN_END_SEQ macros
46
/* tested as a boolean by the event functions: when non-zero, mn_msg_offset is
 * folded into the event time offsets (presumably maintained by the
 * MN_START_SEQ / MN_END_SEQ macros - defined outside this file) */
int mn_in_sequence = 0;
47
48
/* running time offset added to events issued while mn_in_sequence is
 * non-zero; advanced by the event functions in this file */
tw_stime mn_msg_offset = 0.0;

49
50
51
52
// message parameters for use via model_net_set_msg_param
// - is_msg_params_set[] holds one flag per parameter type; flags are raised by
//   model_net_set_msg_param and cleared again by model_net_configure and after
//   each message built in model_net_event_impl_base
// - sched_params holds the scheduler parameters copied into the next message
//   when the corresponding flag is set
static int is_msg_params_set[MAX_MN_MSG_PARAM_TYPES];
static mn_sched_params sched_params;

53
54
55
56
57
58
59
60
61
62
63
// global listing of lp types found by model_net_register
// - needs to be held between the register and configure calls
// - indexed by network id; an entry is 1 when an LP type with the matching
//   model_net_lp_config_names[] name appears in the configuration, else 0
static int do_config_nets[MAX_NETS];

void model_net_register(){
    // first set up which networks need to be registered, then pass off to base
    // LP to do its thing
    memset(do_config_nets, 0, MAX_NETS * sizeof(*do_config_nets));
    for (int grp = 0; grp < lpconf.lpgroups_count; grp++){
        config_lpgroup_t *lpgroup = &lpconf.lpgroups[grp];
        for (int lpt = 0; lpt < lpgroup->lptypes_count; lpt++){
64
            char const *nm = lpgroup->lptypes[lpt].name.ptr;
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
            for (int n = 0; n < MAX_NETS; n++){
                if (!do_config_nets[n] && 
                        strcmp(model_net_lp_config_names[n], nm) == 0){
                    do_config_nets[n] = 1;
                    break;
                }
            }
        }
    }
    model_net_base_register(do_config_nets);
}

int* model_net_configure(int *id_count){
    // first call the base LP configure, which sets up the general parameters
    model_net_base_configure();

    // do network-specific configures
    *id_count = 0;
    for (int i = 0; i < MAX_NETS; i++) {
        if (do_config_nets[i]){
            method_array[i]->mn_configure();
            (*id_count)++;
        }
    }

    // allocate the output
    int *ids = malloc(*id_count * sizeof(int));
    // read the ordering provided by modelnet_order
    char **values;
    size_t length;
    int ret = configuration_get_multivalue(&config, "PARAMS", "modelnet_order",
            NULL, &values, &length);
    if (ret != 1){
        tw_error(TW_LOC, "unable to read PARAMS:modelnet_order variable\n");
    }
    if (length != (size_t) *id_count){
        tw_error(TW_LOC, "number of networks in PARAMS:modelnet_order "
                "do not match number in LPGROUPS\n");
    }
    // set the index
    for (int i = 0; i < *id_count; i++){
        ids[i] = -1;
        for (int n = 0; n < MAX_NETS; n++){
            if (strcmp(values[i], model_net_method_names[n]) == 0){
                if (!do_config_nets[n]){
                    tw_error(TW_LOC, "network in PARAMS:modelnet_order not "
                            "present in LPGROUPS: %s\n", values[i]);
                }
                ids[i] = n;
                break;
            }
        }
        if (ids[i] == -1){
            tw_error(TW_LOC, "unknown network in PARAMS:modelnet_order: %s\n",
                    values[i]);
        }
        free(values[i]);
    }
    free(values);

125
126
127
128
    // init the per-msg params here
    memset(is_msg_params_set, 0,
            MAX_MN_MSG_PARAM_TYPES*sizeof(*is_msg_params_set));

129
    return ids;
130
131
}

132
133
134
int model_net_get_id(char *name){
    int i;
    for(i=0; method_array[i] != NULL; i++) {
135
        if(strcmp(model_net_method_names[i], name) == 0) {
136
137
138
139
140
141
            return i;
        }
    }
    return -1;
}

142
143
144
/*
 * Format one statistics record as a single tab-separated text line and hand
 * it to lp_io_write() under the identifier "model-net-category-<category>".
 *
 * lpid - LP the statistics belong to (printed in the "lp:" field)
 * stat - statistics record to write
 *
 * A non-zero return from lp_io_write() trips an assert.
 */
void model_net_write_stats(tw_lpid lpid, struct mn_stats* stat)
{
    // 19 == strlen("model-net-category-"), plus room for the category + NUL
    char id[19+CATEGORY_NAME_MAX+1];
    char data[1024];

    sprintf(id, "model-net-category-%s", stat->category);

    sprintf(data, "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t" 
        "recv_count:%ld\trecv_bytes:%ld\trecv_time:%f\tmax_event_size:%ld\n",
        (long)lpid,
        stat->send_count, stat->send_bytes, stat->send_time,
        stat->recv_count, stat->recv_bytes, stat->recv_time,
        stat->max_event_size);

    int ret = lp_io_write(lpid, id, strlen(data), data);
    assert(ret == 0);
}

/*
 * Write out every used entry (non-empty category name) of a CATEGORY_MAX-sized
 * statistics array, followed by an aggregate record named "all".
 *
 * The aggregate sums the counts, bytes and times of the used entries and keeps
 * the largest max_event_size seen among them.
 */
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[])
{
    struct mn_stats all;

    memset(&all, 0, sizeof(all));
    sprintf(all.category, "all");

    for (int idx = 0; idx < CATEGORY_MAX; idx++)
    {
        struct mn_stats *cur = &mn_stats_array[idx];

        // unused slots have an empty category name
        if (strlen(cur->category) == 0)
            continue;

        all.send_count += cur->send_count;
        all.send_bytes += cur->send_bytes;
        all.send_time  += cur->send_time;
        all.recv_count += cur->recv_count;
        all.recv_bytes += cur->recv_bytes;
        all.recv_time  += cur->recv_time;
        if (cur->max_event_size > all.max_event_size)
            all.max_event_size = cur->max_event_size;

        model_net_write_stats(lpid, cur);
    }

    model_net_write_stats(lpid, &all);
}

194
/*
 * Find the statistics slot for a category, claiming the first unused slot
 * (empty category name) if the category has not been seen yet.
 *
 * category       - category name to look up
 * mn_stats_array - array of CATEGORY_MAX statistics records
 *
 * Names longer than the category field are truncated when stored, and lookups
 * compare only as many characters as the field can hold, so an over-long name
 * keeps resolving to the same slot instead of overflowing the field.
 *
 * Returns the matching or newly claimed slot. When all CATEGORY_MAX slots are
 * taken by other categories the error is reported through tw_error().
 */
struct mn_stats* model_net_find_stats(char const * category, mn_stats mn_stats_array[])
{
    // usable characters in the (array-typed) category field, excluding the NUL
    size_t const max_len = sizeof(mn_stats_array[0].category) - 1;
    int i;

    for(i=0; i<CATEGORY_MAX; i++)
    {
        char *slot = mn_stats_array[i].category;

        if(slot[0] == '\0')
        {
            // first free slot: claim it with a bounded, terminated copy
            strncpy(slot, category, max_len);
            slot[max_len] = '\0';
            break;
        }
        if(strncmp(category, slot, max_len) == 0)
        {
            break;
        }
    }

    if(i == CATEGORY_MAX)
    {
        // never hand out a pointer past the end of the array, even when
        // asserts are compiled out
        tw_error(TW_LOC, "model-net: no free statistics slot for category "
                "%s (limit %d)\n", category, (int) CATEGORY_MAX);
        return NULL;
    }

    return(&mn_stats_array[i]);
}

224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
/*
 * Short-circuit path used when the sending and receiving model-net LPs are
 * the same: the self and remote events are delivered directly as ROSS events
 * instead of going through the network model.
 *
 * final_dest_lp     - LP the message is addressed to
 * is_pull           - non-zero for pull operations; the "remote" event of a
 *                     pull is the requester's completion event and is
 *                     therefore delivered back to the sender
 * offset            - caller-supplied time offset added to every event
 * remote_event_size / remote_event - optional remote event payload
 * self_event_size   / self_event   - optional self event payload
 * sender            - LP issuing the operation
 *
 * Returns the number of codes_local_latency() calls made (0 to 2), which is
 * the count model_net_event_rc2() reverses.
 */
static model_net_event_return model_net_noop_event(
        tw_lpid final_dest_lp,
        int is_pull,
        tw_stime offset,
        int remote_event_size,
        void const * remote_event,
        int self_event_size,
        void const * self_event,
        tw_lp *sender)
{
    model_net_event_return num_rng_calls = 0;
    tw_stime poffset = mn_in_sequence ? mn_msg_offset : 0.0;

    if (self_event_size && self_event != NULL) {
        poffset += codes_local_latency(sender);
        num_rng_calls++;
        tw_event *e = tw_event_new(sender->gid, poffset+offset, sender);
        memcpy(tw_event_data(e), self_event, self_event_size);
        tw_event_send(e);
    }

    if (remote_event_size && remote_event != NULL) {
        poffset += codes_local_latency(sender);
        num_rng_calls++;
        /* special case - for a pull, the "remote" event is the one the
         * requester wants back, so it goes to the sender */
        tw_lpid target = is_pull ? sender->gid : final_dest_lp;
        tw_event *e = tw_event_new(target, poffset+offset, sender);
        memcpy(tw_event_data(e), remote_event, remote_event_size);
        tw_event_send(e);
    }

    /* NOTE(review): poffset already starts from mn_msg_offset when in a
     * sequence, so this adds the previous offset a second time, unlike
     * model_net_event_impl_base which only adds the new latency - confirm
     * whether that is intended. Left unchanged to keep event timing as is. */
    if (mn_in_sequence) {
        mn_msg_offset += poffset;
    }

    return num_rng_calls;
}

static model_net_event_return model_net_event_impl_base(
260
        int net_id,
261
262
        struct codes_mctx const * send_map_ctx,
        struct codes_mctx const * recv_map_ctx,
263
        char const * category, 
264
265
266
267
268
        tw_lpid final_dest_lp, 
        uint64_t message_size, 
        int is_pull,
        tw_stime offset,
        int remote_event_size,
269
        void const * remote_event,
270
        int self_event_size,
271
        void const * self_event,
272
        tw_lp *sender) {
273

274
275
276
277
278
279
    if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg) 
            > g_tw_msg_sz){
        tw_error(TW_LOC, "Error: model_net trying to transmit an event of size "
                         "%d but ROSS is configured for events of size %zd\n",
                         remote_event_size+self_event_size+sizeof(model_net_wrap_msg),
                         g_tw_msg_sz);
280
        return -1;
281
    }
282

283
284
285
286
287
    tw_lpid src_mn_lp = model_net_find_local_device_mctx(net_id, send_map_ctx,
            sender->gid);
    tw_lpid dest_mn_lp = model_net_find_local_device_mctx(net_id, recv_map_ctx,
            final_dest_lp);

288
289
290
291
292
    if (src_mn_lp == dest_mn_lp)
        return model_net_noop_event(final_dest_lp, is_pull, offset,
                remote_event_size, remote_event, self_event_size, self_event,
                sender);

293
    tw_stime poffset = codes_local_latency(sender);
294
    if (mn_in_sequence){
295
296
297
298
        tw_stime tmp = mn_msg_offset;
        mn_msg_offset += poffset;
        poffset += tmp;
    }
299
300

    tw_event *e = tw_event_new(src_mn_lp, poffset+offset, sender);
301
302

    model_net_wrap_msg *m = tw_event_data(e);
303
    msg_set_header(model_net_base_magic, MN_BASE_NEW_MSG, sender->gid, &m->h);
304
305

    // set the request struct 
306
    model_net_request *r = &m->msg.m_base.req;
307
308
    r->net_id = net_id;
    r->final_dest_lp = final_dest_lp;
309
    r->dest_mn_lp = dest_mn_lp;
310
    r->src_lp = sender->gid;
311
312
313
    r->msg_size = message_size;
    r->remote_event_size = remote_event_size;
    r->self_event_size = self_event_size;
314
    r->is_pull = is_pull;
315
316
    strncpy(r->category, category, CATEGORY_NAME_MAX-1);
    r->category[CATEGORY_NAME_MAX-1]='\0';
317
318
319

    // this is an outgoing message
    m->msg.m_base.is_from_remote = 0;
320

321
322
323
324
    // set the msg-specific params
    if (is_msg_params_set[MN_SCHED_PARAM_PRIO])
        m->msg.m_base.sched_params = sched_params;
    else // set the default
325
        model_net_sched_set_default_params(&m->msg.m_base.sched_params);
326
327
328
329
    // once params are set, clear the flags 
    memset(is_msg_params_set, 0,
            MAX_MN_MSG_PARAM_TYPES*sizeof(*is_msg_params_set));

330
331
332
333
334
335
336
337
    void *e_msg = (m+1);
    if (remote_event_size > 0){
        memcpy(e_msg, remote_event, remote_event_size);
        e_msg = (char*)e_msg + remote_event_size; 
    }
    if (self_event_size > 0){
        memcpy(e_msg, self_event, self_event_size);
    }
338

339
340
    //print_base(m);
    tw_event_send(e);
341
342

    return 1;
343
344
345
}
/*
 * Reverse handler shared by model_net_event_rc and model_net_pull_event_rc:
 * reverses a single codes_local_latency() call made on behalf of sender.
 */
static void model_net_event_impl_base_rc(tw_lp *sender)
{
    codes_local_latency_reverse(sender);
}
347

348
model_net_event_return model_net_event(
349
    int net_id,
350
    char const * category, 
351
352
353
354
    tw_lpid final_dest_lp, 
    uint64_t message_size, 
    tw_stime offset,
    int remote_event_size,
355
    void const * remote_event,
356
    int self_event_size,
357
    void const * self_event,
358
359
    tw_lp *sender)
{
360
361
362
363
    return model_net_event_impl_base(net_id, CODES_MCTX_DEFAULT,
            CODES_MCTX_DEFAULT, category, final_dest_lp, message_size, 0,
            offset, remote_event_size, remote_event, self_event_size,
            self_event, sender);
364
365
}

366
model_net_event_return model_net_event_annotated(
367
        int net_id,
368
369
        char const * annotation,
        char const * category, 
370
371
372
373
        tw_lpid final_dest_lp, 
        uint64_t message_size, 
        tw_stime offset,
        int remote_event_size,
374
        void const * remote_event,
375
        int self_event_size,
376
        void const * self_event,
377
        tw_lp *sender){
378
    struct codes_mctx mc = codes_mctx_set_group_modulo(annotation, true);
379
    return model_net_event_impl_base(net_id, &mc, &mc, category, final_dest_lp,
380
381
            message_size, 0, offset, remote_event_size, remote_event,
            self_event_size, self_event, sender);
382
383
}

384
/*
 * Issue a regular (non-pull) model-net message with caller-supplied mapping
 * contexts for the sending and receiving side.
 *
 * Returns whatever model_net_event_impl_base returns.
 */
model_net_event_return model_net_event_mctx(
        int net_id,
        struct codes_mctx const * send_map_ctx,
        struct codes_mctx const * recv_map_ctx,
        char const * category, 
        tw_lpid final_dest_lp, 
        uint64_t message_size, 
        tw_stime offset,
        int remote_event_size,
        void const * remote_event,
        int self_event_size,
        void const * self_event,
        tw_lp *sender)
{
    int const is_pull = 0;

    return model_net_event_impl_base(
            net_id, send_map_ctx, recv_map_ctx,
            category, final_dest_lp, message_size, is_pull, offset,
            remote_event_size, remote_event,
            self_event_size, self_event,
            sender);
}

403
model_net_event_return model_net_pull_event(
404
        int net_id,
405
        char const *category,
406
407
408
409
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
410
        void const *self_event,
411
412
413
        tw_lp *sender){
    /* NOTE: for a pull, we are filling the *remote* event - it will be remote
     * from the destination's POV */
414
415
416
    return model_net_event_impl_base(net_id, CODES_MCTX_DEFAULT,
            CODES_MCTX_DEFAULT, category, final_dest_lp, message_size, 1,
            offset, self_event_size, self_event, 0, NULL, sender);
417
418
}

419
model_net_event_return model_net_pull_event_annotated(
420
        int net_id,
421
422
        char const * annotation,
        char const *category,
423
424
425
426
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
427
        void const *self_event,
428
429
430
        tw_lp *sender){
    /* NOTE: for a pull, we are filling the *remote* event - it will be remote
     * from the destination's POV */
431
    struct codes_mctx mc = codes_mctx_set_group_modulo(annotation, true);
432
    return model_net_event_impl_base(net_id, &mc, &mc, category, final_dest_lp,
433
434
            message_size, 1, offset, self_event_size, self_event, 0, NULL,
            sender);
435
436
}

437
/*
 * Issue a pull operation with caller-supplied mapping contexts for the
 * sending and receiving side.
 *
 * Returns whatever model_net_event_impl_base returns.
 */
model_net_event_return model_net_pull_event_mctx(
        int net_id,
        struct codes_mctx const * send_map_ctx,
        struct codes_mctx const * recv_map_ctx,
        char const *category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
        void const *self_event,
        tw_lp *sender)
{
    int const is_pull = 1;

    /* NOTE: for a pull, the caller's self event travels in the *remote* event
     * slot - it will be remote from the destination's POV. The self event
     * slot itself stays empty. */
    return model_net_event_impl_base(
            net_id, send_map_ctx, recv_map_ctx,
            category, final_dest_lp, message_size, is_pull, offset,
            self_event_size, self_event,
            0, NULL,
            sender);
}

455
456
457
458
459
460
461
462
/*
 * Reverse handler driven by the value an event call returned: undoes *ret
 * codes_local_latency() calls for sender (nothing when *ret is zero or
 * negative).
 */
void model_net_event_rc2(
        tw_lp *sender,
        model_net_event_return const * ret)
{
    int undone = 0;
    while (undone < *ret) {
        codes_local_latency_reverse(sender);
        undone++;
    }
}

463
464
465
466
/*
 * Reverse handler for model_net_event: delegates to the shared base reverse
 * routine. net_id and message_size are accepted but not used.
 */
void model_net_event_rc(
        int net_id,
        tw_lp *sender,
        uint64_t message_size)
{
    (void) net_id;
    (void) message_size;
    model_net_event_impl_base_rc(sender);
}

/*
 * Reverse handler for model_net_pull_event: delegates to the shared base
 * reverse routine. net_id is accepted but not used.
 */
void model_net_pull_event_rc(
        int net_id,
        tw_lp *sender)
{
    (void) net_id;
    model_net_event_impl_base_rc(sender);
}

476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
/*
 * Record a per-message parameter to be attached to the next message built by
 * model_net_event_impl_base.
 *
 * type     - parameter family; only MN_MSG_PARAM_SCHED is handled
 * sub_type - parameter within the family; only MN_SCHED_PARAM_PRIO is handled
 * params   - pointer to the value; read as an int for MN_SCHED_PARAM_PRIO
 *
 * Unsupported type / sub_type values are reported through tw_error().
 */
void model_net_set_msg_param(
        enum msg_param_type type,
        int sub_type,
        const void * params)
{
    if (type != MN_MSG_PARAM_SCHED){
        tw_error(TW_LOC, "unknown or unsupported msg_param_type");
        return;
    }

    // the flag is raised before the sub-type is examined
    is_msg_params_set[MN_MSG_PARAM_SCHED] = 1;

    if (sub_type == MN_SCHED_PARAM_PRIO){
        sched_params.prio = *(int const *)params;
    }
    else {
        tw_error(TW_LOC, "unknown or unsupported "
                "MN_MSG_PARAM_SCHED parameter type");
    }
}

497
/* returns the message size, can be either simplenet, dragonfly or torus message size*/
498
int model_net_get_msg_sz(int net_id)
499
500
501
{
   // TODO: Add checks on network name
   // TODO: Add dragonfly and torus network models
502
503
   return sizeof(model_net_wrap_msg);
#if 0
504
   if(net_id < 0 || net_id >= MAX_NETS)
505
506
507
508
509
510
     {
      printf("%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
      exit(-1);
     }

       return method_array[net_id]->mn_get_msg_sz();
511
#endif
512
513
514
}

/* returns the packet size in the modelnet struct */
515
uint64_t model_net_get_packet_size(int net_id)
516
{
517
  if(net_id < 0 || net_id >= MAX_NETS)
518
519
520
521
522
523
524
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }
  return method_array[net_id]->packet_size; // TODO: where to set the packet size?
}

525
/* This event does a collective operation call for model-net */
526
/*
 * Issue a collective operation on the given network by forwarding to that
 * network's mn_collective_call method.
 *
 * net_id must be in [0, MAX_NETS); anything else prints an error to stderr
 * and exits. (net_id == MAX_NETS used to slip through the check even though
 * every other accessor in this file rejects it.)
 */
void model_net_event_collective(int net_id, char const * category, int message_size, int remote_event_size, const void* remote_event, tw_lp* sender)
{
  if(net_id < 0 || net_id >= MAX_NETS)
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }
  /* void function: call the method without 'return <expr>' */
  method_array[net_id]->mn_collective_call(category, message_size, remote_event_size, remote_event, sender);
}

/* reverse event of the collective operation call */
/*
 * Reverse handler for model_net_event_collective: forwards to the network's
 * mn_collective_call_rc method.
 *
 * net_id must be in [0, MAX_NETS); anything else prints an error to stderr
 * and exits. (net_id == MAX_NETS used to slip through the check even though
 * every other accessor in this file rejects it.)
 */
void model_net_event_collective_rc(int net_id, int message_size, tw_lp* sender)
{
  if(net_id < 0 || net_id >= MAX_NETS)
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }
  /* void function: call the method without 'return <expr>' */
  method_array[net_id]->mn_collective_call_rc(message_size, sender);
}

547
548
549
/* returns lp type for modelnet */
const tw_lptype* model_net_get_lp_type(int net_id)
{
550
    if(net_id < 0 || net_id >= MAX_NETS)
551
552
553
554
555
556
557
558
559
560
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }

   // TODO: ADd checks by network names
   // Add dragonfly and torus network models
   return method_array[net_id]->mn_get_lp_type();
}

561
562
void model_net_report_stats(int net_id)
{
563
  if(net_id < 0 || net_id >= MAX_NETS)
564
565
566
567
568
569
570
  {
    fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
    exit(-1);
   }

     // TODO: ADd checks by network names
     //    // Add dragonfly and torus network models
571
572
   method_array[net_id]->mn_report_stats();
   return;
573
}
574

575
/*
 * Find the model-net LP of network net_id associated with sender_gid.
 *
 * When ignore_annotations is non-zero the default mapping context is used;
 * otherwise a group-modulo context is built from annotation. The lookup
 * itself is done by model_net_find_local_device_mctx.
 */
tw_lpid model_net_find_local_device(
        int          net_id,
        const char * annotation,
        int          ignore_annotations,
        tw_lpid      sender_gid)
{
    if (ignore_annotations)
        return model_net_find_local_device_mctx(net_id, CODES_MCTX_DEFAULT,
                sender_gid);

    // ignore_annotations is zero on this path and is passed through as is
    struct codes_mctx map_ctx =
        codes_mctx_set_group_modulo(annotation, ignore_annotations);
    return model_net_find_local_device_mctx(net_id, &map_ctx, sender_gid);
}

592
593
594
595
/*
 * Resolve, through the given mapping context, the LP whose type name is the
 * LP configuration name of network net_id, relative to sender_gid.
 * net_id is used directly as an index and is not range-checked here.
 */
tw_lpid model_net_find_local_device_mctx(
        int net_id,
        struct codes_mctx const * map_ctx,
        tw_lpid sender_gid)
{
    char const *lp_type_name = model_net_lp_config_names[net_id];
    return codes_mctx_to_lpid(map_ctx, lp_type_name, sender_gid);
}

601
602
603
604
605
606
607
608
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */