/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
8
9
#include <string.h>
#include <assert.h>

10
#include "codes/model-net.h"
11
#include "codes/model-net-method.h"
12
#include "codes/model-net-lp.h"
13
#include "codes/model-net-sched.h"
14
#include "codes/codes.h"
15

16
17
18
19
/* NOTE(review): neither macro is referenced in the part of this file that is
 * visible here; their intended meaning is undocumented -- confirm before use. */
#define STR_SIZE 16
#define PROC_TIME 10.0

/* Method descriptors for the supported network models. They are defined
 * outside this file; presumably NETWORK_DEF refers to them when
 * method_array[] is built -- confirm against the header. */
extern struct model_net_method simplenet_method,
                               simplewan_method,
                               torus_method,
                               dragonfly_method,
                               loggp_method;
24

25
26
27
28
29
30
31
32
33
34
35
36
/* X-macro expansion: NETWORK_DEF (defined outside this file) is expanded with
 * X() selecting the second field of every entry. Judging by the array name
 * these are the LP names used in configuration files -- confirm in the
 * header that defines NETWORK_DEF. */
#define X(a,b,c,d) b,
char * model_net_lp_config_names[] = {
    NETWORK_DEF
};
#undef X

/* Same expansion selecting the third field: the method names that
 * model_net_setup() and model_net_get_id() compare against. The table is
 * indexed in parallel with method_array[]. */
#define X(a,b,c,d) c,
char * model_net_method_names[] = {
    NETWORK_DEF
};
#undef X

37
/* Global array initialization, terminated with a NULL entry */
38
39
40
41
42
/* Fourth field of every NETWORK_DEF entry: pointer to that method's
 * struct model_net_method. Code in this file indexes the array by network id
 * and scans it until it meets a NULL entry, so NETWORK_DEF is expected to end
 * with one -- confirm where NETWORK_DEF is defined. */
#define X(a,b,c,d) d,
struct model_net_method* method_array[] = { 
    NETWORK_DEF
};
#undef X
43

44
45
46
/* When in_sequence is non-zero, model_net_event_impl_base() adds the current
 * mn_msg_offset to the delay of the event it sends and then grows
 * mn_msg_offset by the local latency it drew for that event. Nothing in the
 * visible part of this file sets or resets either variable. */
int in_sequence = 0;
tw_stime mn_msg_offset = 0.0;

47
int model_net_setup(char* name,
48
		    uint64_t packet_size,
49
50
		    const void* net_params)
{
51
     int i;
52
53
54
    /* find struct for underlying method (according to configuration file) */
     for(i=0; method_array[i] != NULL; i++)
     {
55
     	if(strcmp(model_net_method_names[i], name) == 0)
56
57
58
59
60
61
	{
	   method_array[i]->mn_setup(net_params);
	   method_array[i]->packet_size = packet_size;
	   return(i);
	}
     }
62
     fprintf(stderr, "Error: undefined network name %s (Available options simplenet, torus, dragonfly) \n", name);
63
64
65
     return -1; // indicating error
}

66
67
68
int model_net_get_id(char *name){
    int i;
    for(i=0; method_array[i] != NULL; i++) {
69
        if(strcmp(model_net_method_names[i], name) == 0) {
70
71
72
73
74
75
            return i;
        }
    }
    return -1;
}

76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/*
 * Format one statistics record as a tab-separated text line and hand it to
 * lp_io_write() under the identifier "model-net-category-<category>".
 *
 * lpid - LP the record belongs to; printed in the line and passed to
 *        lp_io_write()
 * stat - counters to report
 *
 * Both buffers are filled with snprintf(), so an over-long category name or
 * record is truncated to the buffer size instead of overrunning the stack.
 * Asserts that lp_io_write() returned 0.
 */
void model_net_write_stats(tw_lpid lpid, struct mn_stats* stat)
{
    int ret;
    char id[32];
    char data[1024];

    snprintf(id, sizeof(id), "model-net-category-%s", stat->category);
    snprintf(data, sizeof(data),
        "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t" 
        "recv_count:%ld\trecv_bytes:%ld\trecv_time:%f\tmax_event_size:%ld\n",
        (long)lpid,
        stat->send_count,
        stat->send_bytes,
        stat->send_time,
        stat->recv_count,
        stat->recv_bytes,
        stat->recv_time,
        stat->max_event_size);

    ret = lp_io_write(lpid, id, strlen(data), data);
    assert(ret == 0);
    (void)ret; /* keeps builds with NDEBUG free of an unused-variable warning */

    return;
}

/*
 * Write out the statistics of every category in use, followed by one
 * aggregate record named "all".
 *
 * lpid           - LP id forwarded to model_net_write_stats()
 * mn_stats_array - CATEGORY_MAX slots; a slot whose category name is empty
 *                  is skipped
 *
 * In the aggregate the counts, byte totals and times are summed, while
 * max_event_size is the largest value seen in any used slot.
 */
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[])
{
    struct mn_stats all;
    int slot;

    memset(&all, 0, sizeof(all));
    sprintf(all.category, "all");

    for(slot = 0; slot < CATEGORY_MAX; slot++)
    {
        struct mn_stats *cur = &mn_stats_array[slot];

        /* unused slot: nothing to accumulate or report */
        if(cur->category[0] == '\0')
            continue;

        all.send_count += cur->send_count;
        all.send_bytes += cur->send_bytes;
        all.send_time  += cur->send_time;
        all.recv_count += cur->recv_count;
        all.recv_bytes += cur->recv_bytes;
        all.recv_time  += cur->recv_time;
        if(all.max_event_size < cur->max_event_size)
            all.max_event_size = cur->max_event_size;

        model_net_write_stats(lpid, cur);
    }

    model_net_write_stats(lpid, &all);
}

/*
 * Return the statistics slot for `category`, claiming the first free slot
 * when the category has not been seen before.
 *
 * category       - NUL-terminated category name
 * mn_stats_array - CATEGORY_MAX slots; an empty category name marks a free
 *                  slot, and the scan stops at the first free slot it meets
 *
 * A name that does not fit into a slot's category field is stored truncated,
 * and names are compared over that same truncated length, so repeated
 * lookups of an over-long name keep resolving to the same slot. Names that
 * fit behave exactly as with a plain strcmp()/strcpy().
 *
 * Asserts if the array holds neither the category nor a free slot.
 */
struct mn_stats* model_net_find_stats(const char* category, mn_stats mn_stats_array[])
{
    int i;
    int new_flag = 0;
    int found_flag = 0;
    /* longest name a slot can hold, not counting the terminating NUL */
    const size_t name_max = sizeof(mn_stats_array[0].category) - 1;

    for(i=0; i<CATEGORY_MAX; i++)
    {
        if(strlen(mn_stats_array[i].category) == 0)
        {
            found_flag = 1;
            new_flag = 1;
            break;
        }
        if(strncmp(category, mn_stats_array[i].category, name_max) == 0)
        {
            found_flag = 1;
            new_flag = 0;
            break;
        }
    }
    assert(found_flag);

    if(new_flag)
    {
        /* bounded copy: never write past the end of the category field */
        strncpy(mn_stats_array[i].category, category, name_max);
        mn_stats_array[i].category[name_max] = '\0';
    }
    return(&mn_stats_array[i]);
}

158
/*
 * Common implementation behind model_net_event() and model_net_pull_event():
 * builds an MN_BASE_NEW_MSG event for the sender's local model-net device LP
 * and sends it.
 *
 * net_id            - index into method_array[]
 * category          - category name; copied into the request, truncated to
 *                     CATEGORY_NAME_MAX-1 characters
 * final_dest_lp     - stored in the request as the final destination
 * message_size      - stored in the request as msg_size
 * is_pull           - stored in the request as is_pull
 * offset            - added to the delay of the new event
 * remote_event_size - byte count of remote_event (nothing copied if <= 0)
 * remote_event      - payload copied directly after the wrap message
 * self_event_size   - byte count of self_event (nothing copied if <= 0)
 * self_event        - payload copied after the remote payload
 * sender            - LP issuing the request
 *
 * Raises tw_error() and sends nothing when the wrap message plus both
 * payloads exceed g_tw_msg_sz.
 */
static void model_net_event_impl_base(
        int net_id,
        char* category, 
        tw_lpid final_dest_lp, 
        uint64_t message_size, 
        int is_pull,
        tw_stime offset,
        int remote_event_size,
        const void* remote_event,
        int self_event_size,
        const void* self_event,
        tw_lp *sender) {

    /* The sum has type size_t, so it must be printed with %zu (the original
     * %d/%zd specifiers did not match the argument types). */
    size_t total_size = remote_event_size + self_event_size
        + sizeof(model_net_wrap_msg);

    if (total_size > g_tw_msg_sz){
        tw_error(TW_LOC, "Error: model_net trying to transmit an event of size "
                         "%zu but ROSS is configured for events of size %zu\n",
                         total_size,
                         (size_t) g_tw_msg_sz);
        return;
    }

    tw_lpid mn_lp = model_net_find_local_device(net_id, sender);
    tw_stime poffset = codes_local_latency(sender);
    if (in_sequence){
        /* delay this message by what has accumulated so far, then grow the
         * accumulated offset by this message's own local latency */
        tw_stime tmp = mn_msg_offset;
        mn_msg_offset += poffset;
        poffset += tmp;
    }
    tw_event *e = codes_event_new(mn_lp, poffset+offset, sender);

    model_net_wrap_msg *m = tw_event_data(e);
    m->event_type = MN_BASE_NEW_MSG;
    m->magic = model_net_base_magic;

    // set the request struct 
    model_net_request *r = &m->msg.m_base.u.req;
    r->net_id = net_id;
    r->packet_size = model_net_get_packet_size(net_id);
    r->final_dest_lp = final_dest_lp;
    r->src_lp = sender->gid;
    r->msg_size = message_size;
    r->remote_event_size = remote_event_size;
    r->self_event_size = self_event_size;
    r->is_pull = is_pull;
    strncpy(r->category, category, CATEGORY_NAME_MAX-1);
    r->category[CATEGORY_NAME_MAX-1]='\0';
    
    /* payloads live right behind the wrap message: remote first, then self */
    void *e_msg = (m+1);
    if (remote_event_size > 0){
        memcpy(e_msg, remote_event, remote_event_size);
        e_msg = (char*)e_msg + remote_event_size; 
    }
    if (self_event_size > 0){
        memcpy(e_msg, self_event, self_event_size);
    }

    //print_base(m);
    tw_event_send(e);
}
/*
 * Reverse-computation helper shared by the *_rc entry points.
 * It only calls codes_local_latency_reverse() -- presumably to roll back the
 * codes_local_latency() call made by model_net_event_impl_base().
 * NOTE(review): mn_msg_offset is not restored here -- confirm that this is
 * intended when in_sequence is set.
 */
static void model_net_event_impl_base_rc(tw_lp *sender)
{
    codes_local_latency_reverse(sender);
}
221
222
223
224
225
226
227
228
229
230
231
232
233

/*
 * Issue a regular (non-pull) model-net message.
 *
 * Forwards every argument unchanged to model_net_event_impl_base() with
 * is_pull set to 0; see that function for the meaning of each parameter.
 */
void model_net_event(
    int net_id,
    char* category, 
    tw_lpid final_dest_lp, 
    uint64_t message_size, 
    tw_stime offset,
    int remote_event_size,
    const void* remote_event,
    int self_event_size,
    const void* self_event,
    tw_lp *sender)
{
    const int is_pull = 0;

    model_net_event_impl_base(net_id,
                              category,
                              final_dest_lp,
                              message_size,
                              is_pull,
                              offset,
                              remote_event_size,
                              remote_event,
                              self_event_size,
                              self_event,
                              sender);
}

/*
 * Issue a pull request: model_net_event_impl_base() is called with is_pull
 * set to 1.
 *
 * NOTE: for a pull, the caller's self_event is placed in the *remote* event
 * slot - it will be remote from the destination's point of view. The self
 * event slot of the request is left empty (size 0, NULL).
 */
void model_net_pull_event(
        int net_id,
        char *category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
        const void *self_event,
        tw_lp *sender)
{
    const int is_pull = 1;

    model_net_event_impl_base(net_id,
                              category,
                              final_dest_lp,
                              message_size,
                              is_pull,
                              offset,
                              self_event_size,  /* remote_event_size */
                              self_event,       /* remote_event */
                              0,                /* self_event_size */
                              NULL,             /* self_event */
                              sender);
}


255
256
257
int model_net_set_params()
{
  char mn_name[MAX_NAME_LENGTH];
258
  char sched[MAX_NAME_LENGTH];
259
260
  long int packet_size_l = 0;
  uint64_t packet_size;
261
  int net_id=-1;
262
  int ret;
263
264
265
266

  config_lpgroups_t paramconf;
  configuration_get_lpgroups(&config, "PARAMS", &paramconf);
  configuration_get_value(&config, "PARAMS", "modelnet", mn_name, MAX_NAME_LENGTH);
267
268
  ret = configuration_get_value(&config, "PARAMS", "modelnet_scheduler", sched,
          MAX_NAME_LENGTH);
269
270

  configuration_get_value_longint(&config, "PARAMS", "packet_size", &packet_size_l);
271
  packet_size = packet_size_l;
272

273
    if (ret > 0){
274
275
276
277
278
279
        int i;
        for (i = 0; i < MAX_SCHEDS; i++){
            if (strcmp(sched_names[i], sched) == 0){
                mn_sched_type = i;
                break;
            }
280
        }
281
282
283
        if (i == MAX_SCHEDS){
            fprintf(stderr, 
                    "Unknown value for PARAMS:modelnet-scheduler : %s\n", 
284
285
286
287
288
289
290
291
292
                    sched);
            abort();
        }
    }
    else{
        // default: FCFS
        mn_sched_type = MN_SCHED_FCFS;
    }

293
294
295
296
297
298
299
300
301
302
303
    if (mn_sched_type == MN_SCHED_FCFS_FULL){
        // override packet size to something huge (leave a bit in the unlikely
        // case that an op using packet size causes overflow)
        packet_size = 1ull << 62;
    }
    else if (!packet_size && mn_sched_type != MN_SCHED_FCFS_FULL)
    {
        packet_size = 512;
        fprintf(stderr, "\n Warning, no packet size specified, setting packet size to %llu ", packet_size);
    }

304
  if(strcmp(model_net_method_names[SIMPLENET],mn_name)==0)
305
306
307
308
309
310
   {
     double net_startup_ns, net_bw_mbps;
     simplenet_param net_params;
     
     configuration_get_value_double(&config, "PARAMS", "net_startup_ns", &net_startup_ns);
     configuration_get_value_double(&config, "PARAMS", "net_bw_mbps", &net_bw_mbps);
311
312
     net_params.net_startup_ns = net_startup_ns;
     net_params.net_bw_mbps =  net_bw_mbps;
313
     net_id = model_net_setup(model_net_method_names[SIMPLENET], packet_size, (const void*)&net_params); /* Sets the network as simplenet and packet size 512 */
314
   }
315
  else if (strcmp(model_net_method_names[SIMPLEWAN],mn_name)==0){
316
317
318
    simplewan_param net_params;
    configuration_get_value_relpath(&config, "PARAMS", "net_startup_ns_file", net_params.startup_filename, MAX_NAME_LENGTH);
    configuration_get_value_relpath(&config, "PARAMS", "net_bw_mbps_file", net_params.bw_filename, MAX_NAME_LENGTH);
319
    net_id = model_net_setup(model_net_method_names[SIMPLEWAN], packet_size, (const void*)&net_params);
320
  }
321
   else if(strcmp(model_net_method_names[LOGGP],mn_name)==0)
Philip Carns's avatar
Philip Carns committed
322
323
324
325
   {
     char net_config_file[256];
     loggp_param net_params;
     
326
     configuration_get_value_relpath(&config, "PARAMS", "net_config_file", net_config_file, 256);
Philip Carns's avatar
Philip Carns committed
327
     net_params.net_config_file = net_config_file;
328
     net_id = model_net_setup(model_net_method_names[LOGGP], packet_size, (const void*)&net_params); /* Sets the network as loggp and packet size 512 */
Philip Carns's avatar
Philip Carns committed
329
330
   }

331
  else if(strcmp(model_net_method_names[DRAGONFLY], mn_name)==0)	  
332
    {
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
       dragonfly_param net_params;
       int num_routers=0, num_vcs=0, local_vc_size=0, global_vc_size=0, cn_vc_size=0;
       double local_bandwidth=0.0, cn_bandwidth=0.0, global_bandwidth=0.0;
       
       configuration_get_value_int(&config, "PARAMS", "num_routers", &num_routers);
       if(!num_routers)
	{
	   num_routers = 4; 
	   printf("\n Number of dimensions not specified, setting to %d ", num_routers);
        } 
       net_params.num_routers = num_routers; 

       configuration_get_value_int(&config, "PARAMS", "num_vcs", &num_vcs);
       if(!num_vcs)
       {
          num_vcs = 1;
	  printf("\n Number of virtual channels not specified, setting to %d ", num_vcs);
       }
       net_params.num_vcs = num_vcs;

       configuration_get_value_int(&config, "PARAMS", "local_vc_size", &local_vc_size);
       if(!local_vc_size)
	{
	   local_vc_size = 1024;
	   printf("\n Buffer size of local channels not specified, setting to %d ", local_vc_size);
	}
       net_params.local_vc_size = local_vc_size;

       configuration_get_value_int(&config, "PARAMS", "global_vc_size", &global_vc_size);
       if(!global_vc_size)
	{
	  global_vc_size = 2048;
	  printf("\n Buffer size of global channels not specified, setting to %d ", global_vc_size);
	}
       net_params.global_vc_size = global_vc_size;

       configuration_get_value_int(&config, "PARAMS", "cn_vc_size", &cn_vc_size);
       if(!cn_vc_size)
	 {
	    cn_vc_size = 1024;
	    printf("\n Buffer size of compute node channels not specified, setting to %d ", cn_vc_size);
	 }
       net_params.cn_vc_size = cn_vc_size;

	configuration_get_value_double(&config, "PARAMS", "local_bandwidth", &local_bandwidth);
        if(!local_bandwidth)
	  {
	    local_bandwidth = 5.25;
	    printf("\n Bandwidth of local channels not specified, setting to %lf ", local_bandwidth);
	 }
       net_params.local_bandwidth = local_bandwidth;

       configuration_get_value_double(&config, "PARAMS", "global_bandwidth", &global_bandwidth);
        if(!global_bandwidth)
	{
	     global_bandwidth = 4.7;
	     printf("\n Bandwidth of global channels not specified, setting to %lf ", global_bandwidth);
	}
	net_params.global_bandwidth = global_bandwidth;

	configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", &cn_bandwidth);
	if(!cn_bandwidth)
	 {
	     cn_bandwidth = 5.25;
	     printf("\n Bandwidth of compute node channels not specified, setting to %lf ", cn_bandwidth);
	}
	net_params.cn_bandwidth = cn_bandwidth;

       char routing[MAX_NAME_LENGTH];
       configuration_get_value(&config, "PARAMS", "routing", routing, MAX_NAME_LENGTH);
       if(strcmp(routing, "minimal") == 0)
	   net_params.routing = 0;
       else if(strcmp(routing, "nonminimal")==0 || strcmp(routing,"non-minimal")==0)
	       net_params.routing = 1;
       else
       {
       	   printf("\n No routing protocol specified, setting to minimal routing");
   	   net_params.routing = 0;	   
       }
412
    net_id = model_net_setup(model_net_method_names[DRAGONFLY], packet_size, (const void*)&net_params);   
413
    }
414
   else if(strcmp(model_net_method_names[TORUS], mn_name)==0)
415
     {
416
417
	torus_param net_params;
	char dim_length[MAX_NAME_LENGTH];
418
	int n_dims=0, buffer_size=0, num_vc=0, i=0, chunk_size = 0;
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
	double link_bandwidth=0;

	configuration_get_value_int(&config, "PARAMS", "n_dims", &n_dims);
	if(!n_dims)
	{
	   n_dims = 4; /* a 4-D torus */
	   printf("\n Number of dimensions not specified, setting to %d ", n_dims);
	}
	
	configuration_get_value_double(&config, "PARAMS", "link_bandwidth", &link_bandwidth);	
	if(!link_bandwidth)
	{
		link_bandwidth = 2.0; /*default bg/q configuration */
		printf("\n Link bandwidth not specified, setting to %lf ", link_bandwidth);
	}

	configuration_get_value_int(&config, "PARAMS", "buffer_size", &buffer_size);
	if(!buffer_size)
	{
		buffer_size = 2048;
		printf("\n Buffer size not specified, setting to %d ",buffer_size);
	}

442
443
444
445
446
447
	configuration_get_value_int(&config, "PARAMS", "chunk_size", &chunk_size);
	if(!chunk_size)
	{
	       chunk_size = 32;
	       printf("\n Chunk size not specified, setting to %d ", chunk_size);
	}
448
449
450
451
452
453
454
455
456
457
458
459
	configuration_get_value_int(&config, "PARAMS", "num_vc", &num_vc);
	if(!num_vc)
	{
		num_vc = 1; /*by default, we have one for taking packets, another for taking credit*/
		printf("\n num_vc not specified, setting to %d ", num_vc);
	}

        configuration_get_value(&config, "PARAMS", "dim_length", dim_length, MAX_NAME_LENGTH);
        char* token;
	net_params.n_dims=n_dims;
	net_params.num_vc=num_vc;
	net_params.buffer_size=buffer_size;
460
	net_params.chunk_size = chunk_size;
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
	net_params.link_bandwidth=link_bandwidth;
	net_params.dim_length=malloc(n_dims*sizeof(int));
        token = strtok(dim_length, ",");	
	while(token != NULL)
	{
	   sscanf(token, "%d", &net_params.dim_length[i]);
	   if(!net_params.dim_length[i])
	   {
	      printf("\n Invalid torus dimension specified %d, exitting... ", net_params.dim_length[i]);
	      MPI_Finalize();
	      exit(-1);
	   }
	   i++;
	   token = strtok(NULL,",");
	}
476
	net_id = model_net_setup(model_net_method_names[TORUS], packet_size, (const void*)&net_params);
477
478
479
     }
  else
       printf("\n Invalid network argument %s ", mn_name);
480
  model_net_base_init();
481
482
  return net_id;
}
483

484
485
486
487
/*
 * Reverse-computation counterpart of model_net_event().
 * Only `sender` is used; net_id and message_size are accepted but ignored.
 */
void model_net_event_rc(
        int net_id,
        tw_lp *sender,
        uint64_t message_size)
{
    (void) net_id;
    (void) message_size;

    model_net_event_impl_base_rc(sender);
}

/*
 * Reverse-computation counterpart of model_net_pull_event().
 * Only `sender` is used; net_id is accepted but ignored.
 */
void model_net_pull_event_rc(
        int net_id,
        tw_lp *sender)
{
    (void) net_id;

    model_net_event_impl_base_rc(sender);
}

497
/* returns the message size, can be either simplenet, dragonfly or torus message size*/
498
int model_net_get_msg_sz(int net_id)
499
500
501
{
   // TODO: Add checks on network name
   // TODO: Add dragonfly and torus network models
502
503
   return sizeof(model_net_wrap_msg);
#if 0
504
   if(net_id < 0 || net_id >= MAX_NETS)
505
506
507
508
509
510
     {
      printf("%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
      exit(-1);
     }

       return method_array[net_id]->mn_get_msg_sz();
511
#endif
512
513
514
}

/* returns the packet size in the modelnet struct */
515
uint64_t model_net_get_packet_size(int net_id)
516
{
517
  if(net_id < 0 || net_id >= MAX_NETS)
518
519
520
521
522
523
524
525
526
527
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }
  return method_array[net_id]->packet_size; // TODO: where to set the packet size?
}

/* returns lp type for modelnet */
const tw_lptype* model_net_get_lp_type(int net_id)
{
528
    if(net_id < 0 || net_id >= MAX_NETS)
529
530
531
532
533
534
535
536
537
538
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }

   // TODO: ADd checks by network names
   // Add dragonfly and torus network models
   return method_array[net_id]->mn_get_lp_type();
}

539
540
void model_net_report_stats(int net_id)
{
541
  if(net_id < 0 || net_id >= MAX_NETS)
542
543
544
545
546
547
548
  {
    fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
    exit(-1);
   }

     // TODO: ADd checks by network names
     //    // Add dragonfly and torus network models
549
550
   method_array[net_id]->mn_report_stats();
   return;
551
}
552
553
554
555
556
557

/*
 * Returns the id of the model-net device LP that `sender` should use on
 * network `net_id`, as reported by that method's
 * model_net_method_find_local_device() callback.
 *
 * Like the other accessors in this file, prints an error and terminates the
 * program with exit(-1) when net_id is outside [0, MAX_NETS) instead of
 * indexing method_array[] out of bounds.
 */
tw_lpid model_net_find_local_device(int net_id, tw_lp *sender)
{
    if(net_id < 0 || net_id >= MAX_NETS)
    {
        fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
        exit(-1);
    }

    return(method_array[net_id]->model_net_method_find_local_device(sender));
}

558
559
560
561
562
563
564
565
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */