/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "codes/model-net.h"
#include "codes/model-net-method.h"
#include "codes/model-net-lp.h"
#include "codes/codes.h"

15
16
17
18
#define STR_SIZE 16
#define PROC_TIME 10.0

/* Per-network method tables. They are only declared here; the definitions
 * live outside this file. */
extern struct model_net_method simplenet_method;
extern struct model_net_method simplewan_method;
extern struct model_net_method torus_method;
extern struct model_net_method dragonfly_method;
extern struct model_net_method loggp_method;
23

24
25
26
27
28
29
30
31
32
33
34
35
/* Table of strings built by expanding NETWORK_DEF with an X() that keeps
 * only the second field (b) of every entry. NETWORK_DEF itself is defined
 * outside this file. */
#define X(a,b,c,d) b,
char * model_net_lp_config_names[] = {
    NETWORK_DEF
};
#undef X

/* Table of strings built by expanding NETWORK_DEF with an X() that keeps
 * only the third field (c) of every entry. In this file the entries are
 * compared against the requested network name and are indexed with the
 * SIMPLENET / SIMPLEWAN / LOGGP / DRAGONFLY / TORUS constants. */
#define X(a,b,c,d) c,
char * model_net_method_names[] = {
    NETWORK_DEF
};
#undef X

36
/* Global array initialization, terminated with a NULL entry */
37
38
39
40
41
#define X(a,b,c,d) d,
struct model_net_method* method_array[] = { 
    NETWORK_DEF
};
#undef X
42

43
44
45
/* Sequencing state read by model_net_event_impl_base(): while in_sequence is
 * non-zero, the current mn_msg_offset is added to the delay of each new
 * event and mn_msg_offset is then advanced by that event's local latency. */
int in_sequence = 0;
tw_stime mn_msg_offset = 0.0;

46
int model_net_setup(char* name,
47
		    uint64_t packet_size,
48
49
		    const void* net_params)
{
50
     int i;
51
52
53
    /* find struct for underlying method (according to configuration file) */
     for(i=0; method_array[i] != NULL; i++)
     {
54
     	if(strcmp(model_net_method_names[i], name) == 0)
55
56
57
58
59
60
	{
	   method_array[i]->mn_setup(net_params);
	   method_array[i]->packet_size = packet_size;
	   return(i);
	}
     }
61
     fprintf(stderr, "Error: undefined network name %s (Available options simplenet, torus, dragonfly) \n", name);
62
63
64
     return -1; // indicating error
}

65
66
67
int model_net_get_id(char *name){
    int i;
    for(i=0; method_array[i] != NULL; i++) {
68
        if(strcmp(model_net_method_names[i], name) == 0) {
69
70
71
72
73
74
            return i;
        }
    }
    return -1;
}

75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/*
 * Format one statistics record as a single tab-separated text line and hand
 * it to lp_io_write() under the identifier "model-net-category-<category>".
 *
 * lpid - LP id, printed in the record and passed to lp_io_write()
 * stat - counters to print; stat->category selects the identifier
 *
 * The write is expected to succeed (checked with assert).
 */
void model_net_write_stats(tw_lpid lpid, struct mn_stats* stat)
{
    int ret;
    char id[32];
    char data[1024];

    /* The fixed prefix is 19 characters, which leaves room for 12 category
     * characters in id[]. snprintf truncates a longer category instead of
     * writing past the end of the buffer. */
    snprintf(id, sizeof(id), "model-net-category-%s", stat->category);
    snprintf(data, sizeof(data),
        "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t"
        "recv_count:%ld\trecv_bytes:%ld\trecv_time:%f\tmax_event_size:%ld\n",
        (long)lpid,
        stat->send_count,
        stat->send_bytes,
        stat->send_time,
        stat->recv_count,
        stat->recv_bytes,
        stat->recv_time,
        stat->max_event_size);

    ret = lp_io_write(lpid, id, strlen(data), data);
    assert(ret == 0);

    return;
}

/*
 * Write the statistics of every used category in mn_stats_array[] and then
 * one extra record, named "all", that aggregates them.
 *
 * Counts, byte totals and times are summed; max_event_size is the maximum
 * over the categories. Slots with an empty category name are skipped.
 */
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[])
{
    struct mn_stats total;
    int idx;

    memset(&total, 0, sizeof(total));
    sprintf(total.category, "all");

    for(idx = 0; idx < CATEGORY_MAX; idx++)
    {
        struct mn_stats *cur = &mn_stats_array[idx];

        /* an empty name marks an unused slot */
        if(cur->category[0] == '\0')
        {
            continue;
        }

        total.send_count += cur->send_count;
        total.send_bytes += cur->send_bytes;
        total.send_time  += cur->send_time;
        total.recv_count += cur->recv_count;
        total.recv_bytes += cur->recv_bytes;
        total.recv_time  += cur->recv_time;
        if(cur->max_event_size > total.max_event_size)
        {
            total.max_event_size = cur->max_event_size;
        }

        model_net_write_stats(lpid, cur);
    }

    model_net_write_stats(lpid, &total);
}

/*
 * Return the statistics slot for `category`, claiming the first unused slot
 * (empty name) when the category has not been seen yet.
 *
 * category       - category name to look up
 * mn_stats_array - table of CATEGORY_MAX slots; used slots come first
 *
 * The table is expected to have room: running out of slots trips the assert.
 * Names longer than the slot's category field are truncated when stored, and
 * lookups compare only that many characters so a truncated name keeps
 * resolving to the same slot.
 */
struct mn_stats* model_net_find_stats(const char* category, mn_stats mn_stats_array[])
{
    /* usable characters in a stored name; one byte is kept for the NUL.
     * Relies on category being a char array member (this file memsets the
     * struct and then sprintf()s into the field). */
    const size_t name_max = sizeof(mn_stats_array[0].category) - 1;
    int i;
    int new_flag = 0;
    int found_flag = 0;

    for(i=0; i<CATEGORY_MAX; i++)
    {
        if(mn_stats_array[i].category[0] == '\0')
        {
            /* first unused slot: the category is new */
            found_flag = 1;
            new_flag = 1;
            break;
        }
        if(strncmp(category, mn_stats_array[i].category, name_max) == 0)
        {
            found_flag = 1;
            new_flag = 0;
            break;
        }
    }
    assert(found_flag);

    if(new_flag)
    {
        /* bounded copy: never write past the end of the category field */
        strncpy(mn_stats_array[i].category, category, name_max);
        mn_stats_array[i].category[name_max] = '\0';
    }
    return(&mn_stats_array[i]);
}

157
/*
 * Build a MN_BASE_NEW_MSG event for the sender's local model-net device and
 * send it.
 *
 * The event carries a model_net_request (net id, final destination, message
 * size, category, payload sizes) followed in memory by the remote event
 * payload and then the self event payload. The event is scheduled
 * `offset` plus the sender's local latency in the future; while in_sequence
 * is set, the accumulated mn_msg_offset is added as well.
 *
 * Raises tw_error when wrapper plus payloads do not fit in g_tw_msg_sz.
 *
 * NOTE(review): is_pull is accepted but never read in this function, so a
 * pull request is built exactly like a push. Confirm whether the request
 * structure is supposed to record it.
 */
static void model_net_event_impl_base(
        int net_id,
        char* category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        int is_pull,
        tw_stime offset,
        int remote_event_size,
        const void* remote_event,
        int self_event_size,
        const void* self_event,
        tw_lp *sender) {

    if (remote_event_size + self_event_size + sizeof(model_net_wrap_msg)
            > g_tw_msg_sz){
        /* the sum includes a sizeof term and is therefore a size_t: %zu */
        tw_error(TW_LOC, "Error: model_net trying to transmit an event of size "
                         "%zu but ROSS is configured for events of size %zd\n",
                         remote_event_size+self_event_size+sizeof(model_net_wrap_msg),
                         g_tw_msg_sz);
        return;
    }

    tw_lpid mn_lp = model_net_find_local_device(net_id, sender);
    tw_stime poffset = codes_local_latency(sender);
    if (in_sequence){
        /* delay this event by what earlier events in the sequence have
         * accumulated, then add this event's latency to the running total */
        tw_stime tmp = mn_msg_offset;
        mn_msg_offset += poffset;
        poffset += tmp;
    }
    tw_event *e = codes_event_new(mn_lp, poffset+offset, sender);

    model_net_wrap_msg *m = tw_event_data(e);
    m->event_type = MN_BASE_NEW_MSG;
    m->magic = model_net_base_magic;

    m->msg.m_base.src = sender->gid;

    // set the request struct
    model_net_request *r = &m->msg.m_base.u.req;
    r->net_id = net_id;
    r->final_dest_lp = final_dest_lp;
    r->msg_size = message_size;
    r->remote_event_size = remote_event_size;
    r->self_event_size = self_event_size;
    strncpy(r->category, category, CATEGORY_NAME_MAX-1);
    r->category[CATEGORY_NAME_MAX-1]='\0';

    /* payloads are stored directly behind the wrapper message:
     * remote event first, self event after it */
    void *e_msg = (m+1);
    if (remote_event_size > 0){
        memcpy(e_msg, remote_event, remote_event_size);
        e_msg = (char*)e_msg + remote_event_size;
    }
    if (self_event_size > 0){
        memcpy(e_msg, self_event, self_event_size);
    }

    //print_base(m);
    tw_event_send(e);
}
/* Reverse handler shared by the public *_rc entry points: undoes the
 * codes_local_latency() call made when the event was created. */
static void model_net_event_impl_base_rc(tw_lp *sender)
{
    codes_local_latency_reverse(sender);
}
219
220
221
222
223
224
225
226
227
228
229
230
231

/*
 * Issue a regular (push) model-net event. Forwards every argument to
 * model_net_event_impl_base() with is_pull set to 0.
 */
void model_net_event(
    int net_id,
    char* category,
    tw_lpid final_dest_lp,
    uint64_t message_size,
    tw_stime offset,
    int remote_event_size,
    const void* remote_event,
    int self_event_size,
    const void* self_event,
    tw_lp *sender)
{
    const int is_pull = 0;

    model_net_event_impl_base(
            net_id,
            category,
            final_dest_lp,
            message_size,
            is_pull,
            offset,
            remote_event_size,
            remote_event,
            self_event_size,
            self_event,
            sender);
}

/*
 * Issue a pull model-net event. Forwards to model_net_event_impl_base() with
 * is_pull set to 1.
 *
 * NOTE: for a pull, we are filling the *remote* event - it will be remote
 * from the destination's POV. The caller's self event is therefore passed in
 * the remote-event position and no self event is attached.
 */
void model_net_pull_event(
        int net_id,
        char *category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
        const void *self_event,
        tw_lp *sender)
{
    const int is_pull = 1;

    model_net_event_impl_base(
            net_id,
            category,
            final_dest_lp,
            message_size,
            is_pull,
            offset,
            self_event_size,   /* travels as the remote event */
            self_event,
            0,                 /* no self event */
            NULL,
            sender);
}


253
254
255
int model_net_set_params()
{
  char mn_name[MAX_NAME_LENGTH];
256
257
  long int packet_size_l = 0;
  uint64_t packet_size;
258
  int net_id=-1;
259
260
261
262

  config_lpgroups_t paramconf;
  configuration_get_lpgroups(&config, "PARAMS", &paramconf);
  configuration_get_value(&config, "PARAMS", "modelnet", mn_name, MAX_NAME_LENGTH);
263
264
  configuration_get_value_longint(&config, "PARAMS", "packet_size", &packet_size_l);
  packet_size = packet_size_l;
265

266
267
268
  if(!packet_size)
  {
	packet_size = 512;
269
	printf("\n Warning, no packet size specified, setting packet size to %llu ", packet_size);
270
  }
271
  if(strcmp(model_net_method_names[SIMPLENET],mn_name)==0)
272
273
274
275
276
277
   {
     double net_startup_ns, net_bw_mbps;
     simplenet_param net_params;
     
     configuration_get_value_double(&config, "PARAMS", "net_startup_ns", &net_startup_ns);
     configuration_get_value_double(&config, "PARAMS", "net_bw_mbps", &net_bw_mbps);
278
279
     net_params.net_startup_ns = net_startup_ns;
     net_params.net_bw_mbps =  net_bw_mbps;
280
     net_id = model_net_setup(model_net_method_names[SIMPLENET], packet_size, (const void*)&net_params); /* Sets the network as simplenet and packet size 512 */
281
   }
282
  else if (strcmp(model_net_method_names[SIMPLEWAN],mn_name)==0){
283
284
285
    simplewan_param net_params;
    configuration_get_value_relpath(&config, "PARAMS", "net_startup_ns_file", net_params.startup_filename, MAX_NAME_LENGTH);
    configuration_get_value_relpath(&config, "PARAMS", "net_bw_mbps_file", net_params.bw_filename, MAX_NAME_LENGTH);
286
    net_id = model_net_setup(model_net_method_names[SIMPLEWAN], packet_size, (const void*)&net_params);
287
  }
288
   else if(strcmp(model_net_method_names[LOGGP],mn_name)==0)
Philip Carns's avatar
Philip Carns committed
289
290
291
292
   {
     char net_config_file[256];
     loggp_param net_params;
     
293
     configuration_get_value_relpath(&config, "PARAMS", "net_config_file", net_config_file, 256);
Philip Carns's avatar
Philip Carns committed
294
     net_params.net_config_file = net_config_file;
295
     net_id = model_net_setup(model_net_method_names[LOGGP], packet_size, (const void*)&net_params); /* Sets the network as loggp and packet size 512 */
Philip Carns's avatar
Philip Carns committed
296
297
   }

298
  else if(strcmp(model_net_method_names[DRAGONFLY], mn_name)==0)	  
299
    {
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
       dragonfly_param net_params;
       int num_routers=0, num_vcs=0, local_vc_size=0, global_vc_size=0, cn_vc_size=0;
       double local_bandwidth=0.0, cn_bandwidth=0.0, global_bandwidth=0.0;
       
       configuration_get_value_int(&config, "PARAMS", "num_routers", &num_routers);
       if(!num_routers)
	{
	   num_routers = 4; 
	   printf("\n Number of dimensions not specified, setting to %d ", num_routers);
        } 
       net_params.num_routers = num_routers; 

       configuration_get_value_int(&config, "PARAMS", "num_vcs", &num_vcs);
       if(!num_vcs)
       {
          num_vcs = 1;
	  printf("\n Number of virtual channels not specified, setting to %d ", num_vcs);
       }
       net_params.num_vcs = num_vcs;

       configuration_get_value_int(&config, "PARAMS", "local_vc_size", &local_vc_size);
       if(!local_vc_size)
	{
	   local_vc_size = 1024;
	   printf("\n Buffer size of local channels not specified, setting to %d ", local_vc_size);
	}
       net_params.local_vc_size = local_vc_size;

       configuration_get_value_int(&config, "PARAMS", "global_vc_size", &global_vc_size);
       if(!global_vc_size)
	{
	  global_vc_size = 2048;
	  printf("\n Buffer size of global channels not specified, setting to %d ", global_vc_size);
	}
       net_params.global_vc_size = global_vc_size;

       configuration_get_value_int(&config, "PARAMS", "cn_vc_size", &cn_vc_size);
       if(!cn_vc_size)
	 {
	    cn_vc_size = 1024;
	    printf("\n Buffer size of compute node channels not specified, setting to %d ", cn_vc_size);
	 }
       net_params.cn_vc_size = cn_vc_size;

	configuration_get_value_double(&config, "PARAMS", "local_bandwidth", &local_bandwidth);
        if(!local_bandwidth)
	  {
	    local_bandwidth = 5.25;
	    printf("\n Bandwidth of local channels not specified, setting to %lf ", local_bandwidth);
	 }
       net_params.local_bandwidth = local_bandwidth;

       configuration_get_value_double(&config, "PARAMS", "global_bandwidth", &global_bandwidth);
        if(!global_bandwidth)
	{
	     global_bandwidth = 4.7;
	     printf("\n Bandwidth of global channels not specified, setting to %lf ", global_bandwidth);
	}
	net_params.global_bandwidth = global_bandwidth;

	configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", &cn_bandwidth);
	if(!cn_bandwidth)
	 {
	     cn_bandwidth = 5.25;
	     printf("\n Bandwidth of compute node channels not specified, setting to %lf ", cn_bandwidth);
	}
	net_params.cn_bandwidth = cn_bandwidth;

       char routing[MAX_NAME_LENGTH];
       configuration_get_value(&config, "PARAMS", "routing", routing, MAX_NAME_LENGTH);
       if(strcmp(routing, "minimal") == 0)
	   net_params.routing = 0;
       else if(strcmp(routing, "nonminimal")==0 || strcmp(routing,"non-minimal")==0)
	       net_params.routing = 1;
       else
       {
       	   printf("\n No routing protocol specified, setting to minimal routing");
   	   net_params.routing = 0;	   
       }
379
    net_id = model_net_setup(model_net_method_names[DRAGONFLY], packet_size, (const void*)&net_params);   
380
    }
381
   else if(strcmp(model_net_method_names[TORUS], mn_name)==0)
382
     {
383
384
	torus_param net_params;
	char dim_length[MAX_NAME_LENGTH];
385
	int n_dims=0, buffer_size=0, num_vc=0, i=0, chunk_size = 0;
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
	double link_bandwidth=0;

	configuration_get_value_int(&config, "PARAMS", "n_dims", &n_dims);
	if(!n_dims)
	{
	   n_dims = 4; /* a 4-D torus */
	   printf("\n Number of dimensions not specified, setting to %d ", n_dims);
	}
	
	configuration_get_value_double(&config, "PARAMS", "link_bandwidth", &link_bandwidth);	
	if(!link_bandwidth)
	{
		link_bandwidth = 2.0; /*default bg/q configuration */
		printf("\n Link bandwidth not specified, setting to %lf ", link_bandwidth);
	}

	configuration_get_value_int(&config, "PARAMS", "buffer_size", &buffer_size);
	if(!buffer_size)
	{
		buffer_size = 2048;
		printf("\n Buffer size not specified, setting to %d ",buffer_size);
	}

409
410
411
412
413
414
	configuration_get_value_int(&config, "PARAMS", "chunk_size", &chunk_size);
	if(!chunk_size)
	{
	       chunk_size = 32;
	       printf("\n Chunk size not specified, setting to %d ", chunk_size);
	}
415
416
417
418
419
420
421
422
423
424
425
426
	configuration_get_value_int(&config, "PARAMS", "num_vc", &num_vc);
	if(!num_vc)
	{
		num_vc = 1; /*by default, we have one for taking packets, another for taking credit*/
		printf("\n num_vc not specified, setting to %d ", num_vc);
	}

        configuration_get_value(&config, "PARAMS", "dim_length", dim_length, MAX_NAME_LENGTH);
        char* token;
	net_params.n_dims=n_dims;
	net_params.num_vc=num_vc;
	net_params.buffer_size=buffer_size;
427
	net_params.chunk_size = chunk_size;
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
	net_params.link_bandwidth=link_bandwidth;
	net_params.dim_length=malloc(n_dims*sizeof(int));
        token = strtok(dim_length, ",");	
	while(token != NULL)
	{
	   sscanf(token, "%d", &net_params.dim_length[i]);
	   if(!net_params.dim_length[i])
	   {
	      printf("\n Invalid torus dimension specified %d, exitting... ", net_params.dim_length[i]);
	      MPI_Finalize();
	      exit(-1);
	   }
	   i++;
	   token = strtok(NULL,",");
	}
443
	net_id = model_net_setup(model_net_method_names[TORUS], packet_size, (const void*)&net_params);
444
445
446
     }
  else
       printf("\n Invalid network argument %s ", mn_name);
447
  model_net_base_init();
448
449
  return net_id;
}
450

451
452
453
454
/*
 * Reverse computation for model_net_event(). Only the sender is needed;
 * net_id and message_size are accepted but not used.
 */
void model_net_event_rc(
        int net_id,
        tw_lp *sender,
        uint64_t message_size)
{
    (void)net_id;
    (void)message_size;

    model_net_event_impl_base_rc(sender);
}

/*
 * Reverse computation for model_net_pull_event(). Only the sender is
 * needed; net_id is accepted but not used.
 */
void model_net_pull_event_rc(
        int net_id,
        tw_lp *sender)
{
    (void)net_id;

    model_net_event_impl_base_rc(sender);
}

464
/* returns the message size, can be either simplenet, dragonfly or torus message size*/
465
int model_net_get_msg_sz(int net_id)
466
467
468
{
   // TODO: Add checks on network name
   // TODO: Add dragonfly and torus network models
469
470
   return sizeof(model_net_wrap_msg);
#if 0
471
   if(net_id < 0 || net_id >= MAX_NETS)
472
473
474
475
476
477
     {
      printf("%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
      exit(-1);
     }

       return method_array[net_id]->mn_get_msg_sz();
478
#endif
479
480
481
}

/* returns the packet size in the modelnet struct */
482
uint64_t model_net_get_packet_size(int net_id)
483
{
484
  if(net_id < 0 || net_id >= MAX_NETS)
485
486
487
488
489
490
491
492
493
494
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }
  return method_array[net_id]->packet_size; // TODO: where to set the packet size?
}

/* returns lp type for modelnet */
const tw_lptype* model_net_get_lp_type(int net_id)
{
495
    if(net_id < 0 || net_id >= MAX_NETS)
496
497
498
499
500
501
502
503
504
505
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }

   // TODO: ADd checks by network names
   // Add dragonfly and torus network models
   return method_array[net_id]->mn_get_lp_type();
}

506
507
void model_net_report_stats(int net_id)
{
508
  if(net_id < 0 || net_id >= MAX_NETS)
509
510
511
512
513
514
515
  {
    fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
    exit(-1);
   }

     // TODO: ADd checks by network names
     //    // Add dragonfly and torus network models
516
517
   method_array[net_id]->mn_report_stats();
   return;
518
}
519
520
521
522
523
524

/*
 * Returns the LP id given by the model_net_method_find_local_device() hook
 * of the method selected by net_id, for the LP `sender`.
 *
 * A negative net_id (model_net_setup() returns -1 on failure) is reported on
 * stderr and ends the program with exit(-1), instead of indexing
 * method_array[] out of bounds. The upper bound is not checked here.
 */
tw_lpid model_net_find_local_device(int net_id, tw_lp *sender)
{
    if(net_id < 0)
    {
        fprintf(stderr, "%s Error: invalid modelnet network id %d\n", __FUNCTION__, net_id);
        exit(-1);
    }

    return(method_array[net_id]->model_net_method_find_local_device(sender));
}

525
526
527
528
529
530
531
532
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */