/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
8
9
#include <string.h>
#include <assert.h>

10
#include "codes/model-net.h"
11
#include "codes/model-net-method.h"
12

13
14
#define PULL_MSG_SIZE 128

15
16
17
18
#define STR_SIZE 16
#define PROC_TIME 10.0

extern struct model_net_method simplenet_method;
19
extern struct model_net_method simplewan_method;
20
extern struct model_net_method torus_method;
21
extern struct model_net_method dragonfly_method;
Philip Carns's avatar
Philip Carns committed
22
extern struct model_net_method loggp_method;
23
24
25

/* Global array initialization, terminated with a NULL entry */
static struct model_net_method* method_array[] =
26
    {&simplenet_method, &simplewan_method, &torus_method, &dragonfly_method, &loggp_method, NULL};
27

28
29
30
/* When nonzero, model_net_event_impl() starts a message's packet offsets at
 * mn_msg_offset and stores the accumulated offset back once the message has
 * been issued; when zero, packet offsets start from 0.0 for every message. */
int in_sequence = 0;
/* Running packet offset carried from one message to the next while
 * in_sequence is set. */
tw_stime mn_msg_offset = 0.0;

31
static int model_net_get_msg_sz(int net_id);
32

33
int model_net_setup(char* name,
34
		    uint64_t packet_size,
35
36
		    const void* net_params)
{
37
     int i;
38
39
40
41
42
43
44
    /* find struct for underlying method (according to configuration file) */
     for(i=0; method_array[i] != NULL; i++)
     {
     	if(strcmp(method_array[i]->method_name, name) == 0)
	{
	   method_array[i]->mn_setup(net_params);
	   method_array[i]->packet_size = packet_size;
45
	   model_net_add_lp_type(i);
46
47
48
	   return(i);
	}
     }
49
     fprintf(stderr, "Error: undefined network name %s (Available options simplenet, torus, dragonfly) \n", name);
50
51
52
     return -1; // indicating error
}

53
54
55
56
57
58
59
60
61
62
/* Look up the index of the network method called `name` in method_array.
 * Returns that index, or -1 when no registered method has the name. */
int model_net_get_id(char *name){
    int idx = 0;

    while(method_array[idx] != NULL)
    {
        if(!strcmp(method_array[idx]->method_name, name))
            return idx;
        idx++;
    }

    return -1;
}

63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
/*
 * Format one category's statistics as a single tab-separated text line and
 * hand it to lp_io_write() under the identifier
 * "model-net-category-<category>".
 *
 * lpid: LP the statistics belong to
 * stat: statistics record to write
 */
void model_net_write_stats(tw_lpid lpid, struct mn_stats* stat)
{
    static const char id_prefix[] = "model-net-category-";
    int ret;
    /* Sized from the prefix plus the category field so the identifier always
     * fits (sizeof(id_prefix) already counts the terminating NUL).  A fixed
     * 32-byte buffer overflows for category names longer than 12 characters. */
    char id[sizeof(id_prefix) + sizeof(stat->category)];
    char data[1024];

    snprintf(id, sizeof(id), "%s%s", id_prefix, stat->category);
    snprintf(data, sizeof(data),
        "lp:%ld\tsend_count:%ld\tsend_bytes:%ld\tsend_time:%f\t"
        "recv_count:%ld\trecv_bytes:%ld\trecv_time:%f\tmax_event_size:%ld\n",
        (long)lpid,
        stat->send_count,
        stat->send_bytes,
        stat->send_time,
        stat->recv_count,
        stat->recv_bytes,
        stat->recv_time,
        stat->max_event_size);

    ret = lp_io_write(lpid, id, strlen(data), data);
    assert(ret == 0);
    (void)ret; /* silences the unused warning when NDEBUG removes the assert */

    return;
}

/*
 * Write out the statistics of every category in use, followed by one
 * aggregate record named "all".
 *
 * Counts, byte totals and times are summed into the aggregate; for
 * max_event_size the largest value seen is kept.
 */
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[])
{
    struct mn_stats all;
    int slot;

    memset(&all, 0, sizeof(all));
    sprintf(all.category, "all");

    for(slot = 0; slot < CATEGORY_MAX; slot++)
    {
        mn_stats *cur = &mn_stats_array[slot];

        /* slots with an empty category name are unused */
        if(cur->category[0] == '\0')
            continue;

        all.send_count += cur->send_count;
        all.send_bytes += cur->send_bytes;
        all.send_time  += cur->send_time;
        all.recv_count += cur->recv_count;
        all.recv_bytes += cur->recv_bytes;
        all.recv_time  += cur->recv_time;
        if(all.max_event_size < cur->max_event_size)
            all.max_event_size = cur->max_event_size;

        model_net_write_stats(lpid, cur);
    }

    model_net_write_stats(lpid, &all);
}

/*
 * Return the statistics slot for `category`, claiming the first unused slot
 * (one whose category name is empty) if the category has not been seen yet.
 *
 * category:       NUL-terminated category name
 * mn_stats_array: table of CATEGORY_MAX statistics slots
 *
 * Aborts the program if the name does not fit in a slot's category field or
 * if every slot is already taken by a different category.
 */
struct mn_stats* model_net_find_stats(const char* category, mn_stats mn_stats_array[])
{
    int i;

    for(i=0; i<CATEGORY_MAX; i++)
    {
        if(mn_stats_array[i].category[0] == '\0')
        {
            /* First unused slot: the category is new.  Check the length
             * before copying; an unchecked strcpy would write past the
             * category field. */
            if(strlen(category) >= sizeof(mn_stats_array[i].category))
            {
                fprintf(stderr, "Error: model_net category name \"%s\" is too long (limit %zu characters)\n",
                    category, sizeof(mn_stats_array[i].category) - 1);
                abort();
            }
            strcpy(mn_stats_array[i].category, category);
            return(&mn_stats_array[i]);
        }
        if(strcmp(category, mn_stats_array[i].category) == 0)
        {
            return(&mn_stats_array[i]);
        }
    }

    /* Table is full.  Fail unconditionally rather than through assert(), so a
     * build with NDEBUG cannot return a slot past the end of the array. */
    fprintf(stderr, "Error: model_net has no free statistics slot for category \"%s\" (limit %d categories)\n",
        category, (int)CATEGORY_MAX);
    abort();
}

145
146
147
148
149
150
151
152
153
154
155
156
157
/*
 * Shared implementation behind model_net_event() and model_net_pull_event():
 * split a message into packets and issue each one through the selected
 * network method.
 *
 * net_id:            index of the network method in method_array
 * category:          statistics category, passed through to the method
 * final_dest_lp:     LP the remote event is ultimately delivered to
 * message_size:      message size, split into packets of the method's size
 * is_pull:           passed through to the method unchanged
 * pull_msg_size:     passed through to the method unchanged
 * offset:            added to the running packet offset of each packet
 * remote_event(_size): event passed along through the network hops and
 *                    delivered to final_dest_lp
 * self_event(_size): event delivered back to the caller of this function
 * sender:            LP issuing the message
 *
 * A message shorter than one packet (including a zero-length message) is
 * still sent as exactly one packet.  The final packet carries the remainder
 * of the message and is flagged as last.
 *
 * Terminates the program on an invalid net_id, a zero packet size, or an
 * event too large for the ROSS event size.
 */
static void model_net_event_impl(
        int net_id,
        char* category, 
        tw_lpid final_dest_lp, 
        uint64_t message_size, 
        int is_pull,
        uint64_t pull_msg_size,
        tw_stime offset,
        int remote_event_size,
        const void* remote_event,
        int self_event_size,
        const void* self_event,
        tw_lp *sender) {
    uint64_t packet_size;
    uint64_t num_packets; /* Number of packets to be issued by the API */
    uint64_t i;
    int event_size;
    int last = 0;
    tw_stime poffset;

    /* Validate the network id before anything uses it */
    if(net_id < 0 || net_id >= MAX_NETS)
    {
        int m;

        fprintf(stderr, "Error: undefined network ID %d (Available options", net_id);
        for(m = 0; method_array[m] != NULL; m++)
            fprintf(stderr, "%s %d (%s)", (m == 0) ? "" : ",", m, method_array[m]->method_name);
        fprintf(stderr, ") \n");
        exit(-1);
    }

    /* determine packet size for underlying method */
    packet_size = model_net_get_packet_size(net_id);
    if(packet_size == 0)
    {
        /* would otherwise divide by zero below */
        fprintf(stderr, "Error: model_net packet size for network ID %d is 0\n", net_id);
        abort();
    }

    event_size = remote_event_size + self_event_size + model_net_get_msg_sz(net_id);
    if(event_size > g_tw_msg_sz)
    {
        fprintf(stderr, "Error: model_net trying to transmit an event of size %d but ROSS is configured for events of size %zu\n",
            event_size,
            (size_t)g_tw_msg_sz);
        abort();
    }

    /* Round up so the data left over when message_size is not an exact
     * multiple of packet_size still gets a packet, and always send at least
     * one packet. */
    num_packets = message_size / packet_size;
    if(message_size % packet_size)
        num_packets++;
    if(num_packets == 0)
        num_packets = 1;

    /* issue N packets using method API */
    /* the final packet is marked as the one responsible for delivering
     * the self event and remote event
     *
     * local event is delivered to caller of this function, remote event is
     * passed along through network hops and delivered to final_dest_lp
     */
    poffset = (in_sequence) ? mn_msg_offset : 0.0;

    for(i = 0; i < num_packets; i++)
    {
        uint64_t this_packet_size = packet_size;

        /* Mark the last packet to the net method API */
        if(i == num_packets - 1)
        {
            last = 1;
            /* the last packet carries whatever is left of the message */
            this_packet_size = message_size - ((num_packets - 1) * packet_size);
        }

        /* the value returned by the method is accumulated into the running
         * offset used for the next packet */
        poffset += method_array[net_id]->model_net_method_packet_event(category,
                final_dest_lp, this_packet_size, is_pull, pull_msg_size, poffset+offset,
                remote_event_size, remote_event, self_event_size, self_event, sender, last);
    }

    if(in_sequence)
        mn_msg_offset = poffset;

    return;
}

214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248

/*
 * Send a message of message_size through network net_id.  This is the
 * non-pull entry point: it forwards to model_net_event_impl() with the pull
 * flag and the pull message size both set to zero.
 */
void model_net_event(
    int net_id,
    char* category, 
    tw_lpid final_dest_lp, 
    uint64_t message_size, 
    tw_stime offset,
    int remote_event_size,
    const void* remote_event,
    int self_event_size,
    const void* self_event,
    tw_lp *sender)
{
    const int is_pull = 0;
    const uint64_t pull_msg_size = 0;

    model_net_event_impl(net_id, category, final_dest_lp, message_size,
            is_pull, pull_msg_size, offset,
            remote_event_size, remote_event,
            self_event_size, self_event,
            sender);
}

/*
 * Issue a pull request through network net_id.  The request itself is sent
 * as a PULL_MSG_SIZE-byte message with the pull flag set; message_size is
 * forwarded as the pull message size.
 */
void model_net_pull_event(
        int net_id,
        char *category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
        const void *self_event,
        tw_lp *sender){
    const int is_pull = 1;
    /* NOTE: for a pull, the caller's self event goes into the *remote* event
     * slot - it will be remote from the destination's POV.  No self event is
     * attached to the request itself. */
    const int no_self_event_size = 0;
    const void *no_self_event = NULL;

    model_net_event_impl(net_id, category, final_dest_lp, PULL_MSG_SIZE,
            is_pull, message_size, offset,
            self_event_size, self_event,
            no_self_event_size, no_self_event,
            sender);
}


249
250
251
int model_net_set_params()
{
  char mn_name[MAX_NAME_LENGTH];
252
253
  long int packet_size_l = 0;
  uint64_t packet_size;
254
  int net_id=-1;
255
256
257
258

  config_lpgroups_t paramconf;
  configuration_get_lpgroups(&config, "PARAMS", &paramconf);
  configuration_get_value(&config, "PARAMS", "modelnet", mn_name, MAX_NAME_LENGTH);
259
260
  configuration_get_value_longint(&config, "PARAMS", "packet_size", &packet_size_l);
  packet_size = packet_size_l;
261

262
263
264
  if(!packet_size)
  {
	packet_size = 512;
265
	printf("\n Warning, no packet size specified, setting packet size to %llu ", packet_size);
266
267
268
269
270
271
272
273
  }
  if(strcmp("simplenet",mn_name)==0)
   {
     double net_startup_ns, net_bw_mbps;
     simplenet_param net_params;
     
     configuration_get_value_double(&config, "PARAMS", "net_startup_ns", &net_startup_ns);
     configuration_get_value_double(&config, "PARAMS", "net_bw_mbps", &net_bw_mbps);
274
275
     net_params.net_startup_ns = net_startup_ns;
     net_params.net_bw_mbps =  net_bw_mbps;
276
277
     net_id = model_net_setup("simplenet", packet_size, (const void*)&net_params); /* Sets the network as simplenet and packet size 512 */
   }
278
279
280
281
282
283
  else if (strcmp("simplewan",mn_name)==0){
    simplewan_param net_params;
    configuration_get_value_relpath(&config, "PARAMS", "net_startup_ns_file", net_params.startup_filename, MAX_NAME_LENGTH);
    configuration_get_value_relpath(&config, "PARAMS", "net_bw_mbps_file", net_params.bw_filename, MAX_NAME_LENGTH);
    net_id = model_net_setup("simplewan", packet_size, (const void*)&net_params);
  }
Philip Carns's avatar
Philip Carns committed
284
285
286
287
288
   else if(strcmp("loggp",mn_name)==0)
   {
     char net_config_file[256];
     loggp_param net_params;
     
289
     configuration_get_value_relpath(&config, "PARAMS", "net_config_file", net_config_file, 256);
Philip Carns's avatar
Philip Carns committed
290
291
292
293
     net_params.net_config_file = net_config_file;
     net_id = model_net_setup("loggp", packet_size, (const void*)&net_params); /* Sets the network as loggp and packet size 512 */
   }

294
295
  else if(strcmp("dragonfly", mn_name)==0)	  
    {
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
       dragonfly_param net_params;
       int num_routers=0, num_vcs=0, local_vc_size=0, global_vc_size=0, cn_vc_size=0;
       double local_bandwidth=0.0, cn_bandwidth=0.0, global_bandwidth=0.0;
       
       configuration_get_value_int(&config, "PARAMS", "num_routers", &num_routers);
       if(!num_routers)
	{
	   num_routers = 4; 
	   printf("\n Number of dimensions not specified, setting to %d ", num_routers);
        } 
       net_params.num_routers = num_routers; 

       configuration_get_value_int(&config, "PARAMS", "num_vcs", &num_vcs);
       if(!num_vcs)
       {
          num_vcs = 1;
	  printf("\n Number of virtual channels not specified, setting to %d ", num_vcs);
       }
       net_params.num_vcs = num_vcs;

       configuration_get_value_int(&config, "PARAMS", "local_vc_size", &local_vc_size);
       if(!local_vc_size)
	{
	   local_vc_size = 1024;
	   printf("\n Buffer size of local channels not specified, setting to %d ", local_vc_size);
	}
       net_params.local_vc_size = local_vc_size;

       configuration_get_value_int(&config, "PARAMS", "global_vc_size", &global_vc_size);
       if(!global_vc_size)
	{
	  global_vc_size = 2048;
	  printf("\n Buffer size of global channels not specified, setting to %d ", global_vc_size);
	}
       net_params.global_vc_size = global_vc_size;

       configuration_get_value_int(&config, "PARAMS", "cn_vc_size", &cn_vc_size);
       if(!cn_vc_size)
	 {
	    cn_vc_size = 1024;
	    printf("\n Buffer size of compute node channels not specified, setting to %d ", cn_vc_size);
	 }
       net_params.cn_vc_size = cn_vc_size;

	configuration_get_value_double(&config, "PARAMS", "local_bandwidth", &local_bandwidth);
        if(!local_bandwidth)
	  {
	    local_bandwidth = 5.25;
	    printf("\n Bandwidth of local channels not specified, setting to %lf ", local_bandwidth);
	 }
       net_params.local_bandwidth = local_bandwidth;

       configuration_get_value_double(&config, "PARAMS", "global_bandwidth", &global_bandwidth);
        if(!global_bandwidth)
	{
	     global_bandwidth = 4.7;
	     printf("\n Bandwidth of global channels not specified, setting to %lf ", global_bandwidth);
	}
	net_params.global_bandwidth = global_bandwidth;

	configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", &cn_bandwidth);
	if(!cn_bandwidth)
	 {
	     cn_bandwidth = 5.25;
	     printf("\n Bandwidth of compute node channels not specified, setting to %lf ", cn_bandwidth);
	}
	net_params.cn_bandwidth = cn_bandwidth;

       char routing[MAX_NAME_LENGTH];
       configuration_get_value(&config, "PARAMS", "routing", routing, MAX_NAME_LENGTH);
       if(strcmp(routing, "minimal") == 0)
	   net_params.routing = 0;
       else if(strcmp(routing, "nonminimal")==0 || strcmp(routing,"non-minimal")==0)
	       net_params.routing = 1;
       else
       {
       	   printf("\n No routing protocol specified, setting to minimal routing");
   	   net_params.routing = 0;	   
       }
    net_id = model_net_setup("dragonfly", packet_size, (const void*)&net_params);   
376
377
378
    }
   else if(strcmp("torus", mn_name)==0)
     {
379
380
	torus_param net_params;
	char dim_length[MAX_NAME_LENGTH];
381
	int n_dims=0, buffer_size=0, num_vc=0, i=0, chunk_size = 0;
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
	double link_bandwidth=0;

	configuration_get_value_int(&config, "PARAMS", "n_dims", &n_dims);
	if(!n_dims)
	{
	   n_dims = 4; /* a 4-D torus */
	   printf("\n Number of dimensions not specified, setting to %d ", n_dims);
	}
	
	configuration_get_value_double(&config, "PARAMS", "link_bandwidth", &link_bandwidth);	
	if(!link_bandwidth)
	{
		link_bandwidth = 2.0; /*default bg/q configuration */
		printf("\n Link bandwidth not specified, setting to %lf ", link_bandwidth);
	}

	configuration_get_value_int(&config, "PARAMS", "buffer_size", &buffer_size);
	if(!buffer_size)
	{
		buffer_size = 2048;
		printf("\n Buffer size not specified, setting to %d ",buffer_size);
	}

405
406
407
408
409
410
	configuration_get_value_int(&config, "PARAMS", "chunk_size", &chunk_size);
	if(!chunk_size)
	{
	       chunk_size = 32;
	       printf("\n Chunk size not specified, setting to %d ", chunk_size);
	}
411
412
413
414
415
416
417
418
419
420
421
422
	configuration_get_value_int(&config, "PARAMS", "num_vc", &num_vc);
	if(!num_vc)
	{
		num_vc = 1; /*by default, we have one for taking packets, another for taking credit*/
		printf("\n num_vc not specified, setting to %d ", num_vc);
	}

        configuration_get_value(&config, "PARAMS", "dim_length", dim_length, MAX_NAME_LENGTH);
        char* token;
	net_params.n_dims=n_dims;
	net_params.num_vc=num_vc;
	net_params.buffer_size=buffer_size;
423
	net_params.chunk_size = chunk_size;
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
	net_params.link_bandwidth=link_bandwidth;
	net_params.dim_length=malloc(n_dims*sizeof(int));
        token = strtok(dim_length, ",");	
	while(token != NULL)
	{
	   sscanf(token, "%d", &net_params.dim_length[i]);
	   if(!net_params.dim_length[i])
	   {
	      printf("\n Invalid torus dimension specified %d, exitting... ", net_params.dim_length[i]);
	      MPI_Finalize();
	      exit(-1);
	   }
	   i++;
	   token = strtok(NULL,",");
	}
	net_id = model_net_setup("torus", packet_size, (const void*)&net_params);
440
441
442
443
444
     }
  else
       printf("\n Invalid network argument %s ", mn_name);
  return net_id;
}
445
/*
 * Reverse computation for anything calculated within the model_net_event()
 * call itself (not reverse handling for the underlying methods, which have
 * their own events and reverse handlers).
 *
 * Calls the method's packet_event_rc hook once per packet that the forward
 * path issued for a message of message_size.  The packet count is computed
 * exactly as in model_net_event_impl(), including the rule that a message
 * shorter than one packet (or of zero length) still counts as one packet.
 *
 * net_id:       index of the network method in method_array
 * sender:       LP that issued the message being rolled back
 * message_size: size of the message the forward call was given
 */
static void model_net_event_impl_rc(
    int net_id,
    tw_lp *sender,
    uint64_t message_size)
{
    uint64_t packet_size = model_net_get_packet_size(net_id);
    uint64_t num_packets; /* For rolling back */
    uint64_t i;

    if(packet_size == 0)
    {
        /* would otherwise divide by zero below */
        fprintf(stderr, "Error: model_net packet size for network ID %d is 0\n", net_id);
        abort();
    }

    num_packets = message_size / packet_size;
    if(message_size % packet_size)
        num_packets++;
    /* the forward path always issues at least one packet, so undo one */
    if(num_packets == 0)
        num_packets = 1;

    for(i = 0; i < num_packets; i++)
    {
        method_array[net_id]->model_net_method_packet_event_rc(sender);
    }
    return;
}

470
471
472
473
474
475
476
477
478
479
480
481
482
/* Reverse handler matching model_net_event(): rolls back the packets issued
 * for a message of message_size on network net_id. */
void model_net_event_rc(
        int net_id,
        tw_lp *sender,
        uint64_t message_size)
{
    const uint64_t sent_size = message_size;

    model_net_event_impl_rc(net_id, sender, sent_size);
}

/* Reverse handler matching model_net_pull_event(): a pull request is sent as
 * a PULL_MSG_SIZE-byte message, so that is the size rolled back. */
void model_net_pull_event_rc(
        int net_id,
        tw_lp *sender)
{
    const uint64_t sent_size = PULL_MSG_SIZE;

    model_net_event_impl_rc(net_id, sender, sent_size);
}

483
/* returns the message size, can be either simplenet, dragonfly or torus message size*/
484
static int model_net_get_msg_sz(int net_id)
485
486
487
{
   // TODO: Add checks on network name
   // TODO: Add dragonfly and torus network models
488
   if(net_id < 0 || net_id >= MAX_NETS)
489
490
491
492
493
494
495
496
497
     {
      printf("%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
      exit(-1);
     }

       return method_array[net_id]->mn_get_msg_sz();
}

/* returns the packet size in the modelnet struct */
498
uint64_t model_net_get_packet_size(int net_id)
499
{
500
  if(net_id < 0 || net_id >= MAX_NETS)
501
502
503
504
505
506
507
508
509
510
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }
  return method_array[net_id]->packet_size; // TODO: where to set the packet size?
}

/* returns lp type for modelnet */
const tw_lptype* model_net_get_lp_type(int net_id)
{
511
    if(net_id < 0 || net_id >= MAX_NETS)
512
513
514
515
516
517
518
519
520
521
     {
       fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
       exit(-1);
     }

   // TODO: ADd checks by network names
   // Add dragonfly and torus network models
   return method_array[net_id]->mn_get_lp_type();
}

522
523
void model_net_report_stats(int net_id)
{
524
  if(net_id < 0 || net_id >= MAX_NETS)
525
526
527
528
529
530
531
  {
    fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
    exit(-1);
   }

     // TODO: ADd checks by network names
     //    // Add dragonfly and torus network models
532
533
   method_array[net_id]->mn_report_stats();
   return;
534
}
535
536
537
538
539
540
541
542
/* registers the lp type */
void model_net_add_lp_type(int net_id)
{
 switch(net_id)
 {
   case SIMPLENET:
       lp_type_register("modelnet_simplenet", model_net_get_lp_type(net_id));
   break;
543
544
545
   case SIMPLEWAN:
        lp_type_register("modelnet_simplewan", model_net_get_lp_type(net_id));
        break;
546
547
548
   case TORUS:
       lp_type_register("modelnet_torus", model_net_get_lp_type(net_id));
       break;
549
550
551
   case DRAGONFLY:
       lp_type_register("modelnet_dragonfly", model_net_get_lp_type(net_id));
       break;
552
553
554
   case LOGGP:
       lp_type_register("modelnet_loggp", model_net_get_lp_type(net_id));
       break;
555
   default:
556
557
558
559
560
561
    {
        printf("\n Invalid net_id specified ");
	exit(-1);
    }
 }
}
562
563
564
565
566
567

/*
 * Returns the LP id produced by the model_net_method_find_local_device hook
 * of network method net_id for the given sender.  Exits the program if
 * net_id is out of range, like the other model_net accessors.
 */
tw_lpid model_net_find_local_device(int net_id, tw_lp *sender)
{
    /* guard the table index; an unchecked id would read past method_array */
    if(net_id < 0 || net_id >= MAX_NETS)
    {
        fprintf(stderr, "%s Error: Uninitializied modelnet network, call modelnet_init first\n", __FUNCTION__);
        exit(-1);
    }

    return(method_array[net_id]->model_net_method_find_local_device(sender));
}

568
569
570
571
572
573
574
575
/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */