dragonfly.c 120 KB
Newer Older
Philip Carns's avatar
Philip Carns committed
1
2
3
4
5
6
/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
// Local router ID: 0 --- total_router-1
8
// Router LP ID 
9
10
// Terminal LP ID

11
12
#include <ross.h>

13
#define DEBUG_LP 892
14
#include "codes/jenkins-hash.h"
15
16
17
18
#include "codes/codes_mapping.h"
#include "codes/codes.h"
#include "codes/model-net.h"
#include "codes/model-net-method.h"
19
20
#include "codes/model-net-lp.h"
#include "codes/net/dragonfly.h"
21
#include "sys/file.h"
22
#include "codes/quickhash.h"
23
#include "codes/rc-stack.h"
24

Matthieu Dorier's avatar
Matthieu Dorier committed
25
26
27
28
29
#ifdef ENABLE_CORTEX
#include <cortex/cortex.h>
#include <cortex/topology.h>
#endif

30
/* Size in bytes assumed for a credit message (see the credit_delay
 * computation in dragonfly_read_config()). */
#define CREDIT_SZ 8
#define MEAN_PROCESS 1.0

/* ---- collective specific parameters ---- */
/* Fan-out of the collective tree built in dragonfly_collective_init(). */
#define TREE_DEGREE 4
#define LEVEL_DELAY 1000
/* Set to 1 to print the collective tree layout at init time. */
#define DRAGONFLY_COLLECTIVE_DEBUG 0
#define NUM_COLLECTIVES  1
#define COLLECTIVE_COMPUTATION_DELAY 5700
#define DRAGONFLY_FAN_OUT_DELAY 20.0
#define WINDOW_LENGTH 0
#define DFLY_HASH_TABLE_SIZE 4999

/* ---- debugging parameters ---- */
#define TRACK -1
#define TRACK_PKT -1
#define TRACK_MSG -1
#define PRINT_ROUTER_TABLE 1
#define DEBUG 0
#define USE_DIRECT_SCHEME 1
#define MAX_STATS 65536

/* ---- LP configuration / method names for terminals and routers ---- */
#define LP_CONFIG_NM_TERM (model_net_lp_config_names[DRAGONFLY])
#define LP_METHOD_NM_TERM (model_net_method_names[DRAGONFLY])
#define LP_CONFIG_NM_ROUT (model_net_lp_config_names[DRAGONFLY_ROUTER])
#define LP_METHOD_NM_ROUT (model_net_method_names[DRAGONFLY_ROUTER])
56

57
58
59
60
61
#ifdef ENABLE_CORTEX
/* This structure is defined at the end of the file */
extern cortex_topology dragonfly_cortex_topology;
#endif

62
int debug_slot_count = 0;

/* Event counters for terminals and routers (forward and reverse). */
long term_ecount;
long router_ecount;
long term_rev_ecount;
long router_rev_ecount;

/* Packet counters; reduced across ranks in dragonfly_report_stats(). */
long packet_gen = 0;
long packet_fin = 0;
65

66
67
/* Return the larger of two doubles; `a` is returned when the values are
 * equal or when the comparison `a < b` is false. */
static double maxd(double a, double b)
{
    if (a < b) {
        return b;
    }
    return a;
}

68
/* minimal and non-minimal packet counts for adaptive routing */
static int minimal_count = 0;
static int nonmin_count = 0;
static int num_routers_per_mgrp = 0;

typedef struct dragonfly_param dragonfly_param;

/* annotation-specific parameters (the unannotated entry, when present,
 * occupies the last index) */
static uint64_t num_params = 0;
static dragonfly_param *all_params = NULL;
static const config_anno_map_t *anno_map = NULL;

/* global scratch variables filled in by codes_mapping_get_lp_info() */
static char lp_group_name[MAX_NAME_LENGTH];
static int mapping_grp_id;
static int mapping_type_id;
static int mapping_rep_id;
static int mapping_offset;

/* router magic number (set in dragonfly_read_config()) */
int router_magic_num = 0;

/* terminal magic number (set in dragonfly_read_config()) */
int terminal_magic_num = 0;

FILE *dragonfly_log = NULL;

int sample_bytes_written = 0;
int sample_rtr_bytes_written = 0;

/* Sample file names read from the "cn_sample_file" / "rt_sample_file"
 * configuration keys. */
char dfly_cn_sample_file[MAX_NAME_LENGTH];
char dfly_rtr_sample_file[MAX_NAME_LENGTH];
96

97
98
99
100
101
102
103
typedef struct terminal_message_list terminal_message_list;

/* Node of a doubly linked message queue. */
struct terminal_message_list {
    terminal_message       msg;        /* copy of the queued message */
    char                  *event_data; /* optional heap buffer, released by
                                          delete_terminal_message_list() */
    terminal_message_list *next;       /* towards the tail, NULL at the tail */
    terminal_message_list *prev;       /* towards the head, NULL at the head */
};
104

Nikhil's avatar
Nikhil committed
105
static void init_terminal_message_list(terminal_message_list *this, 
106
107
108
109
110
111
    terminal_message *inmsg) {
    this->msg = *inmsg;
    this->event_data = NULL;
    this->next = NULL;
    this->prev = NULL;
}
112

Nikhil's avatar
Nikhil committed
113
/* Release a queue node together with its attached event data.
 * `this` must not be NULL. */
static void delete_terminal_message_list(terminal_message_list *this)
{
    /* free(NULL) is a no-op, so no explicit check is required */
    free(this->event_data);
    free(this);
}
117

118
119
120
121
122
123
124
125
126
127
128
129
130
131
/* Per-annotation dragonfly configuration, filled in by
 * dragonfly_read_config(). */
struct dragonfly_param
{
    /* ---- configuration parameters ---- */
    int    num_routers;      /* number of routers in a group */
    double local_bandwidth;  /* bandwidth of the router-router channels within a group */
    double global_bandwidth; /* bandwidth of the inter-group router connections */
    double cn_bandwidth;     /* bandwidth of the compute node channels connected to routers */
    int    num_vcs;          /* number of virtual channels */
    int    local_vc_size;    /* buffer size of the router-router channels */
    int    global_vc_size;   /* buffer size of the global channels */
    int    cn_vc_size;       /* buffer size of the compute node channels */
    int    chunk_size;       /* full-sized packets are broken into chunks of this size */

    /* ---- derived parameters ---- */
    int    num_cn;              /* num_routers / 2 */
    int    num_groups;          /* num_routers * num_cn + 1 */
    int    num_real_groups;     /* computed in router_setup() from the LP count */
    int    radix;               /* num_routers + num_global_channels + num_cn */
    int    total_routers;       /* num_groups * num_routers */
    int    total_terminals;     /* total_routers * num_cn */
    int    num_global_channels; /* num_routers / 2 */
    double cn_delay;            /* ns to move one chunk at cn_bandwidth */
    double local_delay;         /* ns to move one chunk at local_bandwidth */
    double global_delay;        /* ns to move one chunk at global_bandwidth */
    double credit_delay;        /* ns to move CREDIT_SZ bytes at local_bandwidth */
    double router_delay;        /* "router_delay" config key, defaults to 50 */
};

145
146
147
148
149
150
/* Lookup key for the per-terminal hash table: a message is identified by
 * its id together with the LP that sent it. */
struct dfly_hash_key
{
    uint64_t message_id;
    tw_lpid  sender_id;
};

151
152
153
154
/* One sampling record for a router. */
struct dfly_router_sample
{
    tw_lpid   router_id;
    tw_stime *busy_time;           /* array; router_setup() sizes the ROSS
                                      sample copy with `radix` entries */
    int64_t  *link_traffic_sample; /* array, sized like busy_time */
    tw_stime  end_time;
    long      fwd_events;
    long      rev_events;
};

struct dfly_cn_sample
162
163
164
165
166
167
168
169
{
   tw_lpid terminal_id;
   long fin_chunks_sample;
   long data_size_sample;
   double fin_hops_sample;
   tw_stime fin_chunks_time;
   tw_stime busy_time_sample;
   tw_stime end_time;
170
171
   long fwd_events;
   long rev_events;
172
173
};

174
175
176
177
struct dfly_qhash_entry
{
   struct dfly_hash_key key;
   char * remote_event_data;
178
   uint64_t num_chunks;
179
180
181
182
   int remote_event_size;
   struct qhash_head hash_link;
};

183
184
185
186
187
188
189
190
/* handles terminal and router events like packet generate/send/receive/buffer */
typedef enum event_t event_t;
typedef struct terminal_state terminal_state;
typedef struct router_state router_state;

/* dragonfly compute node data structure */
struct terminal_state
{
191
   uint64_t packet_counter;
192

193
194
195
   int packet_gen;
   int packet_fin;

196
   // Dragonfly specific parameters
197
198
   unsigned int router_id;
   unsigned int terminal_id;
199
200
201

   // Each terminal will have an input and output channel with the router
   int* vc_occupancy; // NUM_VC
202
   int num_vcs;
203
   tw_stime terminal_available_time;
204
205
206
   terminal_message_list **terminal_msgs;
   terminal_message_list **terminal_msgs_tail;
   int in_send_loop;
207
208
209
210
// Terminal generate, sends and arrival T_SEND, T_ARRIVAL, T_GENERATE
// Router-Router Intra-group sends and receives RR_LSEND, RR_LARRIVE
// Router-Router Inter-group sends and receives RR_GSEND, RR_GARRIVE
   struct mn_stats dragonfly_stats_array[CATEGORY_MAX];
211
212
213
  /* collective init time */
  tw_stime collective_init_time;

214
  /* node ID in the tree */ 
215
216
   tw_lpid node_id;

217
   /* messages sent & received in collectives may get interchanged several times so we have to save the 
218
     origin server information in the node's state */
219
220
   tw_lpid origin_svr; 
  
221
222
223
224
225
226
227
228
229
230
231
  /* parent node ID of the current node */
   tw_lpid parent_node_id;
   /* array of children to be allocated in terminal_init*/
   tw_lpid* children;

   /* children of a node can be less than or equal to the tree degree */
   int num_children;

   short is_root;
   short is_leaf;

232
   struct rc_stack * st;
233
234
   int issueIdle;
   int terminal_length;
235

236
237
238
   /* to maintain a count of child nodes that have fanned in at the parent during the collective
      fan-in phase*/
   int num_fan_nodes;
239
240

   const char * anno;
241
   dragonfly_param *params;
242

243
244
245
   struct qhash_table *rank_tbl;
   uint64_t rank_tbl_pop;

Misbah Mubarak's avatar
Misbah Mubarak committed
246
   tw_stime   total_time;
247
   uint64_t total_msg_size;
248
   double total_hops;
249
   long finished_msgs;
250
   long finished_chunks;
251
   long finished_packets;
252

253
   tw_stime * last_buf_full;
254
   tw_stime busy_time;
255
   char output_buf[4096];
256
257
   /* For LP suspend functionality */
   int error_ct;
258
259
260
261
262
263
264
265
266

   /* For sampling */
   long fin_chunks_sample;
   long data_size_sample;
   double fin_hops_sample;
   tw_stime fin_chunks_time;
   tw_stime busy_time_sample;

   char sample_buf[4096];
267
   struct dfly_cn_sample * sample_stat;
268
269
   int op_arr_size;
   int max_arr_size;
270
   
271
272
273
   /* for logging forward and reverse events */
   long fwd_events;
   long rev_events;
274
275
276
277
278
279
280

   /* following used for ROSS model-level stats collection */
   long fin_chunks_ross_sample;
   long data_size_ross_sample;
   long fin_hops_ross_sample;
   tw_stime fin_chunks_time_ross_sample;
   tw_stime busy_time_ross_sample;
281
   struct dfly_cn_sample ross_sample;
282
};
283

284
285
286
287
288
/* Event types handled by terminal (T_*), router (R_*) and collective (D_*)
 * code paths. Numbering starts at 1. */
enum event_t
{
    T_GENERATE           = 1,
    T_ARRIVE             = 2,
    T_SEND               = 3,
    T_BUFFER             = 4,
    R_SEND               = 5,
    R_ARRIVE             = 6,
    R_BUFFER             = 7,
    D_COLLECTIVE_INIT    = 8,
    D_COLLECTIVE_FAN_IN  = 9,
    D_COLLECTIVE_FAN_OUT = 10
};
/* A virtual channel is either idle, active, allocated, or waiting for a
 * credit. */
enum vc_status
{
    VC_IDLE   = 0,
    VC_ACTIVE = 1,
    VC_ALLOC  = 2,
    VC_CREDIT = 3
};

/* Kind of link a packet traversed on its last hop: a global channel, a
 * local channel, or a terminal link. */
enum last_hop
{
    GLOBAL   = 0,
    LOCAL    = 1,
    TERMINAL = 2
};

/* Routing algorithms selectable through the "routing" configuration key.
 * Adaptive routing is not accurate and fully functional in the current
 * version, as the formulas for detecting load on global channels are not
 * very accurate. */
enum ROUTING_ALGO
{
    MINIMAL       = 0,
    NON_MINIMAL   = 1,
    ADAPTIVE      = 2,
    PROG_ADAPTIVE = 3
};

struct router_state
{
   unsigned int router_id;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
329
   int group_id;
330
331
332
   int op_arr_size;
   int max_arr_size;

333
334
   int* global_channel; 
   
335
   tw_stime* next_output_available_time;
336
   tw_stime* cur_hist_start_time;
337
   tw_stime** last_buf_full;
338

339
   tw_stime* busy_time;
340
   tw_stime* busy_time_sample;
341

342
343
344
345
346
   terminal_message_list ***pending_msgs;
   terminal_message_list ***pending_msgs_tail;
   terminal_message_list ***queued_msgs;
   terminal_message_list ***queued_msgs_tail;
   int *in_send_loop;
347
   int *queued_count;
348
   struct rc_stack * st;
349
   
350
   int** vc_occupancy;
351
   int64_t* link_traffic;
352
   int64_t * link_traffic_sample;
353
354

   const char * anno;
355
   dragonfly_param *params;
356
357
358

   int* prev_hist_num;
   int* cur_hist_num;
359
   
360
   char output_buf[4096];
361
   char output_buf2[4096];
362
363

   struct dfly_router_sample * rsamples;
364
   
365
366
   long fwd_events;
   long rev_events;
367
368
369
370

   /* following used for ROSS model-level stats collection */
   tw_stime* busy_time_ross_sample;
   int64_t * link_traffic_ross_sample;
371
   struct dfly_router_sample ross_rsample;
372
373
};

374
375
376
377
/* Forward declarations for the ROSS model stats collection hooks; they are
 * needed here because dragonfly_model_types[] refers to them. */
void dragonfly_event_collect(terminal_message *m, tw_lp *lp,
                             char *buffer, int *collect_flag);
void dragonfly_model_stat_collect(terminal_state *s, tw_lp *lp,
                                  char *buffer);
void dfly_router_model_stat_collect(router_state *s, tw_lp *lp,
                                    char *buffer);

static void ross_dragonfly_rsample_fn(router_state *s, tw_bf *bf, tw_lp *lp,
                                      struct dfly_router_sample *sample);
static void ross_dragonfly_rsample_rc_fn(router_state *s, tw_bf *bf, tw_lp *lp,
                                         struct dfly_router_sample *sample);
static void ross_dragonfly_sample_fn(terminal_state *s, tw_bf *bf, tw_lp *lp,
                                     struct dfly_cn_sample *sample);
static void ross_dragonfly_sample_rc_fn(terminal_state *s, tw_bf *bf, tw_lp *lp,
                                        struct dfly_cn_sample *sample);
382
383
384
385
386
387
388

/* ROSS instrumentation descriptors: entry 0 is used for terminals, entry 1
 * for routers, and the all-zero entry terminates the table. */
st_model_types dragonfly_model_types[] = {
    /* terminal */
    {
        (rbev_trace_f) dragonfly_event_collect,
        sizeof(int),
        (ev_trace_f) dragonfly_event_collect,
        sizeof(int),
        (model_stat_f) dragonfly_model_stat_collect,
        sizeof(tw_lpid) + sizeof(long) * 2 + sizeof(double) + sizeof(tw_stime) * 2,
        (sample_event_f) ross_dragonfly_sample_fn,
        (sample_revent_f) ross_dragonfly_sample_rc_fn,
        sizeof(struct dfly_cn_sample)
    },
    /* router */
    {
        (rbev_trace_f) dragonfly_event_collect,
        sizeof(int),
        (ev_trace_f) dragonfly_event_collect,
        sizeof(int),
        (model_stat_f) dfly_router_model_stat_collect,
        0, /* updated in router_setup() since it's based on the radix */
        (sample_event_f) ross_dragonfly_rsample_fn,
        (sample_revent_f) ross_dragonfly_rsample_rc_fn,
        0  /* updated in router_setup() since it's based on the radix */
    },
    /* terminator */
    { NULL, 0, NULL, 0, NULL, 0, NULL, NULL, 0 }
};
404
/* End of ROSS model stats collection */
405
406
407

/* Routing algorithm in effect (an enum ROUTING_ALGO value), selected in
 * dragonfly_read_config(). */
static short routing = MINIMAL;

/* Rank-local statistics; reduced across ranks in dragonfly_report_stats(). */
static tw_stime dragonfly_total_time = 0;
static tw_stime dragonfly_max_latency = 0;
static tw_stime max_collective = 0;

static long long total_hops = 0;
static long long N_finished_packets = 0;
static long long total_msg_sz = 0;
static long long N_finished_msgs = 0;
static long long N_finished_chunks = 0;
418

419
420
421
422
static int dragonfly_rank_hash_compare(
        void *key, struct qhash_head *link)
{
    struct dfly_hash_key *message_key = (struct dfly_hash_key *)key;
423
    struct dfly_qhash_entry *tmp = NULL;
424
425

    tmp = qhash_entry(link, struct dfly_qhash_entry, hash_link);
426
    
427
428
429
430
431
432
    if (tmp->key.message_id == message_key->message_id
            && tmp->key.sender_id == message_key->sender_id)
        return 1;

    return 0;
}
433
434
static int dragonfly_hash_func(void *k, int table_size)
{
435
    struct dfly_hash_key *tmp = (struct dfly_hash_key *)k;
436
    //uint32_t pc = 0, pb = 0;	
437
438
    //bj_hashlittle2(tmp, sizeof(*tmp), &pc, &pb);
    uint64_t key = (~tmp->message_id) + (tmp->message_id << 18);
439
440
    key = key * 21;
    key = ~key ^ (tmp->sender_id >> 4);
441
    key = key * tmp->sender_id; 
442
443
    return (int)(key & (table_size - 1));
    //return (int)(pc % (table_size - 1));
444
445
}

446
447
448
449
450
451
452
/*
 * Time in nanoseconds needed to move `bytes` bytes over a link whose
 * bandwidth is `GB_p_s` GiB/s (1 GiB = 1024^3 bytes).
 */
static tw_stime bytes_to_ns(uint64_t bytes, double GB_p_s)
{
    /* bytes -> GiB */
    const tw_stime gib = ((double)bytes) / (1024.0 * 1024.0 * 1024.0);
    /* GiB -> seconds */
    const tw_stime seconds = gib / GB_p_s;

    /* seconds -> ns */
    return seconds * 1000.0 * 1000.0 * 1000.0;
}
460

461
462
/* returns the dragonfly message size */
static int dragonfly_get_msg_sz(void)
463
{
464
465
	   return sizeof(terminal_message);
}
466

467
468
static void free_tmp(void * ptr)
{
469
    struct dfly_qhash_entry * dfly = ptr; 
470
471
472
473
474
475
    
    if(dfly->remote_event_data)
        free(dfly->remote_event_data);
   
    if(dfly)
        free(dfly);
476
}
477
/*
 * Link `msg` in as the new tail of queue number `index`.
 *
 * thisq / thistail: arrays of queue heads and tails
 * msg:              unlinked node to append
 */
static void append_to_terminal_message_list(
        terminal_message_list ** thisq,
        terminal_message_list ** thistail,
        int index,
        terminal_message_list *msg)
{
    if (thisq[index] != NULL) {
        /* non-empty queue: hook msg behind the current tail */
        terminal_message_list *old_tail = thistail[index];
        old_tail->next = msg;
        msg->prev = old_tail;
    } else {
        /* empty queue: msg is also the head */
        thisq[index] = msg;
    }

    thistail[index] = msg;
}

491
/*
 * Link `msg` in as the new head of queue number `index`.
 *
 * thisq / thistail: arrays of queue heads and tails
 * msg:              unlinked node to prepend
 */
static void prepend_to_terminal_message_list(
        terminal_message_list ** thisq,
        terminal_message_list ** thistail,
        int index,
        terminal_message_list *msg)
{
    if (thisq[index] != NULL) {
        /* non-empty queue: hook msg in front of the current head */
        terminal_message_list *old_head = thisq[index];
        old_head->prev = msg;
        msg->next = old_head;
    } else {
        /* empty queue: msg is also the tail */
        thistail[index] = msg;
    }

    thisq[index] = msg;
}
504

505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
/*
 * Unlink and return the head of queue number `index`.
 * Returns NULL when the queue is empty.
 */
static terminal_message_list* return_head(
        terminal_message_list ** thisq,
        terminal_message_list ** thistail,
        int index)
{
    terminal_message_list *head = thisq[index];
    if (head == NULL)
        return NULL;

    terminal_message_list *successor = head->next;
    thisq[index] = successor;

    if (successor == NULL) {
        /* head was the only element: the queue is now empty */
        thistail[index] = NULL;
    } else {
        successor->prev = NULL;
        head->next = NULL;
    }

    return head;
}

522
523
524
525
526
/*
 * Unlink and return the tail of queue number `index`.
 * The queue must not be empty (enforced with an assert).
 */
static terminal_message_list* return_tail(
        terminal_message_list ** thisq,
        terminal_message_list ** thistail,
        int index)
{
    terminal_message_list *tail = thistail[index];
    assert(tail);

    terminal_message_list *predecessor = tail->prev;

    if (predecessor == NULL) {
        /* tail was the only element: the queue is now empty */
        thistail[index] = NULL;
        thisq[index] = NULL;
    } else {
        predecessor->next = NULL;
        thistail[index] = predecessor;
        tail->prev = NULL;
    }

    return tail;
}

539
/*
 * Read the dragonfly parameters of one annotation from the "PARAMS"
 * configuration section into *params and compute the derived values.
 *
 * anno:   annotation to read, or NULL for the unannotated parameters
 * params: parameter set to fill in
 *
 * Keys that are missing fall back to a default and a note is printed on
 * stderr. Side effects on file-scope state: terminal_magic_num,
 * router_magic_num, dfly_cn_sample_file, dfly_rtr_sample_file and routing
 * are (re)assigned on every call.
 */
static void dragonfly_read_config(const char * anno, dragonfly_param *params){
    /* NOTE(review): h1/h2 are not reset between the two hash calls; this is
     * kept exactly as before so the magic numbers do not change. */
    uint32_t h1 = 0, h2 = 0;
    bj_hashlittle2(LP_METHOD_NM_TERM, strlen(LP_METHOD_NM_TERM), &h1, &h2);
    terminal_magic_num = h1 + h2;

    bj_hashlittle2(LP_METHOD_NM_ROUT, strlen(LP_METHOD_NM_ROUT), &h1, &h2);
    router_magic_num = h1 + h2;

    // shorthand
    dragonfly_param *p = params;

    int rc = configuration_get_value_int(&config, "PARAMS", "num_routers", anno,
            &p->num_routers);
    if(rc) {
        p->num_routers = 4;
        fprintf(stderr, "Number of routers not specified, setting to %d\n",
                p->num_routers);
    }

    p->num_vcs = 3;

    rc = configuration_get_value_int(&config, "PARAMS", "local_vc_size", anno, &p->local_vc_size);
    if(rc) {
        p->local_vc_size = 1024;
        fprintf(stderr, "Buffer size of local channels not specified, setting to %d\n", p->local_vc_size);
    }

    rc = configuration_get_value_int(&config, "PARAMS", "global_vc_size", anno, &p->global_vc_size);
    if(rc) {
        p->global_vc_size = 2048;
        fprintf(stderr, "Buffer size of global channels not specified, setting to %d\n", p->global_vc_size);
    }

    rc = configuration_get_value_int(&config, "PARAMS", "cn_vc_size", anno, &p->cn_vc_size);
    if(rc) {
        p->cn_vc_size = 1024;
        fprintf(stderr, "Buffer size of compute node channels not specified, setting to %d\n", p->cn_vc_size);
    }

    rc = configuration_get_value_int(&config, "PARAMS", "chunk_size", anno, &p->chunk_size);
    if(rc) {
        p->chunk_size = 512;
        fprintf(stderr, "Chunk size for packets is not specified, setting to %d\n", p->chunk_size);
    }

    rc = configuration_get_value_double(&config, "PARAMS", "local_bandwidth", anno, &p->local_bandwidth);
    if(rc) {
        p->local_bandwidth = 5.25;
        fprintf(stderr, "Bandwidth of local channels not specified, setting to %lf\n", p->local_bandwidth);
    }

    rc = configuration_get_value_double(&config, "PARAMS", "global_bandwidth", anno, &p->global_bandwidth);
    if(rc) {
        p->global_bandwidth = 4.7;
        fprintf(stderr, "Bandwidth of global channels not specified, setting to %lf\n", p->global_bandwidth);
    }

    rc = configuration_get_value_double(&config, "PARAMS", "cn_bandwidth", anno, &p->cn_bandwidth);
    if(rc) {
        p->cn_bandwidth = 5.25;
        fprintf(stderr, "Bandwidth of compute node channels not specified, setting to %lf\n", p->cn_bandwidth);
    }

    /* optional key: the default stays in place when it is absent */
    p->router_delay = 50;
    configuration_get_value_double(&config, "PARAMS", "router_delay", anno,
            &p->router_delay);

    configuration_get_value(&config, "PARAMS", "cn_sample_file", anno, dfly_cn_sample_file,
            MAX_NAME_LENGTH);
    configuration_get_value(&config, "PARAMS", "rt_sample_file", anno, dfly_rtr_sample_file,
            MAX_NAME_LENGTH);

    /* Start from an empty string so the comparisons below never read an
     * uninitialised buffer when the "routing" key is missing. */
    char routing_str[MAX_NAME_LENGTH] = "";
    configuration_get_value(&config, "PARAMS", "routing", anno, routing_str,
            MAX_NAME_LENGTH);
    if(strcmp(routing_str, "minimal") == 0)
        routing = MINIMAL;
    else if(strcmp(routing_str, "nonminimal")==0 ||
            strcmp(routing_str,"non-minimal")==0)
        routing = NON_MINIMAL;
    else if (strcmp(routing_str, "adaptive") == 0)
        routing = ADAPTIVE;
    else if (strcmp(routing_str, "prog-adaptive") == 0)
        routing = PROG_ADAPTIVE;
    else
    {
        fprintf(stderr,
                "No routing protocol specified, setting to minimal routing\n");
        /* actually apply the fallback announced above (was -1, which is
         * not a valid ROUTING_ALGO value) */
        routing = MINIMAL;
    }

    // set the derived parameters
    p->num_cn = p->num_routers/2;
    p->num_global_channels = p->num_routers/2;
    p->num_groups = p->num_routers * p->num_cn + 1;
    p->radix = (p->num_routers + p->num_global_channels + p->num_cn);
    p->total_routers = p->num_groups * p->num_routers;
    p->total_terminals = p->total_routers * p->num_cn;

    int rank;
    MPI_Comm_rank(MPI_COMM_CODES, &rank);
    if(!rank) {
        printf("\n Total nodes %d routers %d groups %d radix %d \n",
                p->num_cn * p->total_routers, p->total_routers, p->num_groups,
                p->radix);
    }

    /* per-chunk (and per-credit) transfer times in ns */
    p->cn_delay = bytes_to_ns(p->chunk_size, p->cn_bandwidth);
    p->local_delay = bytes_to_ns(p->chunk_size, p->local_bandwidth);
    p->global_delay = bytes_to_ns(p->chunk_size, p->global_bandwidth);
    p->credit_delay = bytes_to_ns(CREDIT_SZ, p->local_bandwidth); //assume 8 bytes packet
}

650
static void dragonfly_configure(){
651
    anno_map = codes_mapping_get_lp_anno_map(LP_CONFIG_NM_TERM);
652
653
    assert(anno_map);
    num_params = anno_map->num_annos + (anno_map->has_unanno_lp > 0);
654
    all_params = malloc(num_params * sizeof(*all_params));
655

Jonathan Jenkins's avatar
Jonathan Jenkins committed
656
    for (int i = 0; i < anno_map->num_annos; i++){
657
        const char * anno = anno_map->annotations[i].ptr;
658
659
660
661
662
        dragonfly_read_config(anno, &all_params[i]);
    }
    if (anno_map->has_unanno_lp > 0){
        dragonfly_read_config(NULL, &all_params[anno_map->num_annos]);
    }
663
664
665
#ifdef ENABLE_CORTEX
	model_net_topology = dragonfly_cortex_topology;
#endif
666
667
668
669
670
}

/* report dragonfly statistics like average and maximum packet latency, average number of hops traversed */
static void dragonfly_report_stats()
{
671
672
   long long avg_hops, total_finished_packets, total_finished_chunks;
   long long total_finished_msgs, final_msg_sz;
673
   tw_stime avg_time, max_time;
674
   int total_minimal_packets, total_nonmin_packets;
675
   long total_gen, total_fin;
676

677
678
679
680
681
682
683
   MPI_Reduce( &total_hops, &avg_hops, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
   MPI_Reduce( &N_finished_packets, &total_finished_packets, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
   MPI_Reduce( &N_finished_msgs, &total_finished_msgs, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
   MPI_Reduce( &N_finished_chunks, &total_finished_chunks, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
   MPI_Reduce( &total_msg_sz, &final_msg_sz, 1, MPI_LONG_LONG, MPI_SUM, 0, MPI_COMM_CODES);
   MPI_Reduce( &dragonfly_total_time, &avg_time, 1,MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_CODES);
   MPI_Reduce( &dragonfly_max_latency, &max_time, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_CODES);
684
   
685
686
   MPI_Reduce( &packet_gen, &total_gen, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES);
   MPI_Reduce( &packet_fin, &total_fin, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_CODES);
687
   if(routing == ADAPTIVE || routing == PROG_ADAPTIVE)
688
    {
689
690
	MPI_Reduce(&minimal_count, &total_minimal_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES);
 	MPI_Reduce(&nonmin_count, &total_nonmin_packets, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_CODES);
691
    }
692

693
694
   /* print statistics */
   if(!g_tw_mynode)
695
696
   {	
      printf(" Average number of hops traversed %f average chunk latency %lf us maximum chunk latency %lf us avg message size %lf bytes finished messages %lld finished chunks %lld \n", 
697
              (float)avg_hops/total_finished_chunks, avg_time/(total_finished_chunks*1000), max_time/1000, (float)final_msg_sz/total_finished_msgs, total_finished_msgs, total_finished_chunks);
698
     if(routing == ADAPTIVE || routing == PROG_ADAPTIVE)
699
              printf("\n ADAPTIVE ROUTING STATS: %d chunks routed minimally %d chunks routed non-minimally completed packets %lld \n", 
700
                      total_minimal_packets, total_nonmin_packets, total_finished_chunks);
701
 
702
      printf("\n Total packets generated %ld finished %ld \n", total_gen, total_fin);
703
   }
704
705
   return;
}
706

Nikhil's avatar
Nikhil committed
707
static void dragonfly_collective_init(terminal_state * s,
708
709
           		   tw_lp * lp)
{
710
711
712
    // TODO: be annotation-aware
    codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL,
            &mapping_type_id, NULL, &mapping_rep_id, &mapping_offset);
713
    int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_TERM,
714
            NULL, 1);
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
    int num_reps = codes_mapping_get_group_reps(lp_group_name);
    s->node_id = (mapping_rep_id * num_lps) + mapping_offset;

    int i;
   /* handle collective operations by forming a tree of all the LPs */
   /* special condition for root of the tree */
   if( s->node_id == 0)
    {
        s->parent_node_id = -1;
        s->is_root = 1;
   }
   else
   {
       s->parent_node_id = (s->node_id - ((s->node_id - 1) % TREE_DEGREE)) / TREE_DEGREE;
       s->is_root = 0;
   }
   s->children = (tw_lpid*)malloc(TREE_DEGREE * sizeof(tw_lpid));

   /* set the isleaf to zero by default */
   s->is_leaf = 1;
   s->num_children = 0;

   /* calculate the children of the current node. If its a leaf, no need to set children,
      only set isleaf and break the loop*/

   for( i = 0; i < TREE_DEGREE; i++ )
    {
        tw_lpid next_child = (TREE_DEGREE * s->node_id) + i + 1;
Jonathan Jenkins's avatar
Jonathan Jenkins committed
743
        if(next_child < ((tw_lpid)num_lps * (tw_lpid)num_reps))
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
        {
            s->num_children++;
            s->is_leaf = 0;
            s->children[i] = next_child;
        }
        else
           s->children[i] = -1;
    }

#if DRAGONFLY_COLLECTIVE_DEBUG == 1
   printf("\n LP %ld parent node id ", s->node_id);

   for( i = 0; i < TREE_DEGREE; i++ )
        printf(" child node ID %ld ", s->children[i]);
   printf("\n");

   if(s->is_leaf)
        printf("\n LP %ld is leaf ", s->node_id);
#endif
}

765
/* initialize a dragonfly compute node terminal */
Nikhil's avatar
Nikhil committed
766
static void 
767
terminal_init( terminal_state * s, 
768
769
	       tw_lp * lp )
{
770
771
772
    s->packet_gen = 0;
    s->packet_fin = 0;

773
    
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
    int i;
    char anno[MAX_NAME_LENGTH];

    // Assign the global router ID
    // TODO: be annotation-aware
    codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL,
            &mapping_type_id, anno, &mapping_rep_id, &mapping_offset);
    if (anno[0] == '\0'){
        s->anno = NULL;
        s->params = &all_params[num_params-1];
    }
    else{
        s->anno = strdup(anno);
        int id = configuration_get_annotation_index(anno, anno_map);
        s->params = &all_params[id];
    }

791
792
   //int num_lps = codes_mapping_get_lp_count(lp_group_name, 1, LP_CONFIG_NM_TERM,
   //        s->anno, 0);
793

794
   s->terminal_id = codes_mapping_get_lp_relative_id(lp->gid, 0, 0);  
795
   
796
   s->router_id=(int)s->terminal_id / s->params->num_cn;
797
798
   s->terminal_available_time = 0.0;
   s->packet_counter = 0;
799
   
800
   s->finished_msgs = 0;
Misbah Mubarak's avatar
Misbah Mubarak committed
801
802
803
   s->finished_chunks = 0;
   s->finished_packets = 0;
   s->total_time = 0.0;
804
   s->total_msg_size = 0;
805
   s->num_vcs = 1;
806

807
   s->last_buf_full = (tw_stime*)malloc(s->num_vcs * sizeof(tw_stime));
808
809
   s->busy_time = 0.0;

810
811
812
   s->fwd_events = 0;
   s->rev_events = 0;

813
   rc_stack_create(&s->st);
814
815
816
817
   s->vc_occupancy = (int*)malloc(s->num_vcs * sizeof(int));

   for( i = 0; i < s->num_vcs; i++ )
    {
818
      s->last_buf_full[i] = 0.0;
819
820
821
      s->vc_occupancy[i]=0;
    }

822

823
   s->rank_tbl = NULL;
824
   s->terminal_msgs = 
825
       (terminal_message_list**)malloc(1*sizeof(terminal_message_list*));
826
   s->terminal_msgs_tail = 
827
828
829
       (terminal_message_list**)malloc(1*sizeof(terminal_message_list*));
   s->terminal_msgs[0] = NULL;
   s->terminal_msgs_tail[0] = NULL;
830
   s->terminal_length = 0;
831
   s->in_send_loop = 0;
832
   s->issueIdle = 0;
833

834
835
836
837
838
839
840
   /* set up for ROSS stats sampling */
   s->fin_chunks_ross_sample = 0;
   s->data_size_ross_sample = 0;
   s->fin_hops_ross_sample = 0;
   s->fin_chunks_time_ross_sample = 0.0;
   s->busy_time_ross_sample = 0.0;

841
842
843
844
   dragonfly_collective_init(s, lp);
   return;
}

845
/* sets up the router virtual channels, global channels, 
846
 * local channels, compute node channels */
Nikhil's avatar
Nikhil committed
847
static void router_setup(router_state * r, tw_lp * lp)
848
{
849
    
850
851
852
853
854
855
856
857
858
859
860
861
862
    char anno[MAX_NAME_LENGTH];
    codes_mapping_get_lp_info(lp->gid, lp_group_name, &mapping_grp_id, NULL,
            &mapping_type_id, anno, &mapping_rep_id, &mapping_offset);

    if (anno[0] == '\0'){
        r->anno = NULL;
        r->params = &all_params[num_params-1];
    } else{
        r->anno = strdup(anno);
        int id = configuration_get_annotation_index(anno, anno_map);
        r->params = &all_params[id];
    }

863
864
865
866
867
868
869
870
871
872
873
874
    dragonfly_param *p = r->params;
    p->num_real_groups = codes_mapping_get_lp_count(lp_group_name, 0, LP_CONFIG_NM_ROUT, NULL, 1);
    assert(p->num_real_groups > 0);
    if(p->num_real_groups % p->num_routers)
    {
        tw_error(TW_LOC, "\n Config error: num_routers specified %d "
                "does not divide num_router per group %d  ",
                p->num_real_groups , p->num_routers);
    }
    p->num_real_groups = p->num_real_groups/p->num_routers;
    
    num_routers_per_mgrp = codes_mapping_get_lp_count (lp_group_name, 1, LP_METHOD_NM_ROUT,
875
            NULL, 0);
876
    /*int num_grp_reps = codes_mapping_get_group_reps(lp_group_name);
877
878
879
880
    if(p->total_routers != num_grp_reps * num_routers_per_mgrp)
        tw_error(TW_LOC, "\n Config error: num_routers specified %d total routers computed in the network %d "
                "does not match with repetitions * dragonfly_router %d  ",
                p->num_routers, p->total_routers, num_grp_reps * num_routers_per_mgrp);
881
    */
882
883
884
   r->router_id=mapping_rep_id + mapping_offset;
   r->group_id=r->router_id/p->num_routers;

885
886
   r->fwd_events = 0;
   r->rev_events = 0;
887
888
   r->ross_rsample.fwd_events = 0;
   r->ross_rsample.rev_events = 0;
889

890
891
892
   r->global_channel = (int*)malloc(p->num_global_channels * sizeof(int));
   r->next_output_available_time = (tw_stime*)malloc(p->radix * sizeof(tw_stime));
   r->cur_hist_start_time = (tw_stime*)malloc(p->radix * sizeof(tw_stime));
893
   r->link_traffic = (int64_t*)malloc(p->radix * sizeof(int64_t));
894
   r->link_traffic_sample = (int64_t*)malloc(p->radix * sizeof(int64_t));
895
896
   r->cur_hist_num = (int*)malloc(p->radix * sizeof(int));
   r->prev_hist_num = (int*)malloc(p->radix * sizeof(int));
897
   
898
899
   r->vc_occupancy = (int**)malloc(p->radix * sizeof(int*));
   r->in_send_loop = (int*)malloc(p->radix * sizeof(int));
900
   r->pending_msgs = 
901
    (terminal_message_list***)malloc(p->radix * sizeof(terminal_message_list**));
902
   r->pending_msgs_tail = 
903
    (terminal_message_list***)malloc(p->radix * sizeof(terminal_message_list**));
904
   r->queued_msgs = 
905
    (terminal_message_list***)malloc(p->radix * sizeof(terminal_message_list**));
906
   r->queued_msgs_tail = 
907
    (terminal_message_list***)malloc(p->radix * sizeof(terminal_message_list**));
908
   r->queued_count = (int*)malloc(p->radix * sizeof(int));
909
   r->last_buf_full = (tw_stime**)malloc(p->radix * sizeof(tw_stime*));
910
   r->busy_time = (tw_stime*)malloc(p->radix * sizeof(tw_stime));
911
   r->busy_time_sample = (tw_stime*)malloc(p->radix * sizeof(tw_stime));
912

913
914
915
916
917
   /* set up for ROSS stats sampling */
   r->link_traffic_ross_sample = (int64_t*)calloc(p->radix, sizeof(int64_t));
   r->busy_time_ross_sample = (tw_stime*)calloc(p->radix, sizeof(tw_stime));
   if (g_st_model_stats)
       lp->model_types->mstat_sz = sizeof(tw_lpid) + (sizeof(int64_t) + sizeof(tw_stime)) * p->radix;
918
919
920
921
   if (g_st_use_analysis_lps)
       lp->model_types->sample_struct_sz = sizeof(struct dfly_router_sample) + (sizeof(tw_stime) + sizeof(int64_t)) * p->radix;
   r->ross_rsample.busy_time = (tw_stime*)calloc(p->radix, sizeof(tw_stime));
   r->ross_rsample.link_traffic_sample = (int64_t*)calloc(p->radix, sizeof(int64_t));
922

923
   rc_stack_create(&r->st);
924
   for(int i=0; i < p->radix; i++)
925
926
    {
       // Set credit & router occupancy
927
    r->busy_time[i] = 0.0;
928
    r->busy_time_sample[i] = 0.0;
929
930
	r->next_output_available_time[i]=0;
	r->cur_hist_start_time[i] = 0;
931
    r->link_traffic[i]=0;
932
    r->link_traffic_sample[i] = 0;
933
934
	r->cur_hist_num[i] = 0;
	r->prev_hist_num[i] = 0;
935
    r->queued_count[i] = 0;    
936
937
    r->in_send_loop[i] = 0;
    r->vc_occupancy[i] = (int*)malloc(p->num_vcs * sizeof(int));
938
    r->last_buf_full[i] = (tw_stime*)malloc(p->num_vcs * sizeof(tw_stime));
939
    r->pending_msgs[i] = (terminal_message_list**)malloc(p->num_vcs * 
940
        sizeof(terminal_message_list*));
941
    r->pending_msgs_tail[i] = (terminal_message_list**)malloc(p->num_vcs * 
942
        sizeof(terminal_message_list*));
943
    r->queued_msgs[i] = (terminal_message_list**)malloc(p->num_vcs * 
944
        sizeof(terminal_message_list*));
945
    r->queued_msgs_tail[i] = (terminal_message_list**)malloc(p->num_vcs * 
946
        sizeof(terminal_message_list*));
947
        for(int j = 0; j < p->num_vcs; j++) {
948
            r->