/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#ifndef MODELNET_H
#define MODELNET_H

#include "ross.h"
11
#include "codes/lp-type-lookup.h"
12
#include "codes/configuration.h"
13
#include "codes/lp-io.h"
14
#include <stdint.h>
15

16
17
/* Size constant for pull messages.
 * NOTE(review): presumably the size in bytes of the control message sent by
 * model_net_pull_event() -- confirm against the implementation. */
#define PULL_MSG_SIZE 128

18
/* Upper bound on a name length.
 * NOTE(review): not referenced in the visible part of this header; presumably
 * sizes name buffers in the implementation -- confirm. */
#define MAX_NAME_LENGTH 256
19
20
/* Size of the category name buffers embedded in model_net_request and
 * struct mn_stats (char category[CATEGORY_NAME_MAX]). */
#define CATEGORY_NAME_MAX 16
/* NOTE(review): presumably the maximum number of distinct categories that are
 * tracked -- confirm against the implementation. */
#define CATEGORY_MAX 12
21

22
23
24
25
26
/* HACK: there is currently no scheduling fidelity across multiple
 * model_net_event calls. Hence, problems arise when some LP sends multiple
 * messages as part of an event and expects FCFS ordering. A proper fix which
 * involves model-net LP-level scheduling of requests is ideal, but not
 * feasible for now (would basically have to redesign model-net), so expose
 * explicit start-sequence and stop-sequence markers as a workaround
 */
29
30
31
32
33
34
35
36
37
38
39
/* Sequence state manipulated by the marker macros in this header:
 * MN_START_SEQ() sets in_sequence to 1 and resets mn_msg_offset to 0.0,
 * MN_END_SEQ() sets in_sequence back to 0. Both objects are only declared
 * here; their definitions live outside this header. */
extern int in_sequence;
extern tw_stime mn_msg_offset;
/* Start-of-sequence marker: raises the in_sequence flag and rewinds
 * mn_msg_offset to zero. Expands to a single statement. */
#define MN_START_SEQ()       \
    do {                     \
        in_sequence = 1;     \
        mn_msg_offset = 0.0; \
    } while (0)
/* End-of-sequence marker: clears the in_sequence flag. Expands to a single
 * statement. */
#define MN_END_SEQ()     \
    do {                 \
        in_sequence = 0; \
    } while (0)


40
/* Forward declaration so that mn_stats can be used by name before
 * struct mn_stats is defined further down in this header. */
typedef struct mn_stats mn_stats;
41

42
43
44
45
46
47
48
49
50
51
52
53
// X-macro table that rolls each network's type and names into one place.
// Every row has the form:
//   X(enum value, config name, internal lp name, lp method struct)
// The last row (MAX_NETS) is a sentinel.
#define NETWORK_DEF                                                    \
    X(SIMPLENET, "modelnet_simplenet", "simplenet", &simplenet_method) \
    X(SIMPLEWAN, "modelnet_simplewan", "simplewan", &simplewan_method) \
    X(TORUS, "modelnet_torus", "torus", &torus_method)                 \
    X(DRAGONFLY, "modelnet_dragonfly", "dragonfly", &dragonfly_method) \
    X(LOGGP, "modelnet_loggp", "loggp", &loggp_method)                 \
    X(MAX_NETS, NULL, NULL, NULL)

/* Expand NETWORK_DEF into the enum of network identifiers: X keeps only the
 * first column of each row (the enum value) and is undefined again right
 * after use so it cannot leak into code that includes this header. The
 * sentinel row makes MAX_NETS the last enumerator. */
#define X(a,b,c,d) a,
enum NETWORKS
{
    NETWORK_DEF
};
#undef X

// network identifiers (both the config lp names and the model-net internal
// names); only declared here, defined outside this header.
// NOTE(review): presumably indexed by enum NETWORKS and generated from the
// second and third columns of NETWORK_DEF -- confirm against the definition.
extern char * model_net_lp_config_names[];
extern char * model_net_method_names[];
64

65
66
67
68
69
70
71
72
73
74
75
76
77
78
// Request descriptor that the model-net implementation passes around
// internally; API users do not handle it themselves.
// NOTE(review): the per-field notes are inferred from the field names and
// from the model_net_event() parameter documentation -- confirm.
typedef struct model_net_request model_net_request;
struct model_net_request {
    tw_lpid final_dest_lp;              /* LP the message is finally delivered to */
    tw_lpid src_lp;                     /* LP the request originates from */
    uint64_t msg_size;                  /* message size */
    uint64_t packet_size;               /* packet size */
    int net_id;                         /* network identifier */
    int is_pull;                        /* presumably nonzero for pull requests */
    int remote_event_size;              /* size of the remote event */
    int self_event_size;                /* size of the self event */
    char category[CATEGORY_NAME_MAX];   /* category name buffer */
};

79
80
81
82
83
84
85
86
87
88
89
90
91
/* data structure for tracking network statistics
 *
 * Each record carries a category name buffer of CATEGORY_NAME_MAX chars.
 * NOTE(review): the remaining field meanings are inferred from their names --
 * the send_ and recv_ members presumably accumulate message count, bytes and
 * time for that category, and max_event_size the largest event seen; confirm
 * against the code that updates them. */
struct mn_stats
{
    char category[CATEGORY_NAME_MAX];
    long send_count;
    long send_bytes;
    tw_stime send_time;
    long recv_count;
    long recv_bytes;
    tw_stime recv_time;
    long max_event_size;
};

92
93
94
/* Registers all model-net LPs in ROSS. Should be called after
 * configuration_load, but before codes_mapping_setup.
 *
 * Takes no arguments. Declared with (void) so that this is a real prototype:
 * an empty parameter list leaves the arguments unspecified before C23, which
 * lets calls with stray arguments compile silently. */
void model_net_register(void);
95

96
97
98
99
100
101
102
103
/* Configure every model-net LP from the CODES configuration and hand back the
 * ids used to address the different network types.
 *
 * id_count - output: receives the number of networks
 *
 * Returns the set of network IDs, indexed in the order given by the
 * modelnet_order configuration parameter. */
int *model_net_configure(
    int *id_count);
104
105
106
107
108
109

/* Initialize/configure the network(s) from the CODES configuration.
 *
 * id_count - output: receives the number of networks
 *
 * Returns an array of the network ids, indexed in the order given by the
 * modelnet_order configuration parameter. */
int *model_net_set_params(
    int *id_count);
110
111

// setup the modelnet parameters
112
/* NOTE(review): parameter roles are inferred from their names -- net_name
 * names the network being set up, packet_size is its packet size and
 * net_params is an opaque, model-specific parameter block; the int result is
 * presumably the network id. Confirm against the implementation. */
int model_net_setup(
    char *net_name,
    uint64_t packet_size,
    const void *net_params);
113
114
115

/* Utility: look up the modelnet ID for the network called net_name. Meant to
 * be used after setup has been done. */
int model_net_get_id(
    char *net_name);
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131

/* Issues a collective operation call through model-net.
 * NOTE(review): the parameters appear to mirror those of model_net_event()
 * (network id, category, message size, remote event payload and calling
 * LP) -- confirm against the implementation. */
void model_net_event_collective(int net_id,
                                char *category,
                                int message_size,
                                int remote_event_size,
                                const void *remote_event,
                                tw_lp *sender);

/* Reverse event for the collective operation call
 * (model_net_event_collective). */
void model_net_event_collective_rc(int net_id,
                                   int message_size,
                                   tw_lp *sender);

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/* allocate and transmit a new event that will pass through model_net to 
 * arrive at its destination:
 *
 * - category: category name to associate with this communication
 *   - OPTIONAL: callers can set this to NULL if they don't want to use it,
 *     and model_net methods can ignore it if they don't support it
 * - final_dest_lp: the LP that the message should be delivered to.
 *   - NOTE: this is _not_ the LP of an underlying network method (for
 *     example, it is not a torus or dragonfly LP), but rather the LP of an
 *     MPI process or storage server that you are transmitting to.
 * - message_size: this is the size of the message (in bytes) that modelnet
 *     will simulate transmitting to the final_dest_lp.  It can be any size
 *     (i.e. it is not constrained by transport packet size).
 * - remote_event_size: this is the size of the ROSS event structure that
 *     will be delivered to the final_dest_lp.
 * - remote_event: pointer to data to be used as the remote event message
 * - self_event_size: this is the size of the ROSS event structure that will
 *     be delivered to the calling LP once local completion has occurred for
 *     the network transmission.
 *     - NOTE: "local completion" in this sense means that model_net has
 *       transmitted the data off of the local node, but it does not mean that
 *       the data has been (or even will be) delivered.  Once this event is
 *       delivered the caller is free to re-use its buffer.
 * - self_event: pointer to data to be used as the self event message
 * - sender: pointer to the tw_lp structure of the API caller.  This is
 *     identical to the sender argument to tw_event_new().
 */
// first argument becomes the network ID
void model_net_event(
    int net_id,
    char* category, 
    tw_lpid final_dest_lp, 
164
    uint64_t message_size, 
165
    tw_stime offset,
166
167
168
169
170
171
    int remote_event_size,
    const void* remote_event,
    int self_event_size,
    const void* self_event,
    tw_lp *sender);

172
173
174
175
/* model_net_find_local_device()
 *
 * returns the LP id of the network card attached to the calling LP
 */
176
177
178
179
180
/* net_id selects the network; sender is the tw_lp of the calling LP.
 * NOTE(review): annotation / ignore_annotations presumably control which
 * annotated LPs are considered in the lookup -- confirm against the
 * implementation. */
tw_lpid model_net_find_local_device(int net_id,
                                    const char *annotation,
                                    int ignore_annotations,
                                    tw_lp *sender);
181

182
183
/* NOTE(review): undocumented in this header; from the name, presumably returns
 * the size of the message structure used by network net_id -- confirm. */
int model_net_get_msg_sz(int net_id);

184
185
186
187
188
189
190
191
192
193
194
195
196
/* model_net_event_rc()
 *
 * This function does reverse computation for the model_net_event()
 * function.
 * - sender: pointer to the tw_lp structure of the API caller.  This is
 *   identical to the sender argument to tw_event_new().
 */
/* NOTE: we may end up needing additional arguments here to track state for
 * reverse computation; add as needed
 */
/* net_id: network ID, as for model_net_event().
 * NOTE(review): message_size is presumably the size that was passed to the
 * model_net_event() call being reversed -- confirm. */
void model_net_event_rc(
    int net_id,
    tw_lp *sender,
    uint64_t message_size);
198

199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224

/* Issue a 'pull' from the memory of the destination LP, without
 * requiring the destination LP to do event processing. This is meant as a
 * simulation-based abstraction of RDMA. A control packet will be sent to the
 * destination LP, the payload will be sent back to the requesting LP, and the
 * requesting LP will be issued its given completion event.
 *
 * Parameters are largely the same as model_net_event, with the following
 * exceptions:
 * - final_dest_lp is the lp to pull data from
 * - self_event_size, self_event are applied at the requester upon receipt of 
 *   the payload from the dest
 */
/* Pull entry point: final_dest_lp is the LP data is pulled from, and
 * self_event_size / self_event describe the event applied at the requester
 * once the payload has been received. */
void model_net_pull_event(int net_id,
                          char *category,
                          tw_lpid final_dest_lp,
                          uint64_t message_size,
                          tw_stime offset,
                          int self_event_size,
                          const void *self_event,
                          tw_lp *sender);
/* NOTE(review): from the _rc suffix, presumably the reverse-computation
 * counterpart of model_net_pull_event() -- confirm. */
void model_net_pull_event_rc(int net_id,
                             tw_lp *sender);

225
226
227
/* returns pointer to the LP type information for the network identified by
 * net_id.
 * NOTE(review): this comment used to name only the simplenet module, which
 * does not match the net_id parameter -- confirm the wording. */
const tw_lptype* model_net_get_lp_type(int net_id);

228
/* NOTE(review): undocumented in this header; from the name, presumably returns
 * the packet size configured for network net_id -- confirm. */
uint64_t model_net_get_packet_size(int net_id);
229

230
231
232
/* used for reporting overall network statistics for e.g. average latency ,
 * maximum latency, total number of packets finished during the entire
 * simulation etc. */
233
/* net_id: NOTE(review): presumably selects the network whose overall
 * statistics are reported -- confirm. */
void model_net_report_stats(int net_id);
234

235
/* writing model-net statistics on a per LP basis */
236
237
/* NOTE(review): parameter roles inferred from names -- lpid is presumably the
 * LP the statistics belong to and stat the record to write; confirm. */
void model_net_write_stats(tw_lpid lpid, mn_stats* stat);

238
/* printing model-net statistics on a per LP basis */
239
240
241
242
/* NOTE(review): parameter roles inferred from names -- lpid is presumably the
 * LP the statistics belong to and mn_stats_array its statistics records;
 * confirm. */
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[]);

/* find model-net statistics
 * NOTE(review): presumably returns the entry of mn_stats_array whose category
 * matches the given name; what happens when no entry matches is not visible
 * from this header -- confirm against the implementation. */
mn_stats* model_net_find_stats(const char* category, mn_stats mn_stats_array[]);
243
244
245
246
247
248
249
250
251
252
#endif /* MODELNET_H */

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */