model-net.h 9.45 KB
Newer Older
1
/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

#ifndef MODELNET_H
#define MODELNET_H

#include "ross.h"
11
#include "codes/lp-type-lookup.h"
12
#include "codes/configuration.h"
13
#include "codes/lp-io.h"
14
#include <stdint.h>
15

16 17
/* Size constant for pull messages. NOTE(review): presumably the size, in
 * bytes, of the control message sent on behalf of model_net_pull_event() --
 * confirm against the implementation. */
#define PULL_MSG_SIZE 128

18
/* Upper bound on a name length. NOTE(review): not referenced elsewhere in
 * this header; presumably sizes name buffers in the implementation --
 * confirm. */
#define MAX_NAME_LENGTH 256
19 20
/* Size of the fixed `category` character buffers in model_net_request and
 * mn_stats. */
#define CATEGORY_NAME_MAX 16
/* NOTE(review): presumably the maximum number of distinct categories tracked
 * (e.g. the length of an mn_stats array) -- confirm against the callers. */
#define CATEGORY_MAX 12
21

22 23 24 25 26
/* HACK: there is currently no scheduling fidelity across multiple
 * model_net_event calls. Hence, problems arise when some LP sends multiple
 * messages as part of an event and expects FCFS ordering. A proper fix, which
 * involves model-net LP-level scheduling of requests, is ideal but not
 * feasible for now (it would basically require redesigning model-net), so
 * expose explicit start-sequence and stop-sequence markers as a workaround.
 */
29 30 31 32 33 34 35 36 37 38 39
/* Sequence flag: set to 1 by MN_START_SEQ() and back to 0 by MN_END_SEQ(). */
extern int in_sequence;
/* Message offset, reset to 0.0 by MN_START_SEQ(). NOTE(review): presumably
 * advanced by the model_net_event calls issued inside a sequence -- confirm
 * against the implementation. */
extern tw_stime mn_msg_offset;
/* Open a message sequence: raise the in_sequence flag and reset the running
 * message offset to zero. Wrapped in do/while(0) so it behaves as a single
 * statement. */
#define MN_START_SEQ()           \
    do {                         \
        in_sequence = 1;         \
        mn_msg_offset = 0.0;     \
    } while (0)
/* Close a message sequence by clearing the in_sequence flag. Wrapped in
 * do/while(0) so it behaves as a single statement. */
#define MN_END_SEQ()             \
    do {                         \
        in_sequence = 0;         \
    } while (0)


40
/* Forward declaration so the mn_stats name can be used before struct mn_stats
 * is defined further down in this header. */
typedef struct mn_stats mn_stats;
41

42 43 44 45 46 47 48 49 50 51 52 53
// X-macro table that keeps the network types and their names in one place.
// Row format: X(enum value, config LP name, internal LP name, LP method struct)
// The last row (MAX_NETS) is a sentinel: it carries no names or method, and
// its enum value equals the number of real network rows before it.
#define NETWORK_DEF \
    X(SIMPLENET, "modelnet_simplenet", "simplenet", &simplenet_method)\
    X(SIMPLEWAN, "modelnet_simplewan", "simplewan", &simplewan_method)\
    X(TORUS,     "modelnet_torus",     "torus",     &torus_method)\
    X(DRAGONFLY, "modelnet_dragonfly", "dragonfly", &dragonfly_method)\
    X(LOGGP,     "modelnet_loggp",     "loggp",     &loggp_method)\
    X(MAX_NETS,  NULL,                 NULL,        NULL)

// Expand only the first column of each row to build the enum; X is undefined
// again afterwards so other expansions of NETWORK_DEF can redefine it.
#define X(a,b,c,d) a,
enum NETWORKS
{
    NETWORK_DEF
};
#undef X

/* Network name tables: one holds the LP names used in the configuration, the
 * other the names model-net uses internally. */
extern char *model_net_lp_config_names[];
extern char *model_net_method_names[];
64

65 66 67 68 69 70 71 72 73 74 75 76 77 78
// request structure that gets passed around (by the model-net implementation,
// not the user)
// NOTE(review): the per-field notes below are inferred from the field names
// and the model_net_event parameter list; confirm against the implementation.
typedef struct model_net_request {
    // presumably the LP the message is ultimately delivered to
    tw_lpid  final_dest_lp;
    // presumably the LP that issued the request
    tw_lpid  src_lp;
    // presumably the simulated message size
    uint64_t msg_size;
    // presumably the packet size of the network in use
    uint64_t packet_size;
    // presumably the network ID the request travels over
    int      net_id;
    // presumably nonzero when the request is a pull (see model_net_pull_event)
    int      is_pull;
    // presumably the size of the event delivered at the destination
    int      remote_event_size;
    // presumably the size of the event delivered back to the caller
    int      self_event_size;
    // fixed-size buffer holding the category name
    char     category[CATEGORY_NAME_MAX];
} model_net_request;

79 80 81 82 83 84 85 86 87 88 89 90 91
/* data structure for tracking network statistics */
/* NOTE(review): the per-field notes below are inferred from the field names;
 * confirm units and update points against the implementation. */
struct mn_stats
{
    /* fixed-size buffer holding the category name these counters belong to */
    char category[CATEGORY_NAME_MAX];
    /* presumably the number of sends recorded */
    long send_count;
    /* presumably the total bytes sent */
    long send_bytes;
    /* presumably the accumulated simulated time spent sending */
    tw_stime send_time;
    /* presumably the number of receives recorded */
    long recv_count;
    /* presumably the total bytes received */
    long recv_bytes;
    /* presumably the accumulated simulated time spent receiving */
    tw_stime recv_time;
    /* presumably the largest event size observed */
    long max_event_size;
};

92 93 94
/* Registers all model-net LPs in ROSS. Should be called after
 * configuration_load, but before codes_mapping_setup.
 *
 * Takes no arguments. Declared with an explicit (void) parameter list so the
 * declaration is a true prototype and calls passing arguments are rejected
 * by the compiler. */
void model_net_register(void);
95

96 97 98 99 100 101 102 103
/* Configure every model-net LP from the CODES configuration and hand back
 * the IDs used to address the different network types.
 *
 * id_count - output: receives the number of networks
 *
 * Returns the set of network IDs, indexed in the order given by the
 * modelnet_order configuration parameter. */
int *model_net_configure(int *id_count);
104 105 106 107 108 109

/* Initialize/configure the network(s) from the CODES configuration.
 *
 * id_count - output: receives the number of networks
 *
 * Returns an array of the network IDs, indexed in the order given by the
 * modelnet_order configuration parameter. */
int *model_net_set_params(int *id_count);
110 111

// Set up the modelnet parameters for one network.
//
// net_name    - name of the network to set up
// packet_size - packet size to use for this network
// net_params  - opaque, network-specific parameter block
//               (NOTE(review): expected layout depends on net_name -- confirm)
//
// NOTE(review): the int return value is presumably the network ID (see
// model_net_get_id) -- confirm against the implementation.
int model_net_setup(char* net_name, uint64_t packet_size, const void* net_params);
113 114 115

/* Utility: look up the modelnet ID for net_name once setup has been done. */
int model_net_get_id(char *net_name);
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131

/* Issue a collective operation call through model-net. */
void model_net_event_collective(int net_id, char *category, int message_size,
                                int remote_event_size,
                                const void *remote_event, tw_lp *sender);

/* Reverse event for the collective operation call. */
void model_net_event_collective_rc(int net_id, int message_size,
                                   tw_lp *sender);

132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
/* allocate and transmit a new event that will pass through model_net to 
 * arrive at its destination:
 *
 * - category: category name to associate with this communication
 *   - OPTIONAL: callers can set this to NULL if they don't want to use it,
 *     and model_net methods can ignore it if they don't support it
 * - final_dest_lp: the LP that the message should be delivered to.
 *   - NOTE: this is _not_ the LP of an underlying network method (for
 *     example, it is not a torus or dragonfly LP), but rather the LP of an
 *     MPI process or storage server that you are transmitting to.
 * - message_size: this is the size of the message (in bytes) that modelnet
 *     will simulate transmitting to the final_dest_lp.  It can be any size
 *     (i.e. it is not constrained by transport packet size).
 * - remote_event_size: this is the size of the ROSS event structure that
 *     will be delivered to the final_dest_lp.
 * - remote_event: pointer ot data to be used as the remove event message
 * - self_event_size: this is the size of the ROSS event structure that will
 *     be delivered to the calling LP once local completion has occurred for
 *     the network transmission.
 *     - NOTE: "local completion" in this sense means that model_net has
 *       transmitted the data off of the local node, but it does not mean that
 *       the data has been (or even will be) delivered.  Once this event is
 *       delivered the caller is free to re-use its buffer.
 * - self_event: pionter to data to be used as the self event message
 * - sender: pointer to the tw_lp structure of the API caller.  This is
 *     identical to the sender argument to tw_event_new().
158 159 160 161 162
 *
 * The modelnet LP used for communication is the LP in the same group, same
 * repetition, using net_id to differentiate different model types. If
 * more than one modelnet model of the same type but different annotation exist,
 * then the first one listed will be used.
163 164 165 166 167 168
 */
// first argument becomes the network ID
void model_net_event(
    int net_id,
    char* category, 
    tw_lpid final_dest_lp, 
169
    uint64_t message_size, 
170
    tw_stime offset,
171 172 173 174 175
    int remote_event_size,
    const void* remote_event,
    int self_event_size,
    const void* self_event,
    tw_lp *sender);
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194
/*
 * Annotation-aware variant of model_net_event; see model_net_event for the
 * general description of the shared parameters.
 *
 * Where model_net_event cannot tell apart several modelnet LPs of the same
 * type, this function selects among them by the given annotation. The
 * caller's own annotation is not consulted.
 */
void model_net_event_annotated(int net_id, const char *annotation,
                               char *category, tw_lpid final_dest_lp,
                               uint64_t message_size, tw_stime offset,
                               int remote_event_size,
                               const void *remote_event,
                               int self_event_size, const void *self_event,
                               tw_lp *sender);
195

196 197 198 199
/* model_net_find_local_device()
 *
 * returns the LP id of the network card attached to the calling LP
 */
200 201 202 203 204
tw_lpid model_net_find_local_device(
        int          net_id,
        const char * annotation,
        int          ignore_annotations,
        tw_lp      * sender);
205

206 207
/* NOTE(review): undocumented in the original; going by the name, presumably
 * returns the size of the message structure used by network net_id --
 * confirm against the implementation. */
int model_net_get_msg_sz(int net_id);

208 209 210 211 212 213 214 215 216 217 218 219 220
/* model_net_event_rc()
 *
 * This function does reverse computation for a message send.
 * NOTE(review): the original comment named the forward function
 * model_net_event_new(), which is not declared in this header; presumably
 * model_net_event() is meant -- confirm.
 * - net_id: the network ID
 * - sender: pointer to the tw_lp structure of the API caller.  This is
 *   identical to the sender argument to tw_event_new().
 * - message_size: NOTE(review): presumably the message_size that was given
 *   to the forward call -- confirm.
 */
/* NOTE: we may end up needing additional arguments here to track state for
 * reverse computation; add as needed
 */
void model_net_event_rc(
    int net_id,
    tw_lp *sender,
    uint64_t message_size);
222

223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242

/* Issue a 'pull' from the memory of the destination LP, without
 * requiring the destination LP to do event processing. This is meant as a
 * simulation-based abstraction of RDMA. A control packet will be sent to the
 * destination LP, the payload will be sent back to the requesting LP, and the
 * requesting LP will be issued its given completion event.
 *
 * Parameters are largely the same as model_net_event, with the following
 * exceptions:
 * - final_dest_lp is the lp to pull data from
 * - self_event_size, self_event are applied at the requester upon receipt of
 *   the payload from the dest
 * - there is no remote event (no remote_event_size / remote_event arguments)
 */
void model_net_pull_event(
        int net_id,
        char *category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
        const void *self_event,
        tw_lp *sender);
/* Pull variant that additionally takes an annotation.
 * NOTE(review): undocumented in the original; presumably this relates to
 * model_net_pull_event the same way model_net_event_annotated relates to
 * model_net_event (the annotation selects among modelnet LPs of the same
 * type) -- confirm against the implementation. */
void model_net_pull_event_annotated(
        int net_id,
        const char * annotation,
        char *category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
        const void *self_event,
        tw_lp *sender);
/* NOTE(review): undocumented in the original; going by the _rc suffix,
 * presumably the reverse-computation counterpart of model_net_pull_event --
 * confirm. */
void model_net_pull_event_rc(int net_id, tw_lp *sender);

259 260 261
/* Returns a pointer to the LP information (tw_lptype) for the simplenet
 * module.
 * NOTE(review): the function takes a net_id, so the "simplenet" wording may
 * be stale -- confirm whether it applies to any network. */
const tw_lptype *model_net_get_lp_type(int net_id);

262
/* NOTE(review): undocumented in the original; going by the name, returns the
 * packet size associated with network net_id (presumably the value given to
 * model_net_setup) -- confirm. */
uint64_t model_net_get_packet_size(int net_id);
263

264 265 266
/* Used for reporting overall network statistics for network net_id, e.g.
 * average latency, maximum latency, total number of packets finished during
 * the entire simulation, etc. */
void model_net_report_stats(int net_id);
268

269
/* writing model-net statistics on a per LP basis */
270 271
void model_net_write_stats(tw_lpid lpid, mn_stats* stat);

272
/* printing model-net statistics on a per LP basis */
273 274 275 276
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[]);

/* Find the model-net statistics record for `category` within
 * mn_stats_array. */
mn_stats *model_net_find_stats(const char *category,
                               mn_stats mn_stats_array[]);
277 278 279 280 281 282 283 284 285 286
#endif /* MODELNET_H */

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */