model-net.h 8.31 KB
Newer Older
1
/*
Philip Carns's avatar
Philip Carns committed
2
 * Copyright (C) 2013 University of Chicago.
3
 * See COPYRIGHT notice in top-level directory.
Philip Carns's avatar
Philip Carns committed
4
 *
5 6 7 8 9 10
 */

#ifndef MODELNET_H
#define MODELNET_H

#include "ross.h"
11
#include "codes/lp-type-lookup.h"
12
#include "codes/configuration.h"
13
#include "codes/lp-io.h"
14
#include <stdint.h>
15

16 17
#define PULL_MSG_SIZE 128

18
#define MAX_NAME_LENGTH 256
19 20
/* size in bytes of the category name buffers in model_net_request and
 * mn_stats */
#define CATEGORY_NAME_MAX 16
/* NOTE(review): presumably the maximum number of distinct categories tracked
 * per LP -- confirm against the code that sizes the mn_stats arrays */
#define CATEGORY_MAX 12
21

22 23 24 25 26
/* HACK: there is currently no scheduling fidelity across multiple
 * model_net_event calls. Hence, problems arise when some LP sends multiple
 * messages as part of an event and expects FCFS ordering. A proper fix, which
 * involves model-net LP-level scheduling of requests, is ideal but not
 * feasible for now (it would basically require redesigning model-net), so we
 * expose explicit start-sequence and stop-sequence markers as a workaround.
 */
29 30 31 32 33 34 35 36 37 38 39
/* Sequence state manipulated by MN_START_SEQ/MN_END_SEQ. Both variables are
 * only declared here and must be defined elsewhere. */
extern int in_sequence;        /* set to 1 by MN_START_SEQ, 0 by MN_END_SEQ */
extern tw_stime mn_msg_offset; /* reset to 0.0 by MN_START_SEQ; NOTE(review):
                                * presumably advanced by model_net_event calls
                                * made inside a sequence -- confirm */
/* Open a sequence: raise the in_sequence flag and zero the message offset. */
#define MN_START_SEQ() do {\
    in_sequence = 1; \
    mn_msg_offset = 0.0; \
} while (0)
/* Close a sequence: lower the in_sequence flag (mn_msg_offset is untouched). */
#define MN_END_SEQ() do {\
    in_sequence = 0;\
} while (0)


40
typedef struct mn_stats mn_stats;
41

42 43 44 45 46 47 48 49 50 51 52 53
// X-macro table that keeps every network type's identifiers in one place.
// Row format:
//   X(enum value, config lp name, internal lp name, lp method struct)
// The last row (MAX_NETS) is a sentinel; as the final enumerator its value
// equals the number of real networks listed before it.
#define NETWORK_DEF \
    X(SIMPLENET, "modelnet_simplenet", "simplenet", &simplenet_method)\
    X(SIMPLEWAN, "modelnet_simplewan", "simplewan", &simplewan_method)\
    X(TORUS,     "modelnet_torus",     "torus",     &torus_method)\
    X(DRAGONFLY, "modelnet_dragonfly", "dragonfly", &dragonfly_method)\
    X(LOGGP,     "modelnet_loggp",     "loggp",     &loggp_method)\
    X(MAX_NETS,  NULL,                 NULL,        NULL)

// Expand only the first column of each row to generate the enumerators; the
// remaining columns are ignored here, so the *_method symbols need not be
// visible at this point.
#define X(a,b,c,d) a,
enum NETWORKS
{
    NETWORK_DEF
};
#undef X

// network identifiers (both the config lp names and the model-net internal
// names). NOTE(review): presumably indexed by enum NETWORKS value and filled
// from the second and third NETWORK_DEF columns -- confirm against the
// source file that defines these arrays
extern char * model_net_lp_config_names[];
extern char * model_net_method_names[];
64

65 66 67 68 69 70 71 72 73 74 75 76 77 78
// request structure that gets passed around (by the model-net implementation,
// not the user). Field notes follow the model_net_event() documentation in
// this header where the names match; anything else is marked as an assumption.
typedef struct model_net_request {
    tw_lpid  final_dest_lp;     // LP the message is ultimately delivered to
    tw_lpid  src_lp;            // presumably the LP that issued the request -- TODO confirm
    uint64_t msg_size;          // size of the simulated message (bytes, per model_net_event docs)
    uint64_t packet_size;       // presumably the network's packet size -- TODO confirm
    int      net_id;            // network ID the request travels over
    int      is_pull;           // presumably nonzero for model_net_pull_event requests -- TODO confirm
    int      remote_event_size; // size of the event delivered to final_dest_lp
    int      self_event_size;   // size of the event delivered back to the caller
    char     category[CATEGORY_NAME_MAX]; // category name buffer
} model_net_request;

79 80 81 82 83 84 85 86 87 88 89 90 91
/* data structure for tracking network statistics.
 * NOTE(review): records appear to be kept per category (model_net_find_stats
 * looks one up by category name); the units of the byte/time fields and
 * whether they are cumulative are not visible from this header -- confirm. */
struct mn_stats
{
    char category[CATEGORY_NAME_MAX]; /* category name this record is for */
    long send_count;      /* presumably number of sends */
    long send_bytes;      /* presumably bytes sent */
    tw_stime send_time;   /* presumably simulated time spent sending */
    long recv_count;      /* presumably number of receives */
    long recv_bytes;      /* presumably bytes received */
    tw_stime recv_time;   /* presumably simulated time spent receiving */
    long max_event_size;  /* presumably the largest event size seen */
};

92 93 94
/* Registers all model-net LPs in ROSS. Should be called after
 * configuration_load, but before codes_mapping_setup.
 *
 * Declared with an explicit (void) parameter list: before C23 an empty list
 * in a declaration leaves the parameters unspecified, so the compiler cannot
 * diagnose calls that mistakenly pass arguments. */
void model_net_register(void);
95

96 97 98 99 100 101 102 103
/* Configure every model-net LP from the CODES configuration and hand back
 * the ids used to address the different network types.
 *
 * id_count - output: the number of networks
 *
 * return - the set of network IDs, indexed in the order given by the
 * modelnet_order configuration parameter */
int *model_net_configure(int *id_count);
104 105 106 107 108 109

/* Initialize/configure the network(s) from the CODES configuration.
 *
 * id_count - output: the number of networks
 *
 * return - array of the network ids, indexed in the order given by the
 * modelnet_order configuration parameter */
int *model_net_set_params(int *id_count);
110 111

// setup the modelnet parameters
// NOTE(review): presumably net_name selects the network type, packet_size is
// the packet size to use, net_params points to a network-specific parameter
// structure, and the return value is the resulting network id -- confirm
// against the implementation
int model_net_setup(char* net_name, uint64_t packet_size, const void* net_params);
113 114 115

/* utility function to get the modelnet ID post-setup
 * net_name - name of the network to look up (NOTE(review): which of the
 *            config/internal names is expected, and what is returned for an
 *            unknown name, is not visible from this header -- confirm) */
int model_net_get_id(char *net_name);
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131

/* This event does a collective operation call for model-net
 *
 * NOTE(review): the parameters appear to mirror model_net_event() -- net_id
 * is the network ID, category the category name, message_size the simulated
 * message size, remote_event_size/remote_event the event to deliver, and
 * sender the calling LP -- confirm against the implementation. */
void model_net_event_collective(
    int net_id,
    char* category,
    int message_size,
    int remote_event_size,
    const void* remote_event,
    tw_lp *sender);

/* Reverse event for the collective operation call
 * (model_net_event_collective). */
void model_net_event_collective_rc(
    int net_id,
    int message_size,
    tw_lp *sender);

132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
/* allocate and transmit a new event that will pass through model_net to 
 * arrive at its destination:
 *
 * - category: category name to associate with this communication
 *   - OPTIONAL: callers can set this to NULL if they don't want to use it,
 *     and model_net methods can ignore it if they don't support it
 * - final_dest_lp: the LP that the message should be delivered to.
 *   - NOTE: this is _not_ the LP of an underlying network method (for
 *     example, it is not a torus or dragonfly LP), but rather the LP of an
 *     MPI process or storage server that you are transmitting to.
 * - message_size: this is the size of the message (in bytes) that modelnet
 *     will simulate transmitting to the final_dest_lp.  It can be any size
 *     (i.e. it is not constrained by transport packet size).
 * - remote_event_size: this is the size of the ROSS event structure that
 *     will be delivered to the final_dest_lp.
 * - remote_event: pointer to data to be used as the remote event message
 * - self_event_size: this is the size of the ROSS event structure that will
 *     be delivered to the calling LP once local completion has occurred for
 *     the network transmission.
 *     - NOTE: "local completion" in this sense means that model_net has
 *       transmitted the data off of the local node, but it does not mean that
 *       the data has been (or even will be) delivered.  Once this event is
 *       delivered the caller is free to re-use its buffer.
 * - self_event: pointer to data to be used as the self event message
 * - sender: pointer to the tw_lp structure of the API caller.  This is
 *     identical to the sender argument to tw_event_new().
 */
// first argument becomes the network ID
void model_net_event(
    int net_id,
    char* category, 
    tw_lpid final_dest_lp, 
164
    uint64_t message_size, 
165
    tw_stime offset,
166 167 168 169 170 171
    int remote_event_size,
    const void* remote_event,
    int self_event_size,
    const void* self_event,
    tw_lp *sender);

172 173 174 175
/* model_net_find_local_device()
 *
 * returns the LP id of the network card attached to the calling LP
 */
176 177 178 179 180
tw_lpid model_net_find_local_device(
        int          net_id,
        const char * annotation,
        int          ignore_annotations,
        tw_lp      * sender);
181

182 183
/* NOTE(review): undocumented in the original; presumably returns the size of
 * the event message used by network net_id -- confirm against the
 * implementation */
int model_net_get_msg_sz(int net_id);

184 185 186 187 188 189 190 191 192 193 194 195 196
/* model_net_event_rc()
 *
 * This function does reverse computation for the model_net_event()
 * function.
 * - sender: pointer to the tw_lp structure of the API caller.  This is
 *   identical to the sender argument to tw_event_new().
 */
/* NOTE: we may end up needing additional arguments here to track state for
 * reverse computation; add as needed
 */
void model_net_event_rc(
    int net_id,             /* network ID */
    tw_lp *sender,          /* tw_lp of the API caller */
    uint64_t message_size); /* NOTE(review): presumably the message_size given
                             * to the forward call being reversed -- confirm */
198

199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224

/* Issue a 'pull' from the memory of the destination LP, without
 * requiring the destination LP to do event processing. This is meant as a
 * simulation-based abstraction of RDMA. A control packet will be sent to the
 * destination LP, the payload will be sent back to the requesting LP, and the
 * requesting LP will be issued its given completion event.
 *
 * Parameters are largely the same as model_net_event, with the following
 * exceptions:
 * - final_dest_lp is the lp to pull data from
 * - self_event_size, self_event are applied at the requester upon receipt of 
 *   the payload from the dest
 * - there is no remote event (no remote_event_size/remote_event parameters)
 */
void model_net_pull_event(
        int net_id,
        char *category,
        tw_lpid final_dest_lp,
        uint64_t message_size,
        tw_stime offset,
        int self_event_size,
        const void *self_event,
        tw_lp *sender);
/* NOTE(review): undocumented in the original; by analogy with
 * model_net_event_rc this is presumably the reverse computation for
 * model_net_pull_event -- confirm against the implementation */
void model_net_pull_event_rc(
        int net_id,
        tw_lp *sender);

225 226 227
/* returns pointer to LP type information for the network given by net_id
 * (NOTE(review): the original comment said "for simplenet module", but the
 * net_id parameter suggests this applies to any network -- confirm) */
const tw_lptype* model_net_get_lp_type(int net_id);

228
/* NOTE(review): undocumented in the original; presumably returns the packet
 * size (cf. the packet_size argument of model_net_setup) in effect for
 * network net_id -- confirm against the implementation */
uint64_t model_net_get_packet_size(int net_id);
229

230 231 232
/* used for reporting overall network statistics, e.g. average latency,
 * maximum latency, total number of packets finished during the entire
 * simulation, etc.
 * - net_id: network ID to report on */
void model_net_report_stats(int net_id);
234

235
/* writing model-net statistics on a per LP basis */
236 237
void model_net_write_stats(tw_lpid lpid, mn_stats* stat);

238
/* printing model-net statistics on a per LP basis */
239 240 241 242
void model_net_print_stats(tw_lpid lpid, mn_stats mn_stats_array[]);

/* find model-net statistics
 * - category: category name to look up
 * - mn_stats_array: array of statistics records to search
 * NOTE(review): what is returned when the category is not present (NULL, or
 * a newly claimed slot) is not visible from this header -- confirm */
mn_stats* model_net_find_stats(const char* category, mn_stats mn_stats_array[]);
243 244 245 246 247 248 249 250 251 252
#endif /* MODELNET_H */

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */