/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

/* I/O workload generator API to be used for reading I/O operations into
 * storage system simulations.  This API just describes the operations to be
 * executed; it does not service the operations.
 */

#ifndef CODES_WORKLOAD_H
#define CODES_WORKLOAD_H

#ifdef __cplusplus
extern "C" {
#endif

#include <ross.h>
#include "configuration.h"

/* abt.h is only included when the build defines USE_ONLINE */
#ifdef USE_ONLINE
#include <abt.h>
#endif

/* Size, in chars, of the fixed-length name and path buffers embedded in the
 * workload parameter structs declared in this header. */
#define MAX_NAME_LENGTH_WKLD 512
/* Forward declarations (tag + typedef of the same name) for the parameter
 * structs of the workload implementations included with codes. */
typedef struct iomock_params iomock_params;
typedef struct iolang_params iolang_params;
typedef struct darshan_params darshan_params;
typedef struct recorder_params recorder_params;
/* struct to hold the actual data from a single MPI event */
typedef struct dumpi_trace_params dumpi_trace_params;
typedef struct checkpoint_wrkld_params checkpoint_wrkld_params;
typedef struct online_comm_params online_comm_params;

/* Parameters for the iomock workload generator. */
struct iomock_params {
    uint64_t file_id;           /* file identifier */
    int      use_uniq_file_ids; /* flag (exact semantics defined by the generator) */
    int      is_write;          /* flag: presumably non-zero selects writes - confirm */
    int      num_requests;      /* number of requests */
    int      request_size;      /* size of each request */
    /* Lookup-table sizing hint: a larger value (>= number of ranks) reduces
     * collisions; 0 selects the default. */
    int      rank_table_size;
};

50
struct iolang_params
51
{
Jonathan Jenkins's avatar
Jonathan Jenkins committed
52
    /* the rank count is defined in the workload config file */
53
    int num_cns;
54 55
    /* flag - use path to find kernel files relative to the metafile */
    int use_relpath;
56
    char io_kernel_meta_path[MAX_NAME_LENGTH_WKLD];
57
    /* set by config in the metadata path */
58
    char io_kernel_path[MAX_NAME_LENGTH_WKLD];
59 60
};

61 62 63
struct darshan_params
{
    char log_file_path[MAX_NAME_LENGTH_WKLD];
64
    int64_t aggregator_cnt;
65 66
};

67 68 69
struct recorder_params
{
    char trace_dir_path[MAX_NAME_LENGTH_WKLD];
70
    int64_t nprocs;
71 72
};

73 74
struct dumpi_trace_params {
   char file_name[MAX_NAME_LENGTH_WKLD];
75
   int num_net_traces;
76
   int nprocs;
77
#ifdef ENABLE_CORTEX_PYTHON
78 79
   char cortex_script[MAX_NAME_LENGTH_WKLD];
   char cortex_class[MAX_NAME_LENGTH_WKLD];
80
   char cortex_gen[MAX_NAME_LENGTH_WKLD];
81
#endif
82 83
};

84 85 86 87 88
/* Parameters for the online communication workload generator. */
struct online_comm_params
{
    char workload_name[MAX_NAME_LENGTH_WKLD]; /* name of the workload */
    char file_path[MAX_NAME_LENGTH_WKLD];     /* path of an associated file */
    int  nprocs;                              /* number of processes */
};
/* Parameters for the checkpoint workload generator. */
struct checkpoint_wrkld_params
{
    int nprocs; /* number of workload processes */
    double checkpoint_sz; /* size of checkpoint, in TiB */
    double checkpoint_wr_bw; /* checkpoint write b/w, in GiB/s */
    int total_checkpoints; /* total number of checkpoint phases */
    double mtti; /* mean time to interrupt, in hours */
};
/* supported I/O operations
 *
 * Enumerators are numbered sequentially starting at CODES_WK_END = 1, so
 * inserting or reordering entries changes the numeric value of every
 * enumerator that follows. */
enum codes_workload_op_type
{
    /* terminator; there are no more operations for this rank */
    CODES_WK_END = 1,
    /* sleep/delay to simulate computation or other activity */
    CODES_WK_DELAY,
    /* block until specified ranks have reached the same point */
    CODES_WK_BARRIER,

    /* IO operations */
    /* open */
    CODES_WK_OPEN,
    /* close */
    CODES_WK_CLOSE,
    /* write */
    CODES_WK_WRITE,
    /* read */
    CODES_WK_READ,

    /* network operations (modelled after MPI operations) */
    /* blocking send operation */
    CODES_WK_SEND,
    /* blocking recv operation */
    CODES_WK_RECV,
    /* non-blocking send operation */
    CODES_WK_ISEND,
    /* non-blocking receive operation */
    CODES_WK_IRECV,
    /* broadcast operation */
    CODES_WK_BCAST,
    /* Allgather operation */
    CODES_WK_ALLGATHER,
    /* Allgatherv operation */
    CODES_WK_ALLGATHERV,
    /* Alltoall operation */
    CODES_WK_ALLTOALL,
    /* Alltoallv operation */
    CODES_WK_ALLTOALLV,
    /* Reduce operation */
    CODES_WK_REDUCE,
    /* Allreduce operation */
    CODES_WK_ALLREDUCE,
    /* Generic collective operation */
    CODES_WK_COL,
    /* Waitall operation */
    CODES_WK_WAITALL,
    /* Wait operation */
    CODES_WK_WAIT,
    /* Waitsome operation */
    CODES_WK_WAITSOME,
    /* Waitany operation */
    CODES_WK_WAITANY,
    /* Testall operation */
    CODES_WK_TESTALL,
    /* MPI request free operation */
    CODES_WK_REQ_FREE,

    /* for workloads that have events not yet handled
     * (eg the workload language) */
    CODES_WK_IGNORE
};

/* I/O operation parameters
 *
 * Describes one workload operation: the common fields come first, followed
 * by a union "u" holding the parameters specific to the operation type. */
struct codes_workload_op
{
    /* TODO: do we need different "classes" of operations to differentiate
     * between different APIs?
     */

    /* what type of operation this is (presumably one of the
     * enum codes_workload_op_type values, stored as a plain int) */
    int op_type;
    /* currently only used by network workloads */
    double start_time;
    double end_time;
    double sim_start_time;

    int64_t sequence_id;
    /* parameters for each operation type */
    union
    {
        struct {
            double seconds;
            double nsecs;
        } delay;
        struct {
            int count;  /* num ranks in barrier, -1 means "all" */
            int root;   /* root rank */
        } barrier;
        struct {
            uint64_t file_id;      /* integer identifier for the file */
            int create_flag;  /* file must be created, not just opened */
        } open;
        struct {
            uint64_t file_id;  /* file to operate on */
            off_t offset; /* offset and size */
            size_t size;
        } write;
        struct {
            uint64_t file_id;  /* file to operate on */
            off_t offset; /* offset and size */
            size_t size;
        } read;
        struct {
            uint64_t file_id;  /* file to operate on */
        } close;
        struct {
            /* TODO: not sure why source rank is here */
            int source_rank; /* source rank of MPI send message */
            int dest_rank; /* dest rank of MPI send message */
            int64_t num_bytes; /* number of bytes to be transferred over the network */
            int16_t data_type; /* MPI data type to be matched with the recv */
            int count; /* number of elements in the message */
            int tag; /* tag of the message */
            unsigned int req_id;
        } send;
        struct {
            /* TODO: not sure why source rank is here */
            int source_rank; /* source rank of MPI recv message */
            int dest_rank; /* dest rank of MPI recv message */
            int64_t num_bytes; /* number of bytes to be transferred over the network */
            int16_t data_type; /* MPI data type to be matched with the send */
            int count; /* number of elements in the message */
            int tag; /* tag of the message */
            unsigned int req_id;
        } recv;
        /* TODO: non-stub for other collectives */
        struct {
            int num_bytes;
        } collective;
        struct {
            int count;
            /* NOTE(review): presumably points at "count" request ids;
             * ownership of the array is not visible here - confirm */
            uint32_t* req_ids;
        } waits;
        struct {
            uint32_t req_id;
        } wait;
        struct {
            uint32_t req_id;
        } free;
    } u;
};

/* Helper macro for implementations - call this if multi-app support is not
 * available.  Calls tw_error() when id is non-zero.
 *
 *   id   - application id handed to the generator; only 0 is accepted
 *   name - generator name, printed through "%s" in the error message
 *
 * The expansion is wrapped in do { } while (0) so it forms a single statement
 * and cannot capture an "else" that follows the call site, and id is
 * parenthesized so operators inside the argument are not re-associated with
 * the != test.  The trailing semicolon is kept deliberately so that existing
 * call sites written without one keep compiling. */
#define APP_ID_UNSUPPORTED(id, name) \
    do { \
        if ((id) != 0) \
            tw_error(TW_LOC,\
                "APP IDs not supported for %s generator, 0 required", \
                (name)); \
    } while (0);

/* Result of codes_workload_read_config(): the workload name ("type") and its
 * parameters ("params") read from a CODES configuration file.  Both can then
 * be passed to codes_workload_load. */
typedef struct {
    const char *type;
    void *params;
} codes_workload_config_return;

259 260 261
// NOTE: some workloads (iolang, checkpoint) require information about the
// total number of ranks to correctly process traces/config files, etc. Other
// workload generators (darshan) ignore it
262 263
codes_workload_config_return codes_workload_read_config(
        ConfigHandle * handle,
264 265 266
        char const * section_name,
        char const * annotation,
        int num_ranks);
267 268 269

/* Counterpart of codes_workload_read_config() for a
 * codes_workload_config_return value.
 * NOTE(review): presumably releases what c refers to; exactly what is freed
 * is not visible from this header - confirm against the implementation. */
void codes_workload_free_config_return(
        codes_workload_config_return *c);

/* load and initialize workload of type "type" with parameters specified by
 * "params".  The rank is the caller's relative rank within the collection
 * of processes that will participate in this workload. The app_id is the
 * "application" that the rank is participating in, used to differentiate
 * between multiple, concurrent workloads
 *
 * This function is intended to be called by a compute node LP in a model
 * and may be called multiple times over the course of a
 * simulation in order to execute different application workloads.
 *
 * Returns an identifier that can be used to retrieve operations later.
 * Returns -1 on failure.
 */
int codes_workload_load(
        const char* type,
        const char* params,
        int app_id,
        int rank);

/* Retrieves the next I/O operation to execute.  The wkld_id is the
 * identifier returned by codes_workload_load().  The op argument is a pointer
 * to a structure to be filled in with I/O operation information.
 */
void codes_workload_get_next(
        int wkld_id,
        int app_id,
        int rank,
        struct codes_workload_op *op);

/* Reverse of codes_workload_get_next(): takes the same wkld_id, app_id and
 * rank, plus the operation (read-only) being reversed. */
void codes_workload_get_next_rc(
        int wkld_id,
        int app_id,
        int rank,
        const struct codes_workload_op *op);

/* Alternative reverse handler: same identifying arguments as
 * codes_workload_get_next_rc(), but it is not handed the operation. */
void codes_workload_get_next_rc2(int wkld_id, int app_id, int rank);

/* Retrieve the number of ranks contained in a workload.  The workload is
 * identified by its type name, its parameters and the app_id. */
int codes_workload_get_rank_cnt(
        const char* type,
        const char* params,
        int app_id);

/* Finalize the workload identified by type/params for the given app_id and
 * rank.
 * NOTE(review): the meaning of the int return value is not documented in
 * this header - confirm against the implementation. */
int codes_workload_finalize(const char* type,
                            const char* params,
                            int app_id,
                            int rank);

/* for debugging/logging: print an individual operation to the specified file
 *
 *   f      - stream to print to
 *   op     - operation to print
 *   app_id - application the operation belongs to
 *   rank   - rank the operation belongs to
 */
void codes_workload_print_op(
        FILE *f,
        struct codes_workload_op *op,
        int app_id,
        int rank);

/* implementation structure: the name of a workload generator plus the
 * callbacks it provides.  The callbacks mirror the public codes_workload_*
 * functions of the same name, without the type / wkld_id argument. */
struct codes_workload_method
{
    char *method_name; /* name of the generator */
    void * (*codes_workload_read_config) (
            ConfigHandle *handle, char const * section_name,
            char const * annotation, int num_ranks);
    int (*codes_workload_load)(const char* params, int app_id, int rank);
    void (*codes_workload_get_next)(int app_id, int rank, struct codes_workload_op *op);
    void (*codes_workload_get_next_rc2)(int app_id, int rank);
    int (*codes_workload_get_rank_cnt)(const char* params, int app_id);
    int (*codes_workload_finalize)(const char* params, int app_id, int rank);
};

/* Register an additional generator in the workload implementation table at
 * run time.  This has to happen BEFORE any call to
 * codes_workload_read_config or codes_workload_load. */
void codes_workload_add_method(
        struct codes_workload_method const * method);

/* NOTE: there is deliberately no finalize function; we don't have any
 * reliable way to tell when a workload is truly done and will not
 * participate in further reverse computation.   The underlying generators
 * will shut down automatically once they have issued their last event.
 *
 * NOTE(review): this header now declares codes_workload_finalize(), so the
 * statement above may be out of date - confirm and update or remove it.
 */

/* closes the extern "C" block opened at the top of this header */
#ifdef __cplusplus
}
#endif

#endif /* CODES_WORKLOAD_H */

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 *  indent-tabs-mode: nil
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */