codes-workload.h 11.6 KB
Newer Older
1 2 3 4 5 6
/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

7
/* I/O workload generator API to be used for reading I/O operations into
 * storage system simulations.  This API just describes the operations to be
 * executed; it does not service the operations.
 */
11 12 13 14

#ifndef CODES_WORKLOAD_H
#define CODES_WORKLOAD_H

Jonathan Jenkins's avatar
Jonathan Jenkins committed
15 16 17 18
#ifdef __cplusplus
extern "C" {
#endif

19 20 21
#include <ross.h>
#include "configuration.h"

22 23 24
#ifdef USE_ONLINE
#include <abt.h>
#endif
25
/* size in bytes of the fixed-length name/path buffers in the workload
 * parameter structs declared in this header */
#define MAX_NAME_LENGTH_WKLD 512
26

27
/* implementations included with codes */
28
/* Forward typedefs for the per-generator parameter structs defined later in
 * this header, so each can be named without the "struct" keyword. */
typedef struct iomock_params iomock_params;
typedef struct iolang_params iolang_params;
typedef struct darshan_params darshan_params;
typedef struct recorder_params recorder_params;

/* struct to hold the actual data from a single MPI event*/
typedef struct dumpi_trace_params dumpi_trace_params;
typedef struct checkpoint_wrkld_params checkpoint_wrkld_params;
typedef struct online_comm_params online_comm_params;
37

38 39 40 41 42 43 44 45 46 47 48 49
/* Parameters for the "iomock" workload implementation.
 * NOTE(review): the field meanings below are inferred from their names where
 * the header does not spell them out - confirm against the generator. */
struct iomock_params {
    uint64_t file_id;       /* integer file identifier */
    int use_uniq_file_ids;  /* flag */
    int is_write;           /* flag */
    int num_requests;
    int request_size;
    /* for optimizing lookup: set higher (>= num ranks) to reduce collisions,
     * or 0 to use the default */
    int rank_table_size;
};

50
struct iolang_params
51
{
Jonathan Jenkins's avatar
Jonathan Jenkins committed
52
    /* the rank count is defined in the workload config file */
53
    int num_cns;
54 55
    /* flag - use path to find kernel files relative to the metafile */
    int use_relpath;
56
    char io_kernel_meta_path[MAX_NAME_LENGTH_WKLD];
57
    /* set by config in the metadata path */
58
    char io_kernel_path[MAX_NAME_LENGTH_WKLD];
59 60
};

61 62 63
/* Parameters for the "darshan" workload implementation. */
struct darshan_params
{
    char log_file_path[MAX_NAME_LENGTH_WKLD]; /* log file path */
    int app_cnt; /* NOTE(review): presumably the application count - confirm */
};

67 68 69
/* Parameters for the "recorder" workload implementation. */
struct recorder_params
{
    char trace_dir_path[MAX_NAME_LENGTH_WKLD]; /* trace directory path */
    int64_t nprocs; /* NOTE(review): presumably the process count - confirm */
};

73 74
/* Parameters for the "dumpi" trace workload implementation. The cortex_*
 * members exist only when ENABLE_CORTEX_PYTHON is defined, so the struct
 * layout depends on that build option. */
struct dumpi_trace_params {
   char file_name[MAX_NAME_LENGTH_WKLD];
   int num_net_traces;
   int nprocs;
#ifdef ENABLE_CORTEX_PYTHON
   char cortex_script[MAX_NAME_LENGTH_WKLD];
   char cortex_class[MAX_NAME_LENGTH_WKLD];
   char cortex_gen[MAX_NAME_LENGTH_WKLD];
#endif
};

84 85 86 87 88
/* Parameters for the online communication workload implementation.
 * NOTE(review): field meanings are inferred from their names - confirm. */
struct online_comm_params
{
    char workload_name[MAX_NAME_LENGTH_WKLD];  /* name of the workload */
    char file_path[MAX_NAME_LENGTH_WKLD];      /* file path */
    int nprocs;                                /* process count */
};
89 90 91 92 93
/* Parameters for the checkpoint workload implementation. */
struct checkpoint_wrkld_params
{
    int nprocs; /* number of workload processes */
    double checkpoint_sz; /* size of checkpoint, in TiB */
    double checkpoint_wr_bw; /* checkpoint write b/w, in GiB/s */
    int total_checkpoints; /* total number of checkpoint phases */
    double mtti; /* mean time to interrupt, in hours */
};
97

98 99 100 101
/* supported I/O operations
 *
 * Numbering starts at 1 (CODES_WK_END) and every later enumerator takes the
 * next consecutive value, so inserting or reordering entries changes the
 * numeric value of everything after the change. */
enum codes_workload_op_type
{
    /* terminator; there are no more operations for this rank */
    CODES_WK_END = 1,
    /* sleep/delay to simulate computation or other activity */
    CODES_WK_DELAY,
    /* block until specified ranks have reached the same point */
    CODES_WK_BARRIER,

    /* IO operations */
    /* open */
    CODES_WK_OPEN,
    /* close */
    CODES_WK_CLOSE,
    /* write */
    CODES_WK_WRITE,
    /* read */
    CODES_WK_READ,

    /* network operations (modelled after MPI operations) */
    /* blocking send operation */
    CODES_WK_SEND,
    /* blocking recv operation */
    CODES_WK_RECV,
    /* non-blocking send operation */
    CODES_WK_ISEND,
    /* non-blocking receive operation */
    CODES_WK_IRECV,
    /* broadcast operation */
    CODES_WK_BCAST,
    /* Allgather operation */
    CODES_WK_ALLGATHER,
    /* Allgatherv operation */
    CODES_WK_ALLGATHERV,
    /* Alltoall operation */
    CODES_WK_ALLTOALL,
    /* Alltoallv operation */
    CODES_WK_ALLTOALLV,
    /* Reduce operation */
    CODES_WK_REDUCE,
    /* Allreduce operation */
    CODES_WK_ALLREDUCE,
    /* Generic collective operation */
    CODES_WK_COL,
    /* Waitall operation */
    CODES_WK_WAITALL,
    /* Wait operation */
    CODES_WK_WAIT,
    /* Waitsome operation */
    CODES_WK_WAITSOME,
    /* Waitany operation */
    CODES_WK_WAITANY,
    /* Testall operation */
    CODES_WK_TESTALL,
    /* MPI request free operation*/
    CODES_WK_REQ_FREE,

    /* for workloads that have events not yet handled
     * (eg the workload language) */
    CODES_WK_IGNORE,

    /* extended IO workload operations: MPI */

    /* open */
    CODES_WK_MPI_OPEN,
    /* close */
    CODES_WK_MPI_CLOSE,
    /* write */
    CODES_WK_MPI_WRITE,
    /* read */
    CODES_WK_MPI_READ,
    /* collective open */
    CODES_WK_MPI_COLL_OPEN,
    /* collective_write */
    CODES_WK_MPI_COLL_WRITE,
    /* collective_read */
    CODES_WK_MPI_COLL_READ,
};

/* I/O operation paramaters */
struct codes_workload_op
{
    /* TODO: do we need different "classes" of operations to differentiate
     * between different APIs?
     */

185
    /* what type of operation this is */
186
    enum codes_workload_op_type op_type;
187 188 189
    /* currently only used by network workloads */
    double start_time;
    double end_time;
190
    double sim_start_time;
191

192
    int64_t sequence_id;
193
    /* parameters for each operation type */
194 195 196 197
    union
    {
        struct {
            double seconds;
198
	    double nsecs;
199 200
        } delay;
        struct {
Philip Carns's avatar
Philip Carns committed
201
            int count;  /* num ranks in barrier, -1 means "all" */
202 203 204
            int root;   /* root rank */
        } barrier;
        struct {
205
            uint64_t file_id;      /* integer identifier for the file */
206 207 208
            int create_flag;  /* file must be created, not just opened */
        } open;
        struct {
209
            uint64_t file_id;  /* file to operate on */
210 211 212 213
            off_t offset; /* offset and size */
            size_t size;
        } write;
        struct {
214
            uint64_t file_id;  /* file to operate on */
215 216 217 218
            off_t offset; /* offset and size */
            size_t size;
        } read;
        struct {
219
            uint64_t file_id;  /* file to operate on */
220
        } close;
221 222 223 224
        struct {
            /* TODO: not sure why source rank is here */
            int source_rank;/* source rank of MPI send message */
            int dest_rank; /* dest rank of MPI send message */
225
            int64_t num_bytes; /* number of bytes to be transferred over the network */
226
            int16_t data_type; /* MPI data type to be matched with the recv */
227 228
            int count; /* number of elements to be received */
            int tag; /* tag of the message */
229
            unsigned int req_id;
230 231 232 233 234
        } send;
        struct {
            /* TODO: not sure why source rank is here */
            int source_rank;/* source rank of MPI recv message */
            int dest_rank;/* dest rank of MPI recv message */
235
            int64_t num_bytes; /* number of bytes to be transferred over the network */
236
            int16_t data_type; /* MPI data type to be matched with the send */
237 238
            int count; /* number of elements to be sent */
            int tag; /* tag of the message */
239
            unsigned int req_id;
240 241 242 243 244
        } recv;
        /* TODO: non-stub for other collectives */
        struct {
            int num_bytes;
        } collective;
245 246
        struct {
            int count;
247
            uint32_t* req_ids;
248 249
        } waits;
        struct {
250
            uint32_t req_id;
251
        } wait;
252 253
        struct
        {
254
            uint32_t req_id;
255 256
        }
        free;
257
    }u;
258 259
};

260 261 262 263 264 265 266
// helper macro for implementations - call this if multi-app support not
// available. Calls tw_error when id is non-zero.
//
// The expansion is a single do/while(0) statement, so it cannot capture an
// "else" that follows the call site, and id is parenthesized so an expression
// argument is compared against 0 as a whole. The trailing ';' is kept on
// purpose so that existing call sites written without their own semicolon
// still compile.
#define APP_ID_UNSUPPORTED(id, name) \
    do { \
        if ((id) != 0) \
            tw_error(TW_LOC,\
                    "APP IDs not supported for %s generator, 0 required", \
                    (name)); \
    } while (0);

267 268 269 270 271 272 273 274 275
/* Result of reading a workload configuration from a CODES configuration
 * file: the workload name ("type") and its parameters ("params"), which can
 * then be passed to codes_workload_load */
typedef struct {
    const char *type;
    void *params;
} codes_workload_config_return;

276 277 278
// NOTE: some workloads (iolang, checkpoint) require information about the
// total number of ranks to correctly process traces/config files, etc. Other
// workload generators (darshan) ignore it
279 280
codes_workload_config_return codes_workload_read_config(
        ConfigHandle * handle,
281 282 283
        char const * section_name,
        char const * annotation,
        int num_ranks);
284 285 286

void codes_workload_free_config_return(codes_workload_config_return *c);

287 288
/* load and initialize workload of type "type" with parameters specified by
 * "params".  The rank is the caller's relative rank within the collection
 * of processes that will participate in this workload. The app_id is the
 * "application" that the rank is participating in, used to differentiate
 * between multiple, concurrent workloads
 *
 * This function is intended to be called by a compute node LP in a model
 * and may be called multiple times over the course of a
 * simulation in order to execute different application workloads.
 *
 * Returns an identifier that can be used to retrieve operations later.
 * Returns -1 on failure.
 */
int codes_workload_load(
        const char* type,
        const char* params,
        int app_id,
        int rank);
305 306 307 308 309

/* Retrieves the next I/O operation to execute.  the wkld_id is the
 * identifier returned by codes_workload_load().  The op argument is a pointer
 * to a structure to be filled in with I/O operation information.
 */
void codes_workload_get_next(
        int wkld_id,
        int app_id,
        int rank,
        struct codes_workload_op *op);
315 316

/* Reverse of codes_workload_get_next.  op is passed as const, so this call
 * does not modify it.
 * NOTE(review): presumably op is the operation a previous
 * codes_workload_get_next call produced - confirm. */
void codes_workload_get_next_rc(
        int wkld_id,
        int app_id,
        int rank,
        const struct codes_workload_op *op);
322

323 324 325 326 327 328
/* Another version of the reverse handler; this one is not passed the
 * operation being reversed. */
void codes_workload_get_next_rc2(int wkld_id, int app_id, int rank);

329
/* Retrieve the number of ranks contained in a workload */
int codes_workload_get_rank_cnt(
        const char* type,
        const char* params,
        int app_id);
334

335 336 337 338 339 340 341
/* Finalize the workload.
 * NOTE(review): the meaning of the int return value is not visible in this
 * header - confirm against the implementation. */
int codes_workload_finalize(const char *type, const char *params,
                            int app_id, int rank);

342
/* for debugging/logging: print an individual operation to the specified file */
void codes_workload_print_op(
        FILE *f,
        struct codes_workload_op *op,
        int app_id,
        int rank);
348

349 350 351 352 353
/* Get the read/write times and the read/written byte counts for a workload.
 * NOTE(review): presumably the four pointer arguments are outputs filled in
 * by the call; their units and the meaning of the int return value are not
 * visible in this header - confirm against the implementation. */
int codes_workload_get_time(
        const char *type,
        const char *params,
        int app_id,
        int rank,
        double *read_time,
        double *write_time,
        int64_t *read_bytes,
        int64_t *written_bytes);

354 355 356 357 358 359 360 361 362
/* implementation structure
 *
 * Table of function pointers supplied by one workload generator. Each member
 * is named after a public codes_workload_* function in this header; the
 * member signatures omit that function's leading "type" / "wkld_id"
 * argument. */
struct codes_workload_method
{
    char *method_name; /* name of the generator */
    void * (*codes_workload_read_config) (
            ConfigHandle *handle, char const * section_name,
            char const * annotation, int num_ranks);
    int (*codes_workload_load)(const char* params, int app_id, int rank);
    void (*codes_workload_get_next)(int app_id, int rank, struct codes_workload_op *op);
    void (*codes_workload_get_next_rc2)(int app_id, int rank);
    int (*codes_workload_get_rank_cnt)(const char* params, int app_id);
    int (*codes_workload_finalize)(const char* params, int app_id, int rank);
    /* added for get all read or write time */
    int (*codes_workload_get_time)(const char * params, int app_id, int rank, double *read_time, double *write_time, int64_t *read_bytes, int64_t *written_bytes);
};

370

371 372 373 374
/* Add a workload implementation to the implementation table at run time.
 * This must be done BEFORE any call to codes_workload_read_config or
 * codes_workload_load. */
void codes_workload_add_method(const struct codes_workload_method *method);

375 376 377 378 379 380
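/* NOTE: there is deliberately no finalize function; we don't have any
 * reliable way to tell when a workload is truly done and will not
 * participate in further reverse computation.   The underlying generators
 * will shut down automatically once they have issued their last event.
 *
 * NOTE(review): codes_workload_finalize() is declared earlier in this
 * header, so this note appears to be out of date - confirm and update.
 */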

Jonathan Jenkins's avatar
Jonathan Jenkins committed
381 382 383 384
#ifdef __cplusplus
}
#endif

385 386 387 388 389 390
#endif /* CODES_WORKLOAD_H */

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 *  indent-tabs-mode: nil
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */