/*
 * Copyright (C) 2013 University of Chicago.
 * See COPYRIGHT notice in top-level directory.
 *
 */

/* I/O workload generator API to be used for reading I/O operations into
 * storage system simulations.  This API just describes the operations to be
 * executed; it does not service the operations.
 */

#ifndef CODES_WORKLOAD_H
#define CODES_WORKLOAD_H

#include "ross.h"
/* Capacity, in bytes, of the fixed-size file-name and path buffers declared
 * in the workload parameter structs in this header. */
#define MAX_NAME_LENGTH_WKLD 512

/* Short type names for the workload parameter/info structs defined in this
 * header. */
typedef struct bgp_params bgp_params;
typedef struct darshan_params darshan_params;
typedef struct recorder_params recorder_params;
typedef struct codes_workload_info codes_workload_info;

/* Short type names for the trace workload parameter structs (these carry
 * trace file names and a trace count; see the struct definitions). */
typedef struct scala_trace_params scala_trace_params;
typedef struct dumpi_trace_params dumpi_trace_params;

27 28
struct bgp_params
{
29
    /* We have the number of ranks passed in from the bg/p model because
30 31 32
     * the I/O lang workloads have no information about the number of ranks.
     * Only the bg/p config file knows the number of ranks. */
    int num_cns;
33 34
    /* flag - use path to find kernel files relative to the metafile */
    int use_relpath;
35 36 37 38
    char io_kernel_meta_path[MAX_NAME_LENGTH_WKLD];
    char bgp_config_file[MAX_NAME_LENGTH_WKLD];
    char io_kernel_path[MAX_NAME_LENGTH_WKLD];
    char io_kernel_def_path[MAX_NAME_LENGTH_WKLD];
39 40
};

41 42 43
struct darshan_params
{
    char log_file_path[MAX_NAME_LENGTH_WKLD];
44
    int64_t aggregator_cnt;
45 46
};

47 48 49
struct recorder_params
{
    char trace_dir_path[MAX_NAME_LENGTH_WKLD];
50
    int64_t nprocs;
51 52 53
};


/* Rank/group information describing a collective operation. */
struct codes_workload_info
{
    int group_id;   /* group id */
    int min_rank;   /* minimum rank in the collective operation */
    int max_rank;   /* maximum rank in the collective operation */
    int local_rank; /* local rank? never being used in the bg/p model */
    int num_lrank;  /* number of ranks participating in the collective operation */
};

63 64 65 66 67 68 69 70 71 72 73
struct scala_trace_params {
   char offset_file_name[MAX_NAME_LENGTH_WKLD];
   char nw_wrkld_file_name[MAX_NAME_LENGTH_WKLD];
};

/* Parameters for the dumpi trace workload. */
struct dumpi_trace_params
{
    /* count of network traces (per the field name -- TODO confirm) */
    int num_net_traces;
    /* trace file name (buffer of MAX_NAME_LENGTH_WKLD bytes) */
    char file_name[MAX_NAME_LENGTH_WKLD];
};


/* Supported workload operations.  Numbering starts at 1 (CODES_WK_END) and
 * increases by one per enumerator in declaration order.  Note that the
 * wait/test operations use the CODES_NW_ prefix while every other operation
 * uses CODES_WK_. */
enum codes_workload_op_type
{
    /* terminator; there are no more operations for this rank */
    CODES_WK_END = 1,
    /* sleep/delay to simulate computation or other activity */
    CODES_WK_DELAY,
    /* block until specified ranks have reached the same point */
    CODES_WK_BARRIER,

    /* IO operations */
    /* open */
    CODES_WK_OPEN,
    /* close */
    CODES_WK_CLOSE,
    /* write */
    CODES_WK_WRITE,
    /* read */
    CODES_WK_READ,

    /* network operations (modelled after MPI operations) */
    /* blocking send operation */
    CODES_WK_SEND,
    /* blocking recv operation */
    CODES_WK_RECV,
    /* non-blocking send operation */
    CODES_WK_ISEND,
    /* non-blocking receive operation */
    CODES_WK_IRECV,
    /* broadcast operation */
    CODES_WK_BCAST,
    /* Allgather operation */
    CODES_WK_ALLGATHER,
    /* Allgatherv operation */
    CODES_WK_ALLGATHERV,
    /* Alltoall operation */
    CODES_WK_ALLTOALL,
    /* Alltoallv operation */
    CODES_WK_ALLTOALLV,
    /* Reduce operation */
    CODES_WK_REDUCE,
    /* Allreduce operation */
    CODES_WK_ALLREDUCE,
    /* Generic collective operation */
    CODES_WK_COL,
    /* Waitall operation */
    CODES_NW_WAITALL,
    /* Wait operation */
    CODES_NW_WAIT,
    /* Waitsome operation */
    CODES_NW_WAITSOME,
    /* Waitany operation */
    CODES_NW_WAITANY,
    /* Testall operation */
    CODES_NW_TESTALL,
};

/* I/O operation paramaters */
struct codes_workload_op
{
    /* TODO: do we need different "classes" of operations to differentiate
     * between different APIs?
     */

138
    /* what type of operation this is */
139
    enum codes_workload_op_type op_type;
140 141 142
    /* currently only used by network workloads */
    double start_time;
    double end_time;
143
    double sim_start_time;
144

145
    /* parameters for each operation type */
146 147 148 149
    union
    {
        struct {
            double seconds;
150
	    double nsecs;
151 152
        } delay;
        struct {
Philip Carns's avatar
Philip Carns committed
153
            int count;  /* num ranks in barrier, -1 means "all" */
154 155 156
            int root;   /* root rank */
        } barrier;
        struct {
157
            uint64_t file_id;      /* integer identifier for the file */
158 159 160
            int create_flag;  /* file must be created, not just opened */
        } open;
        struct {
161
            uint64_t file_id;  /* file to operate on */
162 163 164 165
            off_t offset; /* offset and size */
            size_t size;
        } write;
        struct {
166
            uint64_t file_id;  /* file to operate on */
167 168 169 170
            off_t offset; /* offset and size */
            size_t size;
        } read;
        struct {
171
            uint64_t file_id;  /* file to operate on */
172
        } close;
173 174 175 176 177 178 179 180
        struct {
            /* TODO: not sure why source rank is here */
            int source_rank;/* source rank of MPI send message */
            int dest_rank; /* dest rank of MPI send message */
            int num_bytes; /* number of bytes to be transferred over the network */
            int data_type; /* MPI data type to be matched with the recv */
            int count; /* number of elements to be received */
            int tag; /* tag of the message */
181
            int16_t req_id;
182 183 184 185 186 187 188 189 190
        } send;
        struct {
            /* TODO: not sure why source rank is here */
            int source_rank;/* source rank of MPI recv message */
            int dest_rank;/* dest rank of MPI recv message */
            int num_bytes; /* number of bytes to be transferred over the network */
            int data_type; /* MPI data type to be matched with the send */
            int count; /* number of elements to be sent */
            int tag; /* tag of the message */
191
            int16_t req_id;
192 193 194 195 196
        } recv;
        /* TODO: non-stub for other collectives */
        struct {
            int num_bytes;
        } collective;
197 198 199 200 201 202 203
	struct {
	    int count;
	    int16_t* req_ids;
	} waits;
	struct {
	   int16_t req_id;
	} wait;
204
    }u;
205 206 207 208
};

/* Load and initialize a workload of type "type" using the parameters given
 * in "params".  "rank" is the caller's relative rank within the collection
 * of processes that will participate in this workload.
 *
 * This function is intended to be called by a compute node LP in a model;
 * it may be called multiple times over the course of a simulation in order
 * to execute different application workloads.
 *
 * Returns an identifier that can be used to retrieve operations later, or
 * -1 on failure.
 */
int codes_workload_load(const char *type, const char *params, int rank);

/* Retrieves the next I/O operation to execute.  wkld_id is the identifier
 * returned by codes_workload_load().  The op argument is a pointer to a
 * structure to be filled in with I/O operation information.
 */
void codes_workload_get_next(int wkld_id, int rank, struct codes_workload_op *op);

/* Reverse of codes_workload_get_next().  Takes the same workload id and
 * rank, plus a read-only pointer to an operation. */
void codes_workload_get_next_rc(int wkld_id, int rank, const struct codes_workload_op *op);

/* Retrieve the number of ranks contained in the workload identified by the
 * "type" and "params" strings. */
int codes_workload_get_rank_cnt(const char *type, const char *params);

/* For debugging/logging: print an individual operation "op" to the
 * specified file "f". */
void codes_workload_print_op(FILE *f, struct codes_workload_op *op, int rank);

/* NOTE: there is deliberately no finalize function; we don't have any
 * reliable way to tell when a workload is truly done and will not
 * participate in further reverse computation.  The underlying generators
 * will shut down automatically once they have issued their last event.
 */

#endif /* CODES_WORKLOAD_H */

/*
 * Local variables:
 *  c-indent-level: 4
 *  c-basic-offset: 4
 * End:
 *
 * vim: ft=c ts=8 sts=4 sw=4 expandtab
 */