/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
#if !defined(MPICH_MPIDRMA_H_INCLUDED)
#define MPICH_MPIDRMA_H_INCLUDED

/* NOTE(review): presumably supplies the MPL_DL_* list macros used by the
 * RMA ops list helpers in this header -- confirm. */
#include "mpl_utlist.h"

/* Kind of an RMA operation; stored in the `type` field of MPIDI_RMA_Op_t.
 * Values are assigned explicitly.  26 formerly belonged to the removed
 * MPIDI_RMA_LOCK and is left unused. */
typedef enum MPIDI_RMA_Op_type {
    MPIDI_RMA_PUT               = 23,
    MPIDI_RMA_GET               = 24,
    MPIDI_RMA_ACCUMULATE        = 25,
 /* REMOVED: MPIDI_RMA_LOCK     = 26, */
    MPIDI_RMA_ACC_CONTIG        = 27,
    MPIDI_RMA_GET_ACCUMULATE    = 28,
    MPIDI_RMA_COMPARE_AND_SWAP  = 29,
    MPIDI_RMA_FETCH_AND_OP      = 30
} MPIDI_RMA_Op_type_t;

/* Special case RMA operations */

/* Tags for basic versus derived datatypes.
 * NOTE(review): not referenced elsewhere in this header; usage should be
 * confirmed against the RMA implementation. */
enum MPIDI_RMA_Datatype {
    MPIDI_RMA_DATATYPE_BASIC    = 50,
    MPIDI_RMA_DATATYPE_DERIVED  = 51
};

/* Lock states.
 * NOTE(review): not referenced elsewhere in this header; presumably records
 * whether a window is under a shared lock-all -- confirm against callers. */
enum MPID_Lock_state {
    MPID_LOCK_NONE              = 0,
    MPID_LOCK_SHARED_ALL        = 1
};

/*
 * RMA Declarations.  We should move these into something separate from
 * a Request.
 */

39 40 41
/* to send derived datatype across in RMA ops */
typedef struct MPIDI_RMA_dtype_info { /* for derived datatypes */
    int           is_contig; 
42
    int           max_contig_blocks;
43 44
    int           size;     
    MPI_Aint      extent;   
45 46 47
    int           dataloop_size; /* not needed because this info is sent in 
				    packet header. remove it after lock/unlock 
				    is implemented in the device */
48 49 50 51 52 53 54 55 56
    void          *dataloop;  /* pointer needed to update pointers
                                 within dataloop on remote side */
    int           dataloop_depth; 
    int           eltype;
    MPI_Aint ub, lb, true_ub, true_lb;
    int has_sticky_ub, has_sticky_lb;
} MPIDI_RMA_dtype_info;

/* for keeping track of RMA ops, which will be executed at the next sync call */
57 58 59
typedef struct MPIDI_RMA_Op {
    struct MPIDI_RMA_Op *prev;  /* pointer to next element in list */
    struct MPIDI_RMA_Op *next;  /* pointer to next element in list */
60 61 62 63
    /* FIXME: It would be better to setup the packet that will be sent, at 
       least in most cases (if, as a result of the sync/ops/sync sequence,
       a different packet type is needed, it can be extracted from the 
       information otherwise stored). */
64
    MPIDI_RMA_Op_type_t type;
65 66 67 68 69 70 71 72
    void *origin_addr;
    int origin_count;
    MPI_Datatype origin_datatype;
    int target_rank;
    MPI_Aint target_disp;
    int target_count;
    MPI_Datatype target_datatype;
    MPI_Op op;  /* for accumulate */
73 74 75 76
    /* Used to complete operations */
    struct MPID_Request *request;
    MPIDI_RMA_dtype_info dtype_info;
    void *dataloop;
77 78 79 80 81 82
    void *result_addr;
    int result_count;
    MPI_Datatype result_datatype;
    void *compare_addr;
    int compare_count;
    MPI_Datatype compare_datatype;
83
} MPIDI_RMA_Op_t;
84 85 86 87 88 89 90 91 92 93

typedef struct MPIDI_PT_single_op {
    int type;  /* put, get, or accum. */
    void *addr;
    int count;
    MPI_Datatype datatype;
    MPI_Op op;
    void *data;  /* for queued puts and accumulates, data is copied here */
    MPI_Request request_handle;  /* for gets */
    int data_recd;  /* to indicate if the data has been received */
94
    MPIDI_CH3_Pkt_flags_t flags;
95 96 97 98 99 100 101
} MPIDI_PT_single_op;

/* Element of a singly-linked (next-only) queue of window lock entries.
 * NOTE(review): presumably holds lock requests that could not be granted
 * immediately -- confirm against the lock-handling code. */
typedef struct MPIDI_Win_lock_queue {
    struct MPIDI_Win_lock_queue *next;
    int lock_type;
    MPI_Win source_win_handle;
    MPIDI_VC_t * vc;
    struct MPIDI_PT_single_op *pt_single_op;  /* to store info for
                                                 lock-put-unlock optimization */
} MPIDI_Win_lock_queue;

/* Routine used to tune RMA optimizations.  Only declared here; the
 * definition lives outside this header.
 * NOTE(review): judging by the name, `flag` toggles immediate handling of
 * accumulate operations -- confirm against the definition. */
void MPIDI_CH3_RMA_SetAccImmed( int flag );

/*** RMA OPS LIST HELPER ROUTINES ***/

111
typedef MPIDI_RMA_Op_t * MPIDI_RMA_Ops_list_t;
112

113 114 115
/* Return nonzero if the RMA operations list is empty.
 */
#undef FUNCNAME
116
#define FUNCNAME MPIDI_CH3I_RMA_Ops_isempty
117 118
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
119
static inline int MPIDI_CH3I_RMA_Ops_isempty(MPIDI_RMA_Ops_list_t *list)
120
{
121 122 123 124 125 126 127 128 129 130
    return *list == NULL;
}


/* Return a pointer to the first element in the list.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_head
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
131
static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_head(MPIDI_RMA_Ops_list_t *list)
132 133 134 135 136 137 138 139 140 141 142
{
    return *list;
}


/* Return a pointer to the last element in the list.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_tail
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
143
static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_tail(MPIDI_RMA_Ops_list_t *list)
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
{
    return (*list) ? (*list)->prev : NULL;
}


/* Append an element to the tail of the RMA ops list
 *
 * @param IN    list      Pointer to the RMA ops list
 * @param IN    elem      Pointer to the element to be appended
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_append
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_append(MPIDI_RMA_Ops_list_t *list,
159
                                             MPIDI_RMA_Op_t *elem)
160 161
{
    MPL_DL_APPEND(*list, elem);
162 163 164 165 166
}


/* Allocate a new element on the tail of the RMA operations list.
 *
167 168
 * @param IN    list      Pointer to the RMA ops list
 * @param OUT   new_ptr   Pointer to the element that was allocated
169 170 171
 * @return                MPI error class
 */
#undef FUNCNAME
172
#define FUNCNAME MPIDI_CH3I_RMA_Ops_alloc_tail
173 174
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
175
static inline int MPIDI_CH3I_RMA_Ops_alloc_tail(MPIDI_RMA_Ops_list_t *list,
176
                                                MPIDI_RMA_Op_t **new_elem)
177 178
{
    int mpi_errno = MPI_SUCCESS;
179
    MPIDI_RMA_Op_t *tmp_ptr;
180 181 182
    MPIU_CHKPMEM_DECL(1);

    /* FIXME: We should use a pool allocator here */
183
    MPIU_CHKPMEM_MALLOC(tmp_ptr, MPIDI_RMA_Op_t *, sizeof(MPIDI_RMA_Op_t),
184 185 186 187 188
                        mpi_errno, "RMA operation entry");

    tmp_ptr->next = NULL;
    tmp_ptr->dataloop = NULL;

189
    MPL_DL_APPEND(*list, tmp_ptr);
190

191
    *new_elem = tmp_ptr;
192 193 194 195 196 197

 fn_exit:
    MPIU_CHKPMEM_COMMIT();
    return mpi_errno;
 fn_fail:
    MPIU_CHKPMEM_REAP();
198
    *new_elem = NULL;
199 200 201 202
    goto fn_exit;
}


203
/* Unlink an element from the RMA ops list
204
 *
205 206
 * @param IN    list      Pointer to the RMA ops list
 * @param IN    elem      Pointer to the element to be unlinked
207 208
 */
#undef FUNCNAME
209
#define FUNCNAME MPIDI_CH3I_RMA_Ops_unlink
210 211
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
212
static inline void MPIDI_CH3I_RMA_Ops_unlink(MPIDI_RMA_Ops_list_t *list,
213
                                             MPIDI_RMA_Op_t *elem)
214
{
215 216
    MPL_DL_DELETE(*list, elem);
}
217 218


219 220 221 222 223 224 225 226 227 228
/* Free an element in the RMA operations list.
 *
 * @param IN    list      Pointer to the RMA ops list
 * @param IN    curr_ptr  Pointer to the element to be freed.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free_elem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_free_elem(MPIDI_RMA_Ops_list_t *list,
229
                                                MPIDI_RMA_Op_t *curr_ptr)
230
{
231
    MPIDI_RMA_Op_t *tmp_ptr = curr_ptr;
232

233
    MPIU_Assert(curr_ptr != NULL);
234

235
    MPL_DL_DELETE(*list, curr_ptr);
236 237 238 239 240 241 242

    /* Check if we allocated a dataloop for this op (see send/recv_rma_msg) */
    if (tmp_ptr->dataloop != NULL)
        MPIU_Free(tmp_ptr->dataloop);
    MPIU_Free( tmp_ptr );
}

243

244 245 246 247 248 249
/* Free an element in the RMA operations list.
 *
 * @param IN    list      Pointer to the RMA ops list
 * @param INOUT curr_ptr  Pointer to the element to be freed.  Will be updated
 *                        to point to the element following the element that
 *                        was freed.
250 251
 */
#undef FUNCNAME
252
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free_and_next
253 254
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
255
static inline void MPIDI_CH3I_RMA_Ops_free_and_next(MPIDI_RMA_Ops_list_t *list,
256
                                                    MPIDI_RMA_Op_t **curr_ptr)
257
{
258
    MPIDI_RMA_Op_t *next_ptr = (*curr_ptr)->next;
259

260 261 262
    MPIDI_CH3I_RMA_Ops_free_elem(list, *curr_ptr);
    *curr_ptr = next_ptr;
}
263 264


265 266 267 268 269 270 271 272
/* Free the entire RMA operations list.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_free(MPIDI_RMA_Ops_list_t *list)
{
273
    MPIDI_RMA_Op_t *curr_ptr, *tmp_ptr;
274

275 276 277
    MPL_DL_FOREACH_SAFE(*list, curr_ptr, tmp_ptr) {
        MPIDI_CH3I_RMA_Ops_free_elem(list, curr_ptr);
    }
278 279 280
}


281 282 283 284
/* Retrieve the RMA ops list pointer from the window.  This routine detects
 * whether we are in an active or passive target epoch and returns the correct
 * ops list; we use a shared list for active target and separate per-target
 * lists for passive target.
285 286
 */
#undef FUNCNAME
287
#define FUNCNAME MPIDI_CH3I_RMA_Get_ops_list
288 289
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
290 291
static inline MPIDI_RMA_Ops_list_t *MPIDI_CH3I_RMA_Get_ops_list(MPID_Win *win_ptr,
                                                                int target)
292
{
293
    if (win_ptr->epoch_state == MPIDI_EPOCH_FENCE ||
294
        win_ptr->epoch_state == MPIDI_EPOCH_START ||
295
        win_ptr->epoch_state == MPIDI_EPOCH_PSCW)
296 297 298 299 300
    {
        return &win_ptr->at_rma_ops_list;
    }
    else {
        return &win_ptr->targets[target].rma_ops_list;
301 302 303
    }
}

/* Do not leak the per-function name macros to includers. */
#undef FUNCNAME
#undef FCNAME

#endif /* MPICH_MPIDRMA_H_INCLUDED */