/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
#if !defined(MPICH_MPIDRMA_H_INCLUDED)
#define MPICH_MPIDRMA_H_INCLUDED

9 10
#include "mpl_utlist.h"

11 12 13 14 15 16
/* Instrumentation duration counters, declared through
 * MPIU_INSTR_DURATION_EXTERN_DECL only when USE_MPIU_INSTR is defined.
 * winfree_rs and winfree_complete are started/stopped in
 * MPIDI_CH3I_Wait_for_pt_ops_finish in this header; wincreate_allgather is
 * not referenced in this header. */
#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(wincreate_allgather);
MPIU_INSTR_DURATION_EXTERN_DECL(winfree_rs);
MPIU_INSTR_DURATION_EXTERN_DECL(winfree_complete);
#endif

17
/* Kind of RMA operation recorded in an MPIDI_RMA_Op_t entry.
 *
 * The numeric values are assigned explicitly.  Value 26 used to be
 * MPIDI_RMA_LOCK; it has been removed and the value is left unused. */
typedef enum MPIDI_RMA_Op_type {
    MPIDI_RMA_PUT               = 23,
    MPIDI_RMA_GET               = 24,
    MPIDI_RMA_ACCUMULATE        = 25,
 /* REMOVED: MPIDI_RMA_LOCK     = 26, */
    MPIDI_RMA_ACC_CONTIG        = 27,
    MPIDI_RMA_GET_ACCUMULATE    = 28,
    MPIDI_RMA_COMPARE_AND_SWAP  = 29,
    MPIDI_RMA_FETCH_AND_OP      = 30
} MPIDI_RMA_Op_type_t;

/* Special case RMA operations */

30
/* Tags distinguishing a basic datatype from a derived datatype in RMA code.
 * The numeric values are assigned explicitly. */
enum MPIDI_RMA_Datatype {
    MPIDI_RMA_DATATYPE_BASIC    = 50,
    MPIDI_RMA_DATATYPE_DERIVED  = 51
};

35
/* Lock state values: no lock held, or a shared lock on all targets
 * (NOTE(review): meaning inferred from the enumerator names -- confirm
 * against the users of this enum). */
enum MPID_Lock_state {
    MPID_LOCK_NONE              = 0,
    MPID_LOCK_SHARED_ALL        = 1
};

40 41 42 43
/*
 * RMA Declarations.  We should move these into something separate from
 * a Request.
 */
44

45 46 47
/* to send derived datatype across in RMA ops */
typedef struct MPIDI_RMA_dtype_info { /* for derived datatypes */
    int           is_contig; 
48
    int           max_contig_blocks;
49 50
    int           size;     
    MPI_Aint      extent;   
51 52 53
    int           dataloop_size; /* not needed because this info is sent in 
				    packet header. remove it after lock/unlock 
				    is implemented in the device */
54 55 56 57 58 59 60 61 62
    void          *dataloop;  /* pointer needed to update pointers
                                 within dataloop on remote side */
    int           dataloop_depth; 
    int           eltype;
    MPI_Aint ub, lb, true_ub, true_lb;
    int has_sticky_ub, has_sticky_lb;
} MPIDI_RMA_dtype_info;

/* for keeping track of RMA ops, which will be executed at the next sync call */
63 64 65
typedef struct MPIDI_RMA_Op {
    struct MPIDI_RMA_Op *prev;  /* pointer to next element in list */
    struct MPIDI_RMA_Op *next;  /* pointer to next element in list */
66 67 68 69
    /* FIXME: It would be better to setup the packet that will be sent, at 
       least in most cases (if, as a result of the sync/ops/sync sequence,
       a different packet type is needed, it can be extracted from the 
       information otherwise stored). */
70
    MPIDI_RMA_Op_type_t type;
71 72 73 74 75 76 77 78
    void *origin_addr;
    int origin_count;
    MPI_Datatype origin_datatype;
    int target_rank;
    MPI_Aint target_disp;
    int target_count;
    MPI_Datatype target_datatype;
    MPI_Op op;  /* for accumulate */
79 80 81 82
    /* Used to complete operations */
    struct MPID_Request *request;
    MPIDI_RMA_dtype_info dtype_info;
    void *dataloop;
83 84 85 86 87 88
    void *result_addr;
    int result_count;
    MPI_Datatype result_datatype;
    void *compare_addr;
    int compare_count;
    MPI_Datatype compare_datatype;
89
} MPIDI_RMA_Op_t;
90 91 92 93 94 95 96 97 98 99

typedef struct MPIDI_PT_single_op {
    int type;  /* put, get, or accum. */
    void *addr;
    int count;
    MPI_Datatype datatype;
    MPI_Op op;
    void *data;  /* for queued puts and accumulates, data is copied here */
    MPI_Request request_handle;  /* for gets */
    int data_recd;  /* to indicate if the data has been received */
100
    MPIDI_CH3_Pkt_flags_t flags;
101 102 103 104 105 106 107
} MPIDI_PT_single_op;

/* Entry in a singly-linked queue of lock requests on a window. */
typedef struct MPIDI_Win_lock_queue {
    struct MPIDI_Win_lock_queue *next;
    int lock_type;
    MPI_Win source_win_handle;
    MPIDI_VC_t * vc;
    struct MPIDI_PT_single_op *pt_single_op;  /* to store info for
                                                 lock-put-unlock optimization */
} MPIDI_Win_lock_queue;
111 112 113 114

/* Routine used to tune RMA optimizations.
 *
 * @param IN    flag      Tuning value; its effect is defined by the
 *                        implementation, which is not in this header
 *                        (NOTE(review): presumably toggles an "immediate
 *                        accumulate" optimization -- confirm).
 */
void MPIDI_CH3_RMA_SetAccImmed( int flag );

115 116
/*** RMA OPS LIST HELPER ROUTINES ***/

117
/* An RMA ops list is represented by a pointer to its first element; the
 * helpers below treat a NULL pointer as the empty list. */
typedef MPIDI_RMA_Op_t * MPIDI_RMA_Ops_list_t;
118

119 120 121
/* Return nonzero if the RMA operations list is empty.
 */
#undef FUNCNAME
122
#define FUNCNAME MPIDI_CH3I_RMA_Ops_isempty
123 124
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
125
static inline int MPIDI_CH3I_RMA_Ops_isempty(MPIDI_RMA_Ops_list_t *list)
126
{
127 128 129 130 131 132 133 134 135 136
    return *list == NULL;
}


/* Return a pointer to the first element in the list.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_head
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
137
static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_head(MPIDI_RMA_Ops_list_t *list)
138 139 140 141 142 143 144 145 146 147 148
{
    return *list;
}


/* Return a pointer to the last element in the list.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_tail
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
149
static inline MPIDI_RMA_Op_t *MPIDI_CH3I_RMA_Ops_tail(MPIDI_RMA_Ops_list_t *list)
150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
{
    return (*list) ? (*list)->prev : NULL;
}


/* Append an element to the tail of the RMA ops list
 *
 * @param IN    list      Pointer to the RMA ops list
 * @param IN    elem      Pointer to the element to be appended
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_append
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_append(MPIDI_RMA_Ops_list_t *list,
165
                                             MPIDI_RMA_Op_t *elem)
166 167
{
    MPL_DL_APPEND(*list, elem);
168 169 170 171 172
}


/* Allocate a new element on the tail of the RMA operations list.
 *
173 174
 * @param IN    list      Pointer to the RMA ops list
 * @param OUT   new_ptr   Pointer to the element that was allocated
175 176 177
 * @return                MPI error class
 */
#undef FUNCNAME
178
#define FUNCNAME MPIDI_CH3I_RMA_Ops_alloc_tail
179 180
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
181
static inline int MPIDI_CH3I_RMA_Ops_alloc_tail(MPIDI_RMA_Ops_list_t *list,
182
                                                MPIDI_RMA_Op_t **new_elem)
183 184
{
    int mpi_errno = MPI_SUCCESS;
185
    MPIDI_RMA_Op_t *tmp_ptr;
186 187 188
    MPIU_CHKPMEM_DECL(1);

    /* FIXME: We should use a pool allocator here */
189
    MPIU_CHKPMEM_MALLOC(tmp_ptr, MPIDI_RMA_Op_t *, sizeof(MPIDI_RMA_Op_t),
190 191 192 193 194
                        mpi_errno, "RMA operation entry");

    tmp_ptr->next = NULL;
    tmp_ptr->dataloop = NULL;

195
    MPL_DL_APPEND(*list, tmp_ptr);
196

197
    *new_elem = tmp_ptr;
198 199 200 201 202 203

 fn_exit:
    MPIU_CHKPMEM_COMMIT();
    return mpi_errno;
 fn_fail:
    MPIU_CHKPMEM_REAP();
204
    *new_elem = NULL;
205 206 207 208
    goto fn_exit;
}


209
/* Unlink an element from the RMA ops list
210
 *
211 212
 * @param IN    list      Pointer to the RMA ops list
 * @param IN    elem      Pointer to the element to be unlinked
213 214
 */
#undef FUNCNAME
215
#define FUNCNAME MPIDI_CH3I_RMA_Ops_unlink
216 217
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
218
static inline void MPIDI_CH3I_RMA_Ops_unlink(MPIDI_RMA_Ops_list_t *list,
219
                                             MPIDI_RMA_Op_t *elem)
220
{
221 222
    MPL_DL_DELETE(*list, elem);
}
223 224


225 226 227 228 229 230 231 232 233 234
/* Free an element in the RMA operations list.
 *
 * @param IN    list      Pointer to the RMA ops list
 * @param IN    curr_ptr  Pointer to the element to be freed.
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free_elem
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_free_elem(MPIDI_RMA_Ops_list_t *list,
235
                                                MPIDI_RMA_Op_t *curr_ptr)
236
{
237
    MPIDI_RMA_Op_t *tmp_ptr = curr_ptr;
238

239
    MPIU_Assert(curr_ptr != NULL);
240

241
    MPL_DL_DELETE(*list, curr_ptr);
242 243 244 245 246 247 248

    /* Check if we allocated a dataloop for this op (see send/recv_rma_msg) */
    if (tmp_ptr->dataloop != NULL)
        MPIU_Free(tmp_ptr->dataloop);
    MPIU_Free( tmp_ptr );
}

249

250 251 252 253 254 255
/* Free an element in the RMA operations list.
 *
 * @param IN    list      Pointer to the RMA ops list
 * @param INOUT curr_ptr  Pointer to the element to be freed.  Will be updated
 *                        to point to the element following the element that
 *                        was freed.
256 257
 */
#undef FUNCNAME
258
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free_and_next
259 260
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
261
static inline void MPIDI_CH3I_RMA_Ops_free_and_next(MPIDI_RMA_Ops_list_t *list,
262
                                                    MPIDI_RMA_Op_t **curr_ptr)
263
{
264
    MPIDI_RMA_Op_t *next_ptr = (*curr_ptr)->next;
265

266 267 268
    MPIDI_CH3I_RMA_Ops_free_elem(list, *curr_ptr);
    *curr_ptr = next_ptr;
}
269 270


271 272 273 274 275 276 277 278
/* Free the entire RMA operations list.
 *
 * Every element is unlinked and released via MPIDI_CH3I_RMA_Ops_free_elem.
 * The "safe" iteration macro is used because elements are removed while the
 * list is being walked.
 *
 * @param IN    list      Pointer to the RMA ops list
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Ops_free
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline void MPIDI_CH3I_RMA_Ops_free(MPIDI_RMA_Ops_list_t *list)
{
    MPIDI_RMA_Op_t *curr_ptr, *tmp_ptr;

    MPL_DL_FOREACH_SAFE(*list, curr_ptr, tmp_ptr) {
        MPIDI_CH3I_RMA_Ops_free_elem(list, curr_ptr);
    }
}


287 288 289 290
/* Retrieve the RMA ops list pointer from the window.  This routine detects
 * whether we are in an active or passive target epoch and returns the correct
 * ops list; we use a shared list for active target and separate per-target
 * lists for passive target.
291 292
 */
#undef FUNCNAME
293
#define FUNCNAME MPIDI_CH3I_RMA_Get_ops_list
294 295
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
296 297
static inline MPIDI_RMA_Ops_list_t *MPIDI_CH3I_RMA_Get_ops_list(MPID_Win *win_ptr,
                                                                int target)
298
{
299
    if (win_ptr->epoch_state == MPIDI_EPOCH_FENCE ||
300
        win_ptr->epoch_state == MPIDI_EPOCH_START ||
301
        win_ptr->epoch_state == MPIDI_EPOCH_PSCW)
302 303 304 305 306
    {
        return &win_ptr->at_rma_ops_list;
    }
    else {
        return &win_ptr->targets[target].rma_ops_list;
307 308 309
    }
}

310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362

/* Wait until win_ptr->my_pt_rma_puts_accs reaches the total computed for
 * this process.
 *
 * The total is obtained with a sum reduce-scatter (one int per process) of
 * win_ptr->pt_rma_puts_accs over the window's communicator.  If the local
 * counter does not match yet, the progress engine is driven until it does.
 * (NOTE(review): the counters presumably track passive-target puts and
 * accumulates issued to / received by each process -- confirm.)
 *
 * @param IN    win_ptr   Window to wait on
 * @return                MPI_SUCCESS, or an MPI error code if the collective
 *                        or the progress engine fails
 */
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_Wait_for_pt_ops_finish
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_Wait_for_pt_ops_finish(MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS, total_pt_rma_puts_accs;
    MPID_Comm *comm_ptr;
    int errflag = FALSE;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_WAIT_FOR_PT_OPS_FINISH);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_WAIT_FOR_PT_OPS_FINISH);

    comm_ptr = win_ptr->comm_ptr;
    MPIU_INSTR_DURATION_START(winfree_rs);
    mpi_errno = MPIR_Reduce_scatter_block_impl(win_ptr->pt_rma_puts_accs,
                                               &total_pt_rma_puts_accs, 1,
                                               MPI_INT, MPI_SUM, comm_ptr, &errflag);
    if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    MPIU_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    MPIU_INSTR_DURATION_END(winfree_rs);

    if (total_pt_rma_puts_accs != win_ptr->my_pt_rma_puts_accs)
    {
        MPID_Progress_state progress_state;

        /* poke the progress engine until the two are equal */
        MPIU_INSTR_DURATION_START(winfree_complete);
        MPID_Progress_start(&progress_state);
        while (total_pt_rma_puts_accs != win_ptr->my_pt_rma_puts_accs)
        {
            mpi_errno = MPID_Progress_wait(&progress_state);
            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno != MPI_SUCCESS)
            {
                /* Close the progress epoch before leaving through fn_fail */
                MPID_Progress_end(&progress_state);
                MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**winnoprogress");
            }
            /* --END ERROR HANDLING-- */
        }
        MPID_Progress_end(&progress_state);
        MPIU_INSTR_DURATION_END(winfree_complete);
    }

 fn_exit:
    /* Pair the FUNC_ENTER at the top with FUNC_EXIT (the original called
       FUNC_ENTER a second time here). */
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_WAIT_FOR_PT_OPS_FINISH);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


363 364 365
#undef FUNCNAME
#undef FCNAME

366
#endif