mpidrma.h 13.8 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 3 4 5 6
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

7 8
#if !defined(MPID_RMA_H_INCLUDED)
#define MPID_RMA_H_INCLUDED
9

10 11 12
#include "mpid_rma_types.h"
#include "mpid_rma_oplist.h"
#include "mpid_rma_shm.h"
13

14
int MPIDI_CH3I_Issue_rma_op(MPIDI_RMA_Op_t * op_ptr, MPID_Win * win_ptr,
15
                            MPIDI_CH3_Pkt_flags_t flags);
16

17
#undef FUNCNAME
18
#define FUNCNAME send_lock_msg
19 20
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
21
static inline int send_lock_msg(int dest, int lock_type, MPID_Win * win_ptr)
22
{
23 24 25 26 27 28 29
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_t *lock_pkt = &upkt.lock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_LOCK_MSG);
30

31
    MPIU_Assert(win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED);
32

33
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
34

35 36 37 38
    MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
    lock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    lock_pkt->source_win_handle = win_ptr->handle;
    lock_pkt->lock_type = lock_type;
39
    lock_pkt->origin_rank = win_ptr->comm_ptr->rank;
40

41 42
    win_ptr->targets[dest].remote_lock_state = MPIDI_CH3_WIN_LOCK_REQUESTED;
    win_ptr->targets[dest].remote_lock_mode = lock_type;
43

44 45 46 47
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_pkt, sizeof(*lock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
48

49 50 51 52
    /* release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }
53

54
  fn_exit:
55
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_MSG);
56
    return mpi_errno;
57
    /* --BEGIN ERROR HANDLING-- */
58
  fn_fail:
59
    goto fn_exit;
60
    /* --END ERROR HANDLING-- */
61 62
}

63
#undef FUNCNAME
64
#define FUNCNAME send_unlock_msg
65 66
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
67
static inline int send_unlock_msg(int dest, MPID_Win * win_ptr)
68 69
{
    int mpi_errno = MPI_SUCCESS;
70 71 72 73 74 75
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &upkt.unlock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
76

77
    MPIU_Assert(win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_GRANTED);
78

79
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
80

81 82
    /* Send a lock packet over to the target. wait for the lock_granted
     * reply. Then do all the RMA ops. */
83

84 85
    MPIDI_Pkt_init(unlock_pkt, MPIDI_CH3_PKT_UNLOCK);
    unlock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
86

87 88
    /* Reset the local state of the target to unlocked */
    win_ptr->targets[dest].remote_lock_state = MPIDI_CH3_WIN_LOCK_NONE;
89

90 91 92 93
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, unlock_pkt, sizeof(*unlock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
94

95 96 97
    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
98
    }
99

100
  fn_exit:
101
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
102 103
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
104
  fn_fail:
105 106 107 108
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

109

110
#undef FUNCNAME
111
#define FUNCNAME MPIDI_CH3I_Send_lock_granted_pkt
112 113
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
114
static inline int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr, MPI_Win source_win_handle)
115
{
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_granted_t *lock_granted_pkt = &upkt.lock_granted;
    MPID_Request *req = NULL;
    int mpi_errno;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    /* send lock granted packet */
    MPIDI_Pkt_init(lock_granted_pkt, MPIDI_CH3_PKT_LOCK_GRANTED);
    lock_granted_pkt->source_win_handle = source_win_handle;
    lock_granted_pkt->target_rank = win_ptr->comm_ptr->rank;

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
                     (MPIU_DBG_FDEST, "sending lock granted pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_granted_pkt->source_win_handle));

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_granted_pkt, sizeof(*lock_granted_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
138 139
    }

140 141
    if (req != NULL) {
        MPID_Request_release(req);
142
    }
143

144
  fn_fail:
145 146 147
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    return mpi_errno;
148 149 150 151
}


#undef FUNCNAME
Xin Zhao's avatar
Xin Zhao committed
152
#define FUNCNAME MPIDI_CH3I_Send_flush_ack_pkt
153 154
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
Xin Zhao's avatar
Xin Zhao committed
155
static inline int MPIDI_CH3I_Send_flush_ack_pkt(MPIDI_VC_t *vc, MPID_Win *win_ptr,
156
                                    MPI_Win source_win_handle)
157
{
158
    MPIDI_CH3_Pkt_t upkt;
Xin Zhao's avatar
Xin Zhao committed
159
    MPIDI_CH3_Pkt_flush_ack_t *flush_ack_pkt = &upkt.flush_ack;
160 161
    MPID_Request *req;
    int mpi_errno=MPI_SUCCESS;
Xin Zhao's avatar
Xin Zhao committed
162
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
163

Xin Zhao's avatar
Xin Zhao committed
164
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
165

Xin Zhao's avatar
Xin Zhao committed
166 167 168
    MPIDI_Pkt_init(flush_ack_pkt, MPIDI_CH3_PKT_FLUSH_ACK);
    flush_ack_pkt->source_win_handle = source_win_handle;
    flush_ack_pkt->target_rank = win_ptr->comm_ptr->rank;
169

170 171
    /* Because this is in a packet handler, it is already within a critical section */	
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
Xin Zhao's avatar
Xin Zhao committed
172
    mpi_errno = MPIDI_CH3_iStartMsg(vc, flush_ack_pkt, sizeof(*flush_ack_pkt), &req);
173 174 175
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
176 177
    }

178 179 180
    if (req != NULL)
    {
        MPID_Request_release(req);
181 182
    }

183
 fn_fail:
Xin Zhao's avatar
Xin Zhao committed
184
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_FLUSH_ACK_PKT);
185 186
    return mpi_errno;
}
187

188

189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
#undef FUNCNAME
#define FUNCNAME send_decr_at_cnt_msg
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int send_decr_at_cnt_msg(int dst, MPID_Win * win_ptr)
{
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_decr_at_counter_t *decr_at_cnt_pkt = &upkt.decr_at_cnt;
    MPIDI_VC_t * vc;
    MPID_Request *request = NULL;
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_DECR_AT_CNT_MSG);

    MPIDI_Pkt_init(decr_at_cnt_pkt, MPIDI_CH3_PKT_DECR_AT_COUNTER);
    decr_at_cnt_pkt->target_win_handle = win_ptr->all_win_handles[dst];

    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dst, &vc);

    MPIU_THREAD_CS_ENTER(CH3COMM,vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, decr_at_cnt_pkt,
                                    sizeof(*decr_at_cnt_pkt), &request);
    MPIU_THREAD_CS_EXIT(CH3COMM,vc);
    if (mpi_errno != MPI_SUCCESS) {
        MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg" );
    }

    if (request != NULL) {
        MPID_Request_release(request);
    }

  fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_DECR_AT_CNT_MSG);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

229 230 231 232
#undef FUNCNAME
#define FUNCNAME acquire_local_lock
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
233
static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);

    /* poke the progress engine until the local lock is granted */
    if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
        while (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
            mpi_errno = MPID_Progress_wait(&progress_state);
            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno != MPI_SUCCESS) {
                MPID_Progress_end(&progress_state);
                MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
250
            }
251
            /* --END ERROR HANDLING-- */
252
        }
253
        MPID_Progress_end(&progress_state);
254 255
    }

256 257 258
    win_ptr->targets[win_ptr->comm_ptr->rank].remote_lock_state = MPIDI_CH3_WIN_LOCK_GRANTED;
    win_ptr->targets[win_ptr->comm_ptr->rank].remote_lock_mode = lock_type;

259
  fn_exit:
260
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
261 262
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
263
  fn_fail:
264 265 266 267 268
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
#undef FUNCNAME
#define FUNCNAME MPIDI_CH3I_RMA_Handle_flush_ack
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
static inline int MPIDI_CH3I_RMA_Handle_flush_ack(MPID_Win * win_ptr, int target_rank)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_RMA_Target_t *t;

    mpi_errno = MPIDI_CH3I_Win_find_target(win_ptr, target_rank, &t);
    if (mpi_errno != MPI_SUCCESS) MPIU_ERR_POP(mpi_errno);

    if (t == NULL) {
        win_ptr->outstanding_unlocks--;
        MPIU_Assert(win_ptr->outstanding_unlocks >= 0);
    }
    else {
        t->sync.outstanding_acks--;
        MPIU_Assert(t->sync.outstanding_acks >= 0);
    }

 fn_exit:
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}


297
#undef FUNCNAME
298
#define FUNCNAME do_accumulate_op
299 300
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
301
static inline int do_accumulate_op(MPID_Request *rreq)
302 303
{
    int mpi_errno = MPI_SUCCESS;
304 305 306 307 308 309
    MPI_Aint true_lb, true_extent;
    MPI_User_function *uop;
    MPIDI_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);

    MPIDI_FUNC_ENTER(MPID_STATE_DO_ACCUMULATE_OP);

Xin Zhao's avatar
Xin Zhao committed
310 311
    MPIU_Assert(rreq->dev.final_user_buf != NULL);

312 313 314
    if (rreq->dev.op == MPI_REPLACE)
    {
        /* simply copy the data */
Xin Zhao's avatar
Xin Zhao committed
315
        mpi_errno = MPIR_Localcopy(rreq->dev.final_user_buf, rreq->dev.user_count,
316 317 318 319
                                   rreq->dev.datatype,
                                   rreq->dev.real_user_buf,
                                   rreq->dev.user_count,
                                   rreq->dev.datatype);
320
        if (mpi_errno) {
321 322
	    MPIU_ERR_POP(mpi_errno);
	}
323 324 325
        goto fn_exit;
    }

326 327 328 329 330 331 332 333 334 335 336
    if (HANDLE_GET_KIND(rreq->dev.op) == HANDLE_KIND_BUILTIN)
    {
        /* get the function by indexing into the op table */
        uop = MPIR_OP_HDL_TO_FN(rreq->dev.op);
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opnotpredefined", "**opnotpredefined %d", rreq->dev.op );
        return mpi_errno;
	/* --END ERROR HANDLING-- */
337 338
    }

339 340
    if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype))
    {
Xin Zhao's avatar
Xin Zhao committed
341
        (*uop)(rreq->dev.final_user_buf, rreq->dev.real_user_buf,
342 343 344 345 346
               &(rreq->dev.user_count), &(rreq->dev.datatype));
    }
    else
    {
	/* derived datatype */
347 348 349
        MPID_Segment *segp;
        DLOOP_VECTOR *dloop_vec;
        MPI_Aint first, last;
350 351
        int vec_len, i, count;
        MPI_Aint type_size;
352
        MPI_Datatype type;
353 354 355 356 357 358 359 360 361
        MPID_Datatype *dtp;

        segp = MPID_Segment_alloc();
	/* --BEGIN ERROR HANDLING-- */
        if (!segp)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
362
        }
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
	/* --END ERROR HANDLING-- */
        MPID_Segment_init(NULL, rreq->dev.user_count,
			  rreq->dev.datatype, segp, 0);
        first = 0;
        last  = SEGMENT_IGNORE_LAST;

        MPID_Datatype_get_ptr(rreq->dev.datatype, dtp);
        vec_len = dtp->max_contig_blocks * rreq->dev.user_count + 1;
        /* +1 needed because Rob says so */
        dloop_vec = (DLOOP_VECTOR *)
            MPIU_Malloc(vec_len * sizeof(DLOOP_VECTOR));
	/* --BEGIN ERROR HANDLING-- */
        if (!dloop_vec)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
380
        }
381 382 383 384 385 386 387 388 389
	/* --END ERROR HANDLING-- */

        MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);

        type = dtp->eltype;
        MPID_Datatype_get_size_macro(type, type_size);
        for (i=0; i<vec_len; i++)
	{
            MPIU_Assign_trunc(count, (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size, int);
Xin Zhao's avatar
Xin Zhao committed
390
            (*uop)((char *)rreq->dev.final_user_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
391 392
                   (char *)rreq->dev.real_user_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                   &count, &type);
393 394
        }

395 396
        MPID_Segment_free(segp);
        MPIU_Free(dloop_vec);
397 398
    }

399 400 401
 fn_exit:
    /* free the temporary buffer */
    MPIR_Type_get_true_extent_impl(rreq->dev.datatype, &true_lb, &true_extent);
Xin Zhao's avatar
Xin Zhao committed
402
    MPIU_Free((char *) rreq->dev.final_user_buf + true_lb);
403

404
    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
405 406

    return mpi_errno;
407
 fn_fail:
408 409 410
    goto fn_exit;
}

411
static inline int wait_progress_engine(void)
412 413
{
    int mpi_errno = MPI_SUCCESS;
414
    MPID_Progress_state progress_state;
415

416 417 418 419 420 421
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_wait(&progress_state);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPID_Progress_end(&progress_state);
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
422
    }
423 424
    /* --END ERROR HANDLING-- */
    MPID_Progress_end(&progress_state);
425

426
  fn_exit:
427
    return mpi_errno;
428
  fn_fail:
429 430 431
    goto fn_exit;
}

432
static inline int poke_progress_engine(void)
433 434
{
    int mpi_errno = MPI_SUCCESS;
435
    MPID_Progress_state progress_state;
436

437 438 439 440 441
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_poke();
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
    MPID_Progress_end(&progress_state);
442

443
  fn_exit:
444
    return mpi_errno;
445
  fn_fail:
446 447 448
    goto fn_exit;
}

449
#endif /* MPID_RMA_H_INCLUDED */