mpidrma.h 12 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 3 4 5 6
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

7 8
#if !defined(MPID_RMA_H_INCLUDED)
#define MPID_RMA_H_INCLUDED
9

10 11 12
#include "mpid_rma_types.h"
#include "mpid_rma_oplist.h"
#include "mpid_rma_shm.h"
13

14 15 16
int MPIDI_CH3I_Issue_rma_op(MPIDI_RMA_Op_t * op_ptr, MPID_Win * win_ptr,
                            MPIDI_CH3_Pkt_flags_t flags, MPI_Win source_win_handle,
                            MPI_Win target_win_handle);
17

18
#undef FUNCNAME
19
#define FUNCNAME send_lock_msg
20 21
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
22
static inline int send_lock_msg(int dest, int lock_type, MPID_Win * win_ptr)
23
{
24 25 26 27 28 29 30
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_t *lock_pkt = &upkt.lock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_LOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_LOCK_MSG);
31

32
    MPIU_Assert(win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_CALLED);
33

34
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
35

36 37 38 39
    MPIDI_Pkt_init(lock_pkt, MPIDI_CH3_PKT_LOCK);
    lock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
    lock_pkt->source_win_handle = win_ptr->handle;
    lock_pkt->lock_type = lock_type;
40
    lock_pkt->origin_rank = win_ptr->comm_ptr->rank;
41

42 43
    win_ptr->targets[dest].remote_lock_state = MPIDI_CH3_WIN_LOCK_REQUESTED;
    win_ptr->targets[dest].remote_lock_mode = lock_type;
44

45 46 47 48
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_pkt, sizeof(*lock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
49

50 51 52 53
    /* release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
    }
54

55
  fn_exit:
56
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_LOCK_MSG);
57
    return mpi_errno;
58
    /* --BEGIN ERROR HANDLING-- */
59
  fn_fail:
60
    goto fn_exit;
61
    /* --END ERROR HANDLING-- */
62 63
}

64
#undef FUNCNAME
65
#define FUNCNAME send_unlock_msg
66 67
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
68
static inline int send_unlock_msg(int dest, MPID_Win * win_ptr)
69 70
{
    int mpi_errno = MPI_SUCCESS;
71 72 73 74 75 76
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_unlock_t *unlock_pkt = &upkt.unlock;
    MPID_Request *req = NULL;
    MPIDI_VC_t *vc;
    MPIDI_STATE_DECL(MPID_STATE_SEND_UNLOCK_MSG);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_SEND_UNLOCK_MSG);
77

78
    MPIU_Assert(win_ptr->targets[dest].remote_lock_state == MPIDI_CH3_WIN_LOCK_GRANTED);
79

80
    MPIDI_Comm_get_vc_set_active(win_ptr->comm_ptr, dest, &vc);
81

82 83
    /* Send a lock packet over to the target. wait for the lock_granted
     * reply. Then do all the RMA ops. */
84

85 86
    MPIDI_Pkt_init(unlock_pkt, MPIDI_CH3_PKT_UNLOCK);
    unlock_pkt->target_win_handle = win_ptr->all_win_handles[dest];
87

88 89
    /* Reset the local state of the target to unlocked */
    win_ptr->targets[dest].remote_lock_state = MPIDI_CH3_WIN_LOCK_NONE;
90

91 92 93 94
    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, unlock_pkt, sizeof(*unlock_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    MPIU_ERR_CHKANDJUMP(mpi_errno != MPI_SUCCESS, mpi_errno, MPI_ERR_OTHER, "**ch3|rma_msg");
95

96 97 98
    /* Release the request returned by iStartMsg */
    if (req != NULL) {
        MPID_Request_release(req);
99
    }
100

101
  fn_exit:
102
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_SEND_UNLOCK_MSG);
103 104
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
105
  fn_fail:
106 107 108 109
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}

110

111
#undef FUNCNAME
112
#define FUNCNAME MPIDI_CH3I_Send_lock_granted_pkt
113 114
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
115
static inline int MPIDI_CH3I_Send_lock_granted_pkt(MPIDI_VC_t * vc, MPID_Win * win_ptr, MPI_Win source_win_handle)
116
{
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_lock_granted_t *lock_granted_pkt = &upkt.lock_granted;
    MPID_Request *req = NULL;
    int mpi_errno;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    /* send lock granted packet */
    MPIDI_Pkt_init(lock_granted_pkt, MPIDI_CH3_PKT_LOCK_GRANTED);
    lock_granted_pkt->source_win_handle = source_win_handle;
    lock_granted_pkt->target_rank = win_ptr->comm_ptr->rank;

    MPIU_DBG_MSG_FMT(CH3_OTHER, VERBOSE,
                     (MPIU_DBG_FDEST, "sending lock granted pkt on vc=%p, source_win_handle=%#08x",
                      vc, lock_granted_pkt->source_win_handle));

    MPIU_THREAD_CS_ENTER(CH3COMM, vc);
    mpi_errno = MPIDI_CH3_iStartMsg(vc, lock_granted_pkt, sizeof(*lock_granted_pkt), &req);
    MPIU_THREAD_CS_EXIT(CH3COMM, vc);
    if (mpi_errno) {
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**ch3|rmamsg");
139 140
    }

141 142
    if (req != NULL) {
        MPID_Request_release(req);
143
    }
144

145
  fn_fail:
146 147 148
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_LOCK_GRANTED_PKT);

    return mpi_errno;
149 150 151 152
}


#undef FUNCNAME
153
#define FUNCNAME MPIDI_CH3I_Send_pt_rma_done_pkt
154 155
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
156
static inline int MPIDI_CH3I_Send_pt_rma_done_pkt(MPIDI_VC_t *vc, MPID_Win *win_ptr,
157
                                    MPI_Win source_win_handle)
158
{
159 160 161 162 163
    MPIDI_CH3_Pkt_t upkt;
    MPIDI_CH3_Pkt_pt_rma_done_t *pt_rma_done_pkt = &upkt.pt_rma_done;
    MPID_Request *req;
    int mpi_errno=MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SEND_PT_RMA_DONE_PKT);
164

165
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SEND_PT_RMA_DONE_PKT);
166

167 168 169
    MPIDI_Pkt_init(pt_rma_done_pkt, MPIDI_CH3_PKT_PT_RMA_DONE);
    pt_rma_done_pkt->source_win_handle = source_win_handle;
    pt_rma_done_pkt->target_rank = win_ptr->comm_ptr->rank;
170

171 172 173 174 175 176
    /* Because this is in a packet handler, it is already within a critical section */	
    /* MPIU_THREAD_CS_ENTER(CH3COMM,vc); */
    mpi_errno = MPIDI_CH3_iStartMsg(vc, pt_rma_done_pkt, sizeof(*pt_rma_done_pkt), &req);
    /* MPIU_THREAD_CS_EXIT(CH3COMM,vc); */
    if (mpi_errno != MPI_SUCCESS) {
	MPIU_ERR_SETANDJUMP(mpi_errno,MPI_ERR_OTHER,"**ch3|rmamsg");
177 178
    }

179 180 181
    if (req != NULL)
    {
        MPID_Request_release(req);
182 183
    }

184 185 186 187
 fn_fail:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3I_SEND_PT_RMA_DONE_PKT);
    return mpi_errno;
}
188

189

190 191 192 193
#undef FUNCNAME
#define FUNCNAME acquire_local_lock
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
194
static inline int acquire_local_lock(MPID_Win * win_ptr, int lock_type)
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_ACQUIRE_LOCAL_LOCK);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_ACQUIRE_LOCAL_LOCK);

    /* poke the progress engine until the local lock is granted */
    if (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
        MPID_Progress_state progress_state;

        MPID_Progress_start(&progress_state);
        while (MPIDI_CH3I_Try_acquire_win_lock(win_ptr, lock_type) == 0) {
            mpi_errno = MPID_Progress_wait(&progress_state);
            /* --BEGIN ERROR HANDLING-- */
            if (mpi_errno != MPI_SUCCESS) {
                MPID_Progress_end(&progress_state);
                MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
211
            }
212
            /* --END ERROR HANDLING-- */
213
        }
214
        MPID_Progress_end(&progress_state);
215 216
    }

217 218 219
    win_ptr->targets[win_ptr->comm_ptr->rank].remote_lock_state = MPIDI_CH3_WIN_LOCK_GRANTED;
    win_ptr->targets[win_ptr->comm_ptr->rank].remote_lock_mode = lock_type;

220
  fn_exit:
221
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_ACQUIRE_LOCAL_LOCK);
222 223
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
224
  fn_fail:
225 226 227 228 229 230
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
231
#define FUNCNAME do_accumulate_op
232 233
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
234
static inline int do_accumulate_op(MPID_Request *rreq)
235 236
{
    int mpi_errno = MPI_SUCCESS;
237 238 239 240 241 242 243 244 245 246 247 248 249 250
    MPI_Aint true_lb, true_extent;
    MPI_User_function *uop;
    MPIDI_STATE_DECL(MPID_STATE_DO_ACCUMULATE_OP);

    MPIDI_FUNC_ENTER(MPID_STATE_DO_ACCUMULATE_OP);

    if (rreq->dev.op == MPI_REPLACE)
    {
        /* simply copy the data */
        mpi_errno = MPIR_Localcopy(rreq->dev.user_buf, rreq->dev.user_count,
                                   rreq->dev.datatype,
                                   rreq->dev.real_user_buf,
                                   rreq->dev.user_count,
                                   rreq->dev.datatype);
251
        if (mpi_errno) {
252 253
	    MPIU_ERR_POP(mpi_errno);
	}
254 255 256
        goto fn_exit;
    }

257 258 259 260 261 262 263 264 265 266 267
    if (HANDLE_GET_KIND(rreq->dev.op) == HANDLE_KIND_BUILTIN)
    {
        /* get the function by indexing into the op table */
        uop = MPIR_OP_HDL_TO_FN(rreq->dev.op);
    }
    else
    {
	/* --BEGIN ERROR HANDLING-- */
        mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OP, "**opnotpredefined", "**opnotpredefined %d", rreq->dev.op );
        return mpi_errno;
	/* --END ERROR HANDLING-- */
268 269
    }

270 271 272 273 274 275 276 277
    if (MPIR_DATATYPE_IS_PREDEFINED(rreq->dev.datatype))
    {
        (*uop)(rreq->dev.user_buf, rreq->dev.real_user_buf,
               &(rreq->dev.user_count), &(rreq->dev.datatype));
    }
    else
    {
	/* derived datatype */
278 279 280
        MPID_Segment *segp;
        DLOOP_VECTOR *dloop_vec;
        MPI_Aint first, last;
281 282
        int vec_len, i, count;
        MPI_Aint type_size;
283
        MPI_Datatype type;
284 285 286 287 288 289 290 291 292
        MPID_Datatype *dtp;

        segp = MPID_Segment_alloc();
	/* --BEGIN ERROR HANDLING-- */
        if (!segp)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
293
        }
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
	/* --END ERROR HANDLING-- */
        MPID_Segment_init(NULL, rreq->dev.user_count,
			  rreq->dev.datatype, segp, 0);
        first = 0;
        last  = SEGMENT_IGNORE_LAST;

        MPID_Datatype_get_ptr(rreq->dev.datatype, dtp);
        vec_len = dtp->max_contig_blocks * rreq->dev.user_count + 1;
        /* +1 needed because Rob says so */
        dloop_vec = (DLOOP_VECTOR *)
            MPIU_Malloc(vec_len * sizeof(DLOOP_VECTOR));
	/* --BEGIN ERROR HANDLING-- */
        if (!dloop_vec)
	{
            mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**nomem", 0 );
	    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
            return mpi_errno;
311
        }
312 313 314 315 316 317 318 319 320 321 322 323
	/* --END ERROR HANDLING-- */

        MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);

        type = dtp->eltype;
        MPID_Datatype_get_size_macro(type, type_size);
        for (i=0; i<vec_len; i++)
	{
            MPIU_Assign_trunc(count, (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size, int);
            (*uop)((char *)rreq->dev.user_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                   (char *)rreq->dev.real_user_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                   &count, &type);
324 325
        }

326 327
        MPID_Segment_free(segp);
        MPIU_Free(dloop_vec);
328 329
    }

330 331 332 333
 fn_exit:
    /* free the temporary buffer */
    MPIR_Type_get_true_extent_impl(rreq->dev.datatype, &true_lb, &true_extent);
    MPIU_Free((char *) rreq->dev.user_buf + true_lb);
334

335
    MPIDI_FUNC_EXIT(MPID_STATE_DO_ACCUMULATE_OP);
336 337

    return mpi_errno;
338
 fn_fail:
339 340 341
    goto fn_exit;
}

342
static inline int wait_progress_engine(void)
343 344
{
    int mpi_errno = MPI_SUCCESS;
345
    MPID_Progress_state progress_state;
346

347 348 349 350 351 352
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_wait(&progress_state);
    /* --BEGIN ERROR HANDLING-- */
    if (mpi_errno != MPI_SUCCESS) {
        MPID_Progress_end(&progress_state);
        MPIU_ERR_SETANDJUMP(mpi_errno, MPI_ERR_OTHER, "**winnoprogress");
353
    }
354 355
    /* --END ERROR HANDLING-- */
    MPID_Progress_end(&progress_state);
356

357
  fn_exit:
358
    return mpi_errno;
359
  fn_fail:
360 361 362
    goto fn_exit;
}

363
static inline int poke_progress_engine(void)
364 365
{
    int mpi_errno = MPI_SUCCESS;
366
    MPID_Progress_state progress_state;
367

368 369 370 371 372
    MPID_Progress_start(&progress_state);
    mpi_errno = MPID_Progress_poke();
    if (mpi_errno != MPI_SUCCESS)
        MPIU_ERR_POP(mpi_errno);
    MPID_Progress_end(&progress_state);
373

374
  fn_exit:
375
    return mpi_errno;
376
  fn_fail:
377 378 379
    goto fn_exit;
}

380
#endif /* MPID_RMA_H_INCLUDED */