ch3u_rma_acc_ops.c 16.4 KB
Newer Older
1
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidi_ch3_impl.h"
#include "mpidrma.h"

#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_alloc);
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_set);
extern void MPIDI_CH3_RMA_InitInstr(void);
#endif

#undef FUNCNAME
#define FUNCNAME MPIDI_Get_accumulate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
                         MPI_Datatype origin_datatype, void *result_addr, int result_count,
                         MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
                         int target_count, MPI_Datatype target_datatype, MPI_Op op, MPID_Win *win_ptr)
{
25
26
27
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t data_sz;
    int rank, origin_predefined, result_predefined, target_predefined;
28
    int shm_locked = 0;
29
30
31
    int dt_contig ATTRIBUTE((unused));
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPID_Datatype *dtp;
32
33
    MPIU_CHKLMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET_ACCUMULATE);
34

35
36
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET_ACCUMULATE);

James Dinan's avatar
James Dinan committed
37
38
39
40
    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

41
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
42
43
44
45
46
47
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

48
    MPIDI_Datatype_get_info(target_count, target_datatype, dt_contig, data_sz,
49
50
                            dtp, dt_true_lb);

James Dinan's avatar
James Dinan committed
51
    if (data_sz == 0) {
52
53
54
55
        goto fn_exit;
    }

    rank = win_ptr->myrank;
56

57
58
59
60
    origin_predefined = TRUE; /* quiet uninitialized warnings (b/c goto) */
    if (op != MPI_NO_OP) {
        MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, origin_predefined);
    }
61
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(result_datatype, result_predefined);
62
63
64
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);

    /* Do =! rank first (most likely branch?) */
65
66
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
    {
67
        MPI_User_function *uop;
68
69
70
71
72
73
74
75
76
77
78
79
80
        void *base;
        int disp_unit;

        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];
            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
            shm_locked = 1;
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }
81

82
        /* Perform the local get first, then the accumulate */
83
84
        mpi_errno = MPIR_Localcopy((char *) base + disp_unit * target_disp,
                                   target_count, target_datatype,
85
                                   result_addr, result_count, result_datatype);
86
87
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

88
        /* NO_OP: Don't perform the accumulate */
89
90
91
92
93
94
        if (op == MPI_NO_OP) {
            if (shm_locked) {
                MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
                shm_locked = 0;
            }

95
            goto fn_exit;
96
        }
97

98
99
        if (op == MPI_REPLACE) {
            mpi_errno = MPIR_Localcopy(origin_addr, origin_count, origin_datatype,
100
101
                                (char *) base + disp_unit * target_disp,
                                target_count, target_datatype);
102
103

            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
104
105
106
107
108
109

            if (shm_locked) {
                MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
                shm_locked = 0;
            }

110
111
            goto fn_exit;
        }
112
113

        MPIU_ERR_CHKANDJUMP1((HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN),
114
115
                             mpi_errno, MPI_ERR_OP, "**opnotpredefined",
                             "**opnotpredefined %d", op );
116

117
        /* get the function by indexing into the op table */
118
        uop = MPIR_OP_HDL_TO_FN(op);
119
120

        if (origin_predefined && target_predefined) {
121
122
            /* Cast away const'ness for origin_address in order to
             * avoid changing the prototype for MPI_User_function */
123
124
            (*uop)((void *) origin_addr, (char *) base + disp_unit*target_disp,
                   &target_count, &target_datatype);
125
126
127
        }
        else {
            /* derived datatype */
128

129
130
131
132
133
134
135
136
            MPID_Segment *segp;
            DLOOP_VECTOR *dloop_vec;
            MPI_Aint first, last;
            int vec_len, i, type_size, count;
            MPI_Datatype type;
            MPI_Aint true_lb, true_extent, extent;
            void *tmp_buf=NULL, *target_buf;
            const void *source_buf;
137

138
139
140
141
            if (origin_datatype != target_datatype) {
                /* first copy the data into a temporary buffer with
                   the same datatype as the target. Then do the
                   accumulate operation. */
142

143
                MPIR_Type_get_true_extent_impl(target_datatype, &true_lb, &true_extent);
144
145
146
147
148
                MPID_Datatype_get_extent_macro(target_datatype, extent);

                MPIU_CHKLMEM_MALLOC(tmp_buf, void *,
                                    target_count * (MPIR_MAX(extent,true_extent)),
                                    mpi_errno, "temporary buffer");
149
150
                /* adjust for potential negative lower bound in datatype */
                tmp_buf = (void *)((char*)tmp_buf - true_lb);
151

152
153
                mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
                                           origin_datatype, tmp_buf,
154
                                           target_count, target_datatype);
155
156
157
                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
            }

158
            if (target_predefined) {
159
160
                /* target predefined type, origin derived datatype */

161
162
                (*uop)(tmp_buf, (char *) base + disp_unit * target_disp,
                       &target_count, &target_datatype);
163
164
            }
            else {
165

166
                segp = MPID_Segment_alloc();
167
168
                MPIU_ERR_CHKANDJUMP1((!segp), mpi_errno, MPI_ERR_OTHER,
                                     "**nomem","**nomem %s","MPID_Segment_alloc");
169
170
171
                MPID_Segment_init(NULL, target_count, target_datatype, segp, 0);
                first = 0;
                last  = SEGMENT_IGNORE_LAST;
172

173
                MPID_Datatype_get_ptr(target_datatype, dtp);
174
                vec_len = dtp->max_contig_blocks * target_count + 1;
175
                /* +1 needed because Rob says so */
176
177
                MPIU_CHKLMEM_MALLOC(dloop_vec, DLOOP_VECTOR *,
                                    vec_len * sizeof(DLOOP_VECTOR),
178
                                    mpi_errno, "dloop vector");
179

180
                MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);
181

182
                source_buf = (tmp_buf != NULL) ? tmp_buf : origin_addr;
183
                target_buf = (char *) base + disp_unit * target_disp;
184
185
186
187
188
189
190
191
192
                type = dtp->eltype;
                type_size = MPID_Datatype_get_basic_size(type);

                for (i=0; i<vec_len; i++) {
                    count = (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size;
                    (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                           (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                           &count, &type);
                }
193

194
195
196
                MPID_Segment_free(segp);
            }
        }
197
198
199
200
201

        if (shm_locked) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
            shm_locked = 0;
        }
202
203
    }
    else {
204
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
205
        MPIDI_RMA_Op_t *new_ptr = NULL;
206

207
208
        /* Append the operation to the window's RMA ops queue */
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
209
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
210
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
211
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
212
213
214
215
216

        /* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        new_ptr->type = MPIDI_RMA_GET_ACCUMULATE;
217
        /* Cast away const'ness for origin_address as MPIDI_RMA_Op_t
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
         * contain both PUT and GET like ops */
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = origin_count;
        new_ptr->origin_datatype = origin_datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_count = result_count;
        new_ptr->result_datatype = result_datatype;
        new_ptr->target_rank = target_rank;
        new_ptr->target_disp = target_disp;
        new_ptr->target_count = target_count;
        new_ptr->target_datatype = target_datatype;
        new_ptr->op = op;
        MPIU_INSTR_DURATION_END(rmaqueue_set);

        /* if source or target datatypes are derived, increment their
           reference counts */
        if (!origin_predefined) {
            MPID_Datatype_get_ptr(origin_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        if (!result_predefined) {
            MPID_Datatype_get_ptr(result_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        if (!target_predefined) {
            MPID_Datatype_get_ptr(target_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
246
247
248
249
250
251
252
253
254
    }

 fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_GET_ACCUMULATE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
255
256
257
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
258
259
260
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
261
262
263
264
265
266
267
268
269
270
271


#undef FUNCNAME
#define FUNCNAME MPIDI_Compare_and_swap
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
                          void *result_addr, MPI_Datatype datatype, int target_rank,
                          MPI_Aint target_disp, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
272
    int shm_locked = 0;
273
274
275
276
277
278
279
280
281
    int rank;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_COMPARE_AND_SWAP);

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

282
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
283
284
285
286
287
288
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

289
290
291
292
293
294
    rank = win_ptr->myrank;

    /* The datatype must be predefined, and one of: C integer, Fortran integer,
     * Logical, Multi-language types, or Byte.  This is checked above the ADI,
     * so there's no need to check it again here. */

295
296
297
298
299
300
    /* FIXME: For shared memory windows, we should provide an implementation
     * that uses a processor atomic operation. */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
    {
        void *base, *dest_addr;
        int disp_unit;
301
302
        int len;

303
304
305
306
307
308
309
310
311
312
313
314
315
316
        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];

            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
            shm_locked = 1;
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }

        dest_addr = (char *) base + disp_unit * target_disp;

317
318
319
320
321
322
323
        MPID_Datatype_get_size_macro(datatype, len);
        MPIU_Memcpy(result_addr, dest_addr, len);

        if (MPIR_Compare_equal(compare_addr, dest_addr, datatype)) {
            MPIU_Memcpy(dest_addr, origin_addr, len);
        }

324
325
326
327
        if (shm_locked) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
            shm_locked = 0;
        }
328
329
    }
    else {
330
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
331
        MPIDI_RMA_Op_t *new_ptr = NULL;
332

333
334
        /* Append this operation to the RMA ops queue */
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
335
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
336
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
337
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        new_ptr->type = MPIDI_RMA_COMPARE_AND_SWAP;
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = 1;
        new_ptr->origin_datatype = datatype;
        new_ptr->target_rank = target_rank;
        new_ptr->target_disp = target_disp;
        new_ptr->target_count = 1;
        new_ptr->target_datatype = datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_count = 1;
        new_ptr->result_datatype = datatype;
        new_ptr->compare_addr = (void *) compare_addr;
        new_ptr->compare_count = 1;
        new_ptr->compare_datatype = datatype;
        MPIU_INSTR_DURATION_END(rmaqueue_set);
    }

fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
fn_fail:
362
363
364
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
365
366
367
368
369
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


370
371
372
373
374
375
376
377
378
#undef FUNCNAME
#define FUNCNAME MPIDI_Fetch_and_op
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
                       MPI_Datatype datatype, int target_rank,
                       MPI_Aint target_disp, MPI_Op op, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
379
    int shm_locked = 0;
380
381
    int rank;

382
383
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_FETCH_AND_OP);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_FETCH_AND_OP);
384
385
386
387
388

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

389
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
390
391
392
393
394
395
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

396
397
398
399
400
    rank = win_ptr->myrank;

    /* The datatype and op must be predefined.  This is checked above the ADI,
     * so there's no need to check it again here. */

401
402
403
404
    /* FIXME: For shared memory windows, we should provide an implementation
     * that uses a processor atomic operation. */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED)
    {
405
        MPI_User_function *uop;
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
        void *base, *dest_addr;
        int disp_unit;
        int len, one;

        if (win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED) {
            base = win_ptr->shm_base_addrs[target_rank];
            disp_unit = win_ptr->disp_units[target_rank];

            MPIDI_CH3I_SHM_MUTEX_LOCK(win_ptr);
            shm_locked = 1;
        }
        else {
            base = win_ptr->base;
            disp_unit = win_ptr->disp_unit;
        }

        dest_addr = (char *) base + disp_unit * target_disp;
423
424
425
426
427
428
429
430
431

        MPID_Datatype_get_size_macro(datatype, len);
        MPIU_Memcpy(result_addr, dest_addr, len);

        uop = MPIR_OP_HDL_TO_FN(op);
        one = 1;

        (*uop)((void *) origin_addr, dest_addr, &one, &datatype);

432
433
434
435
        if (shm_locked) {
            MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
            shm_locked = 0;
        }
436
437
    }
    else {
438
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
439
        MPIDI_RMA_Op_t *new_ptr = NULL;
440

441
442
        /* Append this operation to the RMA ops queue */
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
443
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
444
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
445
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        new_ptr->type = MPIDI_RMA_FETCH_AND_OP;
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = 1;
        new_ptr->origin_datatype = datatype;
        new_ptr->target_rank = target_rank;
        new_ptr->target_disp = target_disp;
        new_ptr->target_count = 1;
        new_ptr->target_datatype = datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_count = 1;
        new_ptr->result_datatype = datatype;
        new_ptr->op = op;
        MPIU_INSTR_DURATION_END(rmaqueue_set);
    }

fn_exit:
464
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_FETCH_AND_OP);
465
466
467
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
fn_fail:
468
469
470
    if (shm_locked) {
        MPIDI_CH3I_SHM_MUTEX_UNLOCK(win_ptr);
    }
471
472
473
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}