/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidi_ch3_impl.h"
#include "mpidrma.h"

#ifdef USE_MPIU_INSTR
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_alloc);
MPIU_INSTR_DURATION_EXTERN_DECL(rmaqueue_set);
extern void MPIDI_CH3_RMA_InitInstr(void);
#endif

#undef FUNCNAME
#define FUNCNAME MPIDI_Get_accumulate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Get_accumulate - fetch the target data into the result buffer and
 * combine the origin data into the target with 'op'.
 *
 * Nothing is done when the origin data size is zero or when target_rank is
 * MPI_PROC_NULL.
 *
 * When target_rank is this process's rank in the window, the operation is
 * carried out immediately on the local window memory:
 *   1. the target region is copied into result_addr;
 *   2. for MPI_REPLACE the origin data is copied over the target region;
 *      otherwise 'op' must be a builtin op and its function is applied,
 *      either in one call (predefined target datatype) or once per
 *      contiguous block of the target datatype (derived target datatype).
 *
 * For any other target rank the operation is appended to the window's RMA
 * ops queue, and a reference is added to each derived datatype involved.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
                         MPI_Datatype origin_datatype, void *result_addr, int result_count,
                         MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
                         int target_count, MPI_Datatype target_datatype, MPI_Op op, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t data_sz;
    int rank, origin_predefined, result_predefined, target_predefined;
    int dt_contig ATTRIBUTE((unused));
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPID_Datatype *dtp;
    /* At most two local allocations: the temporary buffer and the dloop
     * vector. */
    MPIU_CHKLMEM_DECL(2);
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET_ACCUMULATE);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET_ACCUMULATE);

    MPIDI_Datatype_get_info(origin_count, origin_datatype, dt_contig, data_sz,
                            dtp, dt_true_lb);

    /* NOTE(review): the early exit is keyed to the size of the ORIGIN data,
     * so an empty origin buffer also skips the fetch into result_addr.
     * Confirm this is the intended behavior for every supported op. */
    if ((data_sz == 0) || (target_rank == MPI_PROC_NULL)) {
        goto fn_exit;
    }

    rank = win_ptr->myrank;

    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(origin_datatype, origin_predefined);
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(result_datatype, result_predefined);
    MPIDI_CH3I_DATATYPE_IS_PREDEFINED(target_datatype, target_predefined);

    /* Do != rank first (most likely branch?) */
    if (target_rank == rank) {
        MPI_User_function *uop;

        /* Perform the local get first, then the accumulate */
        mpi_errno = MPIR_Localcopy((char *) win_ptr->base + win_ptr->disp_unit *
                                   target_disp, target_count, target_datatype,
                                   result_addr, result_count, result_datatype);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        if (op == MPI_REPLACE) {
            /* Replace is a plain typed copy of the origin onto the target */
            mpi_errno = MPIR_Localcopy(origin_addr, origin_count, origin_datatype,
                                (char *) win_ptr->base + win_ptr->disp_unit *
                                target_disp, target_count, target_datatype);

            if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
            goto fn_exit;
        }

        MPIU_ERR_CHKANDJUMP1((HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN),
                             mpi_errno, MPI_ERR_OP, "**opnotpredefined",
                             "**opnotpredefined %d", op );

        /* get the function by indexing into the op table */
        uop = MPIR_OP_HDL_TO_FN(op);

        if (origin_predefined && target_predefined) {
            /* Cast away const'ness for origin_address in order to
             * avoid changing the prototype for MPI_User_function */
            (*uop)((void *) origin_addr, (char *) win_ptr->base + win_ptr->disp_unit *
                   target_disp, &target_count, &target_datatype);
        }
        else {
            /* derived datatype */

            MPID_Segment *segp;
            DLOOP_VECTOR *dloop_vec;
            MPI_Aint first, last;
            int vec_len, i, type_size, count;
            MPI_Datatype type;
            MPI_Aint true_lb, true_extent, extent;
            void *tmp_buf=NULL, *target_buf;
            const void *source_buf;

            if (origin_datatype != target_datatype) {
                /* first copy the data into a temporary buffer with
                   the same datatype as the target. Then do the
                   accumulate operation. */

                MPIR_Type_get_true_extent_impl(target_datatype, &true_lb, &true_extent);
                MPID_Datatype_get_extent_macro(target_datatype, extent);

                MPIU_CHKLMEM_MALLOC(tmp_buf, void *,
                                    target_count * (MPIR_MAX(extent,true_extent)),
                                    mpi_errno, "temporary buffer");
                /* adjust for potential negative lower bound in datatype */
                tmp_buf = (void *)((char*)tmp_buf - true_lb);

                mpi_errno = MPIR_Localcopy(origin_addr, origin_count,
                                           origin_datatype, tmp_buf,
                                           target_count, target_datatype);
                if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
            }

            if (target_predefined) {
                /* target predefined type, origin derived datatype */

                (*uop)(tmp_buf, (char *) win_ptr->base + win_ptr->disp_unit *
                       target_disp, &target_count, &target_datatype);
            }
            else {
                /* target is a derived datatype: apply the op one contiguous
                 * block at a time */

                MPID_Datatype_get_ptr(target_datatype, dtp);
                vec_len = dtp->max_contig_blocks * target_count + 1;
                /* +1 needed because Rob says so */

                /* Allocate the vector BEFORE the segment: a failure here
                 * jumps to fn_fail, and the segment is not tracked by the
                 * CHKLMEM cleanup, so it must not exist yet or it would
                 * leak. */
                MPIU_CHKLMEM_MALLOC(dloop_vec, DLOOP_VECTOR *,
                                    vec_len * sizeof(DLOOP_VECTOR),
                                    mpi_errno, "dloop vector");

                segp = MPID_Segment_alloc();
                MPIU_ERR_CHKANDJUMP1((!segp), mpi_errno, MPI_ERR_OTHER,
                                     "**nomem","**nomem %s","MPID_Segment_alloc");
                MPID_Segment_init(NULL, target_count, target_datatype, segp, 0);
                first = 0;
                last  = SEGMENT_IGNORE_LAST;

                MPID_Segment_pack_vector(segp, first, &last, dloop_vec, &vec_len);

                /* The segment was initialized with a NULL buffer, so each
                 * vector entry's BUF is used as an offset that is added to
                 * both the source and the target base addresses. */
                source_buf = (tmp_buf != NULL) ? tmp_buf : origin_addr;
                target_buf = (char *) win_ptr->base +
                    win_ptr->disp_unit * target_disp;
                type = dtp->eltype;
                type_size = MPID_Datatype_get_basic_size(type);

                for (i=0; i<vec_len; i++) {
                    count = (dloop_vec[i].DLOOP_VECTOR_LEN)/type_size;
                    (*uop)((char *)source_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                           (char *)target_buf + MPIU_PtrToAint(dloop_vec[i].DLOOP_VECTOR_BUF),
                           &count, &type);
                }

                MPID_Segment_free(segp);
            }
        }
    }
    else {
        MPIDI_RMA_ops *new_ptr = NULL;

        /* Append the operation to the window's RMA ops queue */
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
        mpi_errno = MPIDI_CH3I_Win_ops_alloc_tail(win_ptr, &new_ptr);
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        /* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        new_ptr->type = MPIDI_RMA_GET_ACCUMULATE;
        /* Cast away const'ness for origin_address as MPIDI_RMA_ops
         * contain both PUT and GET like ops */
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = origin_count;
        new_ptr->origin_datatype = origin_datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_count = result_count;
        new_ptr->result_datatype = result_datatype;
        new_ptr->target_rank = target_rank;
        new_ptr->target_disp = target_disp;
        new_ptr->target_count = target_count;
        new_ptr->target_datatype = target_datatype;
        new_ptr->op = op;
        MPIU_INSTR_DURATION_END(rmaqueue_set);

        /* if source or target datatypes are derived, increment their
           reference counts */
        if (!origin_predefined) {
            MPID_Datatype_get_ptr(origin_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        if (!result_predefined) {
            MPID_Datatype_get_ptr(result_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        if (!target_predefined) {
            MPID_Datatype_get_ptr(target_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
    }

 fn_exit:
    MPIU_CHKLMEM_FREEALL();
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_GET_ACCUMULATE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Compare_and_swap
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Compare_and_swap - compare one element at the target with
 * *compare_addr and, if they are equal, replace it with *origin_addr; the
 * target's prior value is returned in *result_addr.
 *
 * Nothing is done when target_rank is MPI_PROC_NULL.
 *
 * When target_rank is this process's rank in the window, the operation is
 * performed immediately on the local window memory.  Otherwise it is
 * appended to the window's RMA ops queue with all counts set to 1 and all
 * datatypes set to 'datatype'.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
                          void *result_addr, MPI_Datatype datatype, int target_rank,
                          MPI_Aint target_disp, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int rank;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_COMPARE_AND_SWAP);

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    rank = win_ptr->myrank;

    /* The datatype must be predefined, and one of: C integer, Fortran integer,
     * Logical, Multi-language types, or Byte.  This is checked above the ADI,
     * so there's no need to check it again here. */

    if (target_rank == rank) {
        void *dest_addr = (char *) win_ptr->base + win_ptr->disp_unit * target_disp;
        int len;

        MPID_Datatype_get_size_macro(datatype, len);

        /* Hand back the target's current value before it can be modified */
        MPIU_Memcpy(result_addr, dest_addr, len);

        /* Swap in the origin value only when the comparison succeeds */
        if (MPIR_Compare_equal(compare_addr, dest_addr, datatype)) {
            MPIU_Memcpy(dest_addr, origin_addr, len);
        }
    }
    else {
        MPIDI_RMA_ops *new_ptr = NULL;

        /* Append this operation to the RMA ops queue */
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
        mpi_errno = MPIDI_CH3I_Win_ops_alloc_tail(win_ptr, &new_ptr);
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        new_ptr->type = MPIDI_RMA_COMPARE_AND_SWAP;
        /* Const is cast away because the queue entry stores plain
         * void pointers */
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = 1;
        new_ptr->origin_datatype = datatype;
        new_ptr->target_rank = target_rank;
        new_ptr->target_disp = target_disp;
        new_ptr->target_count = 1;
        new_ptr->target_datatype = datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_count = 1;
        new_ptr->result_datatype = datatype;
        new_ptr->compare_addr = (void *) compare_addr;
        new_ptr->compare_count = 1;
        new_ptr->compare_datatype = datatype;
        MPIU_INSTR_DURATION_END(rmaqueue_set);
    }

fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Fetch_and_op
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Fetch_and_op - return the target's current value for one element in
 * *result_addr and combine *origin_addr into the target with 'op'.
 *
 * Nothing is done when target_rank is MPI_PROC_NULL.
 *
 * When target_rank is this process's rank in the window, the operation is
 * performed immediately on the local window memory.  Otherwise it is
 * appended to the window's RMA ops queue with all counts set to 1 and all
 * datatypes set to 'datatype'.
 *
 * Returns MPI_SUCCESS or an MPI error code.
 */
int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
                       MPI_Datatype datatype, int target_rank,
                       MPI_Aint target_disp, MPI_Op op, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int rank;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_FETCH_AND_OP);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_FETCH_AND_OP);

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    rank = win_ptr->myrank;

    /* The datatype and op must be predefined.  This is checked above the ADI,
     * so there's no need to check it again here. */

    if (target_rank == rank) {
        void *dest_addr = (char *) win_ptr->base + win_ptr->disp_unit * target_disp;
        int len, one;
        MPI_User_function *uop;

        MPID_Datatype_get_size_macro(datatype, len);

        /* Fetch the target's current value before applying the op */
        MPIU_Memcpy(result_addr, dest_addr, len);

        /* Look up the op's function and apply it to a single element.
         * Const is cast away to match the MPI_User_function prototype. */
        uop = MPIR_OP_HDL_TO_FN(op);
        one = 1;

        (*uop)((void *) origin_addr, dest_addr, &one, &datatype);
    }
    else {
        MPIDI_RMA_ops *new_ptr = NULL;

        /* Append this operation to the RMA ops queue */
        MPIU_INSTR_DURATION_START(rmaqueue_alloc);
        mpi_errno = MPIDI_CH3I_Win_ops_alloc_tail(win_ptr, &new_ptr);
        MPIU_INSTR_DURATION_END(rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        MPIU_INSTR_DURATION_START(rmaqueue_set);
        new_ptr->type = MPIDI_RMA_FETCH_AND_OP;
        /* Const is cast away because the queue entry stores plain
         * void pointers */
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = 1;
        new_ptr->origin_datatype = datatype;
        new_ptr->target_rank = target_rank;
        new_ptr->target_disp = target_disp;
        new_ptr->target_count = 1;
        new_ptr->target_datatype = datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_count = 1;
        new_ptr->result_datatype = datatype;
        new_ptr->op = op;
        MPIU_INSTR_DURATION_END(rmaqueue_set);
    }

fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_FETCH_AND_OP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}