/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
/*
 *  (C) 2001 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */

#include "mpidrma.h"

/* Performance-variable timers used in this file: rma_rmaqueue_alloc brackets
 * the allocation of a new entry at the tail of an RMA ops list, and
 * rma_rmaqueue_set brackets the filling-in of that entry.  They are only
 * declared (extern) here. */
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_alloc);
MPIR_T_PVAR_DOUBLE_TIMER_DECL_EXTERN(RMA, rma_rmaqueue_set);

#undef FUNCNAME
#define FUNCNAME MPIDI_Get_accumulate
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Get_accumulate - device-level get-accumulate on an RMA window.
 *
 * Parameters:
 *   origin_addr, origin_count, origin_datatype - data to be combined into the
 *       target (not recorded when op is MPI_NO_OP)
 *   result_addr, result_count, result_datatype - buffer that receives the
 *       target's data
 *   target_rank     - rank of the target in the window's communicator;
 *                     MPI_PROC_NULL makes the call a no-op
 *   target_disp     - displacement at the target, scaled by the target's
 *                     disp_unit
 *   target_count, target_datatype - layout of the data at the target
 *   op              - reduction operation; MPI_NO_OP turns the queued
 *                     operation into a plain get
 *   win_ptr         - window object
 *
 * Behavior:
 *   - Returns immediately when the target is MPI_PROC_NULL or when the target
 *     data size is zero.
 *   - If no epoch is open but a fence has been issued, the window is moved
 *     into the fence epoch; if it is still in no epoch afterwards, the call
 *     fails with MPI_ERR_RMA_SYNC.
 *   - If the target is this process, the window has the SHARED flavor, or
 *     shared memory is allocated and origin and target have the same node id,
 *     the operation is handed to MPIDI_CH3I_Shm_get_acc_op.
 *   - Otherwise a new entry is allocated at the tail of the target's RMA ops
 *     list and filled in, and references are added to the derived datatypes
 *     the entry records.
 *
 * Returns MPI_SUCCESS or the error code produced by the failing step.
 */
int MPIDI_Get_accumulate(const void *origin_addr, int origin_count,
                         MPI_Datatype origin_datatype, void *result_addr, int result_count,
                         MPI_Datatype result_datatype, int target_rank, MPI_Aint target_disp,
                         int target_count, MPI_Datatype target_datatype, MPI_Op op, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_msg_sz_t data_sz;
    int rank;
    int dt_contig ATTRIBUTE((unused));
    MPI_Aint dt_true_lb ATTRIBUTE((unused));
    MPID_Datatype *dtp;
    MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_GET_ACCUMULATE);

    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_GET_ACCUMULATE);

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    /* A previously issued fence implicitly opens a fence epoch on first use. */
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    /* Only data_sz is used below; dt_contig and dt_true_lb are outputs of the
       macro that this function does not need. */
    MPIDI_Datatype_get_info(target_count, target_datatype, dt_contig, data_sz,
                            dtp, dt_true_lb);

    if (data_sz == 0) {
        goto fn_exit;
    }

    rank = win_ptr->comm_ptr->rank;

    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
        /* check if target is local and shared memory is allocated on window,
           if so, we directly perform this operation on shared memory region. */

        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
           the same node. However, in ch3:sock, even if origin and target are on the same node, they are
           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
           which is only set to TRUE when SHM region is allocated in nemesis.
           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
        */
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
    }

    /* Do != rank first (most likely branch?) */
    /* Note: orig_vc/target_vc are only dereferenced when the first two tests
       fail and shm_allocated is TRUE, which is exactly the case in which they
       were looked up above. */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
    {
        mpi_errno = MPIDI_CH3I_Shm_get_acc_op(origin_addr, origin_count, origin_datatype,
                                              result_addr, result_count, result_datatype,
                                              target_rank, target_disp, target_count, target_datatype,
                                              op, win_ptr);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    }
    else {
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
        MPIDI_RMA_Op_t *new_ptr = NULL;

        /* Append the operation to the window's RMA ops queue */
        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        /* TODO: Can we use the MPIDI_RMA_ACC_CONTIG optimization? */

        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);

        if (op == MPI_NO_OP) {
            /* Convert GAcc to a Get: the result buffer takes the role of the
               origin buffer of the get, and the caller's origin arguments are
               not recorded. */
            MPIDI_CH3_Pkt_get_t *get_pkt = &(new_ptr->pkt.get);
            MPIDI_Pkt_init(get_pkt, MPIDI_CH3_PKT_GET);
            get_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
                win_ptr->disp_units[target_rank] * target_disp;
            get_pkt->count = target_count;
            get_pkt->datatype = target_datatype;
            get_pkt->dataloop_size = 0;
            get_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
            get_pkt->source_win_handle = win_ptr->handle;

            new_ptr->origin_addr = result_addr;
            new_ptr->origin_count = result_count;
            new_ptr->origin_datatype = result_datatype;
            new_ptr->target_rank = target_rank;
        }
        else {
            MPIDI_CH3_Pkt_accum_t *accum_pkt = &(new_ptr->pkt.accum);
            MPIDI_Pkt_init(accum_pkt, MPIDI_CH3_PKT_GET_ACCUM);
            accum_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
                win_ptr->disp_units[target_rank] * target_disp;
            accum_pkt->count = target_count;
            accum_pkt->datatype = target_datatype;
            accum_pkt->dataloop_size = 0;
            accum_pkt->op = op;
            accum_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
            accum_pkt->source_win_handle = win_ptr->handle;

            new_ptr->origin_addr = (void *) origin_addr;
            new_ptr->origin_count = origin_count;
            new_ptr->origin_datatype = origin_datatype;
            new_ptr->result_addr = result_addr;
            new_ptr->result_count = result_count;
            new_ptr->result_datatype = result_datatype;
            new_ptr->target_rank = target_rank;
        }

        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);

        /* if source or target datatypes are derived, increment their
           reference counts.  The origin datatype is skipped for MPI_NO_OP
           because that path does not record it in the queued operation. */
        if (op != MPI_NO_OP && !MPIR_DATATYPE_IS_PREDEFINED(origin_datatype)) {
            MPID_Datatype_get_ptr(origin_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        if (!MPIR_DATATYPE_IS_PREDEFINED(result_datatype)) {
            MPID_Datatype_get_ptr(result_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
        if (!MPIR_DATATYPE_IS_PREDEFINED(target_datatype)) {
            MPID_Datatype_get_ptr(target_datatype, dtp);
            MPID_Datatype_add_ref(dtp);
        }
    }

 fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_GET_ACCUMULATE);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


#undef FUNCNAME
#define FUNCNAME MPIDI_Compare_and_swap
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
/*
 * MPIDI_Compare_and_swap - device-level compare-and-swap on an RMA window.
 *
 * Parameters:
 *   origin_addr  - address of the origin element
 *   compare_addr - address of the compare element
 *   result_addr  - address of the result element
 *   datatype     - datatype of the single element involved (used for the
 *                  origin, compare and result buffers alike)
 *   target_rank  - rank of the target in the window's communicator;
 *                  MPI_PROC_NULL makes the call a no-op
 *   target_disp  - displacement at the target, scaled by the target's
 *                  disp_unit
 *   win_ptr      - window object
 *
 * Behavior:
 *   - If no epoch is open but a fence has been issued, the window is moved
 *     into the fence epoch; if it is still in no epoch afterwards, the call
 *     fails with MPI_ERR_RMA_SYNC.
 *   - If the target is this process, the window has the SHARED flavor, or
 *     shared memory is allocated and origin and target have the same node id,
 *     the operation is handed to MPIDI_CH3I_Shm_cas_op.
 *   - Otherwise a CAS entry is allocated at the tail of the target's RMA ops
 *     list and filled in.
 *
 * Returns MPI_SUCCESS or the error code produced by the failing step.
 */
int MPIDI_Compare_and_swap(const void *origin_addr, const void *compare_addr,
                          void *result_addr, MPI_Datatype datatype, int target_rank,
                          MPI_Aint target_disp, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int rank;
    MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_COMPARE_AND_SWAP);

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

    /* A previously issued fence implicitly opens a fence epoch on first use. */
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

    rank = win_ptr->comm_ptr->rank;

    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
        /* check if target is local and shared memory is allocated on window,
           if so, we directly perform this operation on shared memory region. */

        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
           the same node. However, in ch3:sock, even if origin and target are on the same node, they are
           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
           which is only set to TRUE when SHM region is allocated in nemesis.
           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
        */
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
    }

    /* The datatype must be predefined, and one of: C integer, Fortran integer,
     * Logical, Multi-language types, or Byte.  This is checked above the ADI,
     * so there's no need to check it again here. */

    /* FIXME: For shared memory windows, we should provide an implementation
     * that uses a processor atomic operation. */
    /* Note: orig_vc/target_vc are only dereferenced when the first two tests
     * fail and shm_allocated is TRUE, which is exactly the case in which they
     * were looked up above. */
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
    {
        mpi_errno = MPIDI_CH3I_Shm_cas_op(origin_addr, compare_addr, result_addr,
                                          datatype, target_rank, target_disp, win_ptr);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
    }
    else {
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
        MPIDI_RMA_Op_t *new_ptr = NULL;
        MPIDI_CH3_Pkt_cas_t *cas_pkt = NULL;

        /* Append this operation to the RMA ops queue */
        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }

        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);

        /* Fill in the CAS packet header carried by the queued operation. */
        cas_pkt = &(new_ptr->pkt.cas);
        MPIDI_Pkt_init(cas_pkt, MPIDI_CH3_PKT_CAS);
        cas_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
            win_ptr->disp_units[target_rank] * target_disp;
        cas_pkt->datatype = datatype;
        cas_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];
        cas_pkt->source_win_handle = win_ptr->handle;

        /* Record the local buffers; the operation acts on one element. */
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = 1;
        new_ptr->origin_datatype = datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_datatype = datatype;
        new_ptr->compare_addr = (void *) compare_addr;
        new_ptr->compare_datatype = datatype;
        new_ptr->target_rank = target_rank;
        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
    }

fn_exit:
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_COMPARE_AND_SWAP);
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}


256
257
258
259
260
261
262
263
264
265
#undef FUNCNAME
#define FUNCNAME MPIDI_Fetch_and_op
#undef FCNAME
#define FCNAME MPIDI_QUOTE(FUNCNAME)
int MPIDI_Fetch_and_op(const void *origin_addr, void *result_addr,
                       MPI_Datatype datatype, int target_rank,
                       MPI_Aint target_disp, MPI_Op op, MPID_Win *win_ptr)
{
    int mpi_errno = MPI_SUCCESS;
    int rank;
266
    MPIDI_VC_t *orig_vc = NULL, *target_vc = NULL;
267

268
269
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_FETCH_AND_OP);
    MPIDI_RMA_FUNC_ENTER(MPID_STATE_MPIDI_FETCH_AND_OP);
270
271
272
273
274

    if (target_rank == MPI_PROC_NULL) {
        goto fn_exit;
    }

275
    if (win_ptr->epoch_state == MPIDI_EPOCH_NONE && win_ptr->fence_issued) {
James Dinan's avatar
James Dinan committed
276
277
278
279
280
281
        win_ptr->epoch_state = MPIDI_EPOCH_FENCE;
    }

    MPIU_ERR_CHKANDJUMP(win_ptr->epoch_state == MPIDI_EPOCH_NONE,
                        mpi_errno, MPI_ERR_RMA_SYNC, "**rmasync");

282
    rank = win_ptr->comm_ptr->rank;
283

Xin Zhao's avatar
Xin Zhao committed
284
285
286
287
288
289
290
291
292
293
294
295
296
297
    if (win_ptr->shm_allocated == TRUE && target_rank != rank && win_ptr->create_flavor != MPI_WIN_FLAVOR_SHARED) {
        /* check if target is local and shared memory is allocated on window,
           if so, we directly perform this operation on shared memory region. */

        /* FIXME: Here we decide whether to perform SHM operations by checking if origin and target are on
           the same node. However, in ch3:sock, even if origin and target are on the same node, they do
           not within the same SHM region. Here we filter out ch3:sock by checking shm_allocated flag first,
           which is only set to TRUE when SHM region is allocated in nemesis.
           In future we need to figure out a way to check if origin and target are in the same "SHM comm".
        */
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, rank, &orig_vc);
        MPIDI_Comm_get_vc(win_ptr->comm_ptr, target_rank, &target_vc);
    }

298
299
300
    /* The datatype and op must be predefined.  This is checked above the ADI,
     * so there's no need to check it again here. */

301
302
    /* FIXME: For shared memory windows, we should provide an implementation
     * that uses a processor atomic operation. */
Xin Zhao's avatar
Xin Zhao committed
303
304
    if (target_rank == rank || win_ptr->create_flavor == MPI_WIN_FLAVOR_SHARED ||
        (win_ptr->shm_allocated == TRUE && orig_vc->node_id == target_vc->node_id))
305
    {
306
307
308
        mpi_errno = MPIDI_CH3I_Shm_fop_op(origin_addr, result_addr, datatype,
                                          target_rank, target_disp, op, win_ptr);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
309
310
    }
    else {
311
        MPIDI_RMA_Ops_list_t *ops_list = MPIDI_CH3I_RMA_Get_ops_list(win_ptr, target_rank);
312
        MPIDI_RMA_Op_t *new_ptr = NULL;
313

Pavan Balaji's avatar
Pavan Balaji committed
314
315
        MPIDI_CH3_Pkt_fop_t *fop_pkt = NULL;

316
        /* Append this operation to the RMA ops queue */
317
        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_alloc);
318
        mpi_errno = MPIDI_CH3I_RMA_Ops_alloc_tail(ops_list, &new_ptr);
319
        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_alloc);
320
        if (mpi_errno) { MPIU_ERR_POP(mpi_errno); }
321

322
        MPIR_T_PVAR_TIMER_START(RMA, rma_rmaqueue_set);
Pavan Balaji's avatar
Pavan Balaji committed
323
324
325
326
327
328
329
330
331
        fop_pkt = &(new_ptr->pkt.fop);
        MPIDI_Pkt_init(fop_pkt, MPIDI_CH3_PKT_FOP);
        fop_pkt->addr = (char *) win_ptr->base_addrs[target_rank] +
            win_ptr->disp_units[target_rank] * target_disp;
        fop_pkt->datatype = datatype;
        fop_pkt->op = op;
        fop_pkt->source_win_handle = win_ptr->handle;
        fop_pkt->target_win_handle = win_ptr->all_win_handles[target_rank];

332
333
334
335
336
        new_ptr->origin_addr = (void *) origin_addr;
        new_ptr->origin_count = 1;
        new_ptr->origin_datatype = datatype;
        new_ptr->result_addr = result_addr;
        new_ptr->result_datatype = datatype;
Pavan Balaji's avatar
Pavan Balaji committed
337
        new_ptr->target_rank = target_rank;
338
        MPIR_T_PVAR_TIMER_END(RMA, rma_rmaqueue_set);
339
340
341
    }

fn_exit:
342
    MPIDI_RMA_FUNC_EXIT(MPID_STATE_MPIDI_FETCH_AND_OP);
343
344
345
346
347
348
    return mpi_errno;
    /* --BEGIN ERROR HANDLING-- */
fn_fail:
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}